Author Topic: NewSphinxCmm Examples  (Read 75 times)

Emil_halim

  • Member
  • **
  • Posts: 86
NewSphinxCmm Examples
« on: May 15, 2017, 02:36:38 AM »
Hi all,

Here is another example that shows you how to mix C-- and Masm code.

Note well:
=======
1-  NewSphinxC extends the extern keyword so that it accepts a whole block of external declarations.

Code: [Select]
// One extern block declares several external functions and data items at once.
extern {
  cdecl GetStrLen();   // declares external function (cdecl calling convention)
  cdecl Ten();             // declares external function (cdecl calling convention)
  byte  buffer;            // declares external data of type byte
  byte  hw;                // declares external data of type byte
}
   

2- Use the EXTERNDEF keyword inside the Masm block to declare data or functions that are defined outside the Masm code block and that you intend to use inside it.

3- The Masm code block starts with the .model directive; its 'c' term selects the calling convention.

4- MasmPlgIn allows some C-- features inside a Masm code block.
Code: [Select]
EAX := -1      // C-- style assignment written inside the Masm block
EAX++          // C-- style increment written inside the Masm block

Here is the demo code:
Code: [Select]
/*************************************
*         New Sphinx Cmm             * 
*                                    *
*           masm  test2              *
*                                    *
*         by Emil_halim              *         
*                                    *
*************************************/
#pragma option w32c       //create Windows console EXE.
#pragma option OS         //speed optimization

#pragma option dbg
#pragma option lst

#Entry  main
#includelib  win32.lib MSVCRT.lib

// $ will replaced with SphinxC-- main path
#includepath "$\winlib" 
 
#include <windows.h> 
#include <MSVCRT.H-->

// tells Cmm about data & code in masm section
//-------------------------------------------
extern {
  cdecl StCpy();        // string copy routine defined in the masm section
  cdecl GetStrLen();    // string length routine defined in the masm section
  cdecl Ten();          // returns cmm_val + 10, defined in the masm section
  byte  buffer;         // destination buffer defined in the masm section
  byte  hw;             // source string defined in the masm section
}

// declare some Cmm variables
// cmm_buffer is the destination of StCpy(#cmm_buffer,cmm_hw) in main().
// The text below is 20 characters plus the terminating zero (21 bytes), so the
// former 12 byte buffer was overrun by the copy; 32 bytes leaves headroom.
cmm_buffer: $DB 32 dup 0
char* cmm_hw = "Hello World from cmm";

// read by the masm routine Ten through EXTERNDEF
int cmm_val = 100;

// masm code start here
^^
  .MODEL flat, c   
  .nolist
 
   EXTERNDEF  cmm_val:SDWORD

.data?
; buffer is the destination of StCpy(#buffer,#hw) in main(). hw is 21 characters
; plus the terminating zero (22 bytes), so the former 12 byte buffer was overrun
; by the copy; 32 bytes leaves headroom.
buffer db 32 dup(?) ; destination

.data
hw db "Hello World from masm", 0 ; source

.code

// Gets the length of a string(not including the NULL terminator)
// In:  _str = pointer to a zero-terminated string
// Out: EAX  = number of bytes in front of the terminating zero
// ECX holds the string base and EAX the running index; both are overwritten.
GetStrLen Proc ,_str:PTR
MOV ECX,_str // Move source pointer to ECX
EAX := -1 // Start at -1 so the increment can sit at the top of the loop
  next_char:
EAX++      // advance the index to the next byte (0 on the first pass)
CMP byte PTR[ECX+EAX],0
JNE next_char // If BYTE is not equal to NULL process next .
RET // Returns string length in EAX
GetStrLen EndP

// Ten: returns cmm_val + 10 in EAX.
// cmm_val is the C-- variable announced to the assembler with EXTERNDEF.
Ten:
   mov EAX,cmm_val   // start from the C-- value
   add EAX,10        // add the constant ten
   ret
   
// StCpy(dest, src): copies the zero-terminated string at src to dest,
// including the terminating zero. Both arguments are read straight from the
// stack; ESI and EDI are saved on entry and restored before returning.
FASTPROC StCpy
  push esi
  push edi
  mov esi, [esp+16] // src : above 2 saved registers, return address and dest
  mov edi, [esp+12] // dest: above 2 saved registers and the return address
StCpy_next:
  lodsb             // AL = byte at ESI, ESI moves on
  stosb             // byte at EDI = AL, EDI moves on
  cmp al, 0
  jne StCpy_next    // done once the terminating zero has been copied
  pop edi
  pop esi
  ret
END FASTPROC   

^^
// masm code end here


// Entry point of the demo: calls each masm routine once and prints the result.
main()
{
       
   // length of a literal string, computed by the masm routine GetStrLen
   printf("string length = %d \n" , GetStrLen("CmmPro Is the best"));
   
   // copy the C-- string into the C-- buffer with the masm routine StCpy
   StCpy(#cmm_buffer,cmm_hw);
   puts(#cmm_buffer);
   
   // copy the masm string into the masm buffer (# passes the address)
   StCpy(#buffer,#hw);
   puts(#buffer);

   // Ten() adds 10 to the C-- variable cmm_val
   printf("Ten = %d\n",Ten());
   system("pause");   
 
}


Emil_halim

  • Member
  • **
  • Posts: 86
Re: NewSphinxCmm Examples
« Reply #1 on: May 15, 2017, 02:38:29 AM »
Hi All,

Here is another example that mixes SphinxC and HJWasm.

Some rules you have to take care of:

1- A Masm block of code must start with the term '^^' and end with the same term.
2- The Masm block is extracted from the SphinxC source, then assembled, and later linked with alink.
3- This process is carried out by MasmPlgIn; you do not have to use the cpu directive.
4- MasmPlgIn specifies the path of the include directory.
5- Take care of the function calling convention; a mismatch may crash your code.


The following Masm code was taken from http://www.masmforum.com/board/index.php?topic=14696.105
It determines the CPU of the computer.

Code: [Select]
/*************************************
*         New Sphinx Cmm             * 
*                                    *
*           masm test                *
*                                    *
*         by Emil_halim              *         
*                                    *
*************************************/

#pragma option w32c       //create Windows console EXE.
#pragma option OS         //speed optimization

#pragma option dbg
#pragma option lst

#Entry  main
#includelib  win32.lib MSVCRT.lib ole32.lib

// $ will replaced with SphinxC-- main path
#includepath "$\winlib" 
 
#include <windows.h> 
#include <MSVCRT.H-->

// tells SphinxC about masm ShowCpu function
extern stdcall ShowCpu(dword v);

^^ //start of Masm code
.nolist
include masm32rt.inc

include m32lib/dwtoa.asm
include m32lib/stdout.asm

.code
;//  masm code taken from the next link
;//  http://www.masmforum.com/board/index.php?topic=14696.105
; ShowCpu(mode) - stdcall, one dword argument removed by "ret 4".
; Reads the CPU brand string (cpuid 80000002h-80000004h) into an 80 byte stack
; buffer, counts four feature bits of cpuid 1 in EBX and, when mode is 1,
; prints the brand string followed by the count.
; Returns the count in EAX; all other registers are restored by pushad/popad.
; No PROC parameters are declared, so the argument is addressed relative to ESP.
ShowCpu proc stdcall ; mode:DWORD
COMMENT @ Usage:
  push 0, call ShowCpu ; simple, no printing, just returns SSE level
  push 1, call ShowCpu ; prints the brand string and returns SSE level @
 
  pushad          ; save all 8 general registers (32 bytes)
  sub esp, 80     ; create a buffer for the brand string
  mov edi, esp ; point edi to it
  xor ebp, ebp ; ebp = leaf counter 0..2
  .Repeat
  lea eax, [ebp+80000002h] ; select leaf 80000002h + counter
db 0Fh, 0A2h ; cpuid 80000002h-80000004h
stosd        ; store eax, then ebx, ecx, edx: 16 bytes of text per leaf
mov eax, ebx
stosd
mov eax, ecx
stosd
mov eax, edx
stosd
inc ebp
  .Until ebp>=3 ; three leaves = 48 bytes written to the buffer
  push 1
  pop eax      ; eax = 1
  db 0Fh, 0A2h ; cpuid 1
  xor ebx, ebx ; CpuSSE
  xor esi, esi ; add zero plus the carry flag
  bt edx, 25 ; edx bit 25, SSE1
  adc ebx, esi ; ebx += carry, i.e. +1 when the tested bit was set
  bt edx, 26 ; edx bit 26, SSE2
  adc ebx, esi
  bt ecx, esi ; ecx bit 0, SSE3
  adc ebx, esi
  bt ecx, 9 ; ecx bit 9, counted as the 4th level. NOTE(review): Intel's cpuid 1 table lists ecx bit 9 as SSSE3 and bit 19 as SSE4.1 - confirm which was intended
  adc ebx, esi
  dec dword ptr [esp+4+32+80] ; dec mode in stack: 80 byte buffer + 32 bytes pushad + 4 bytes return address
  .if Zero? ; mode was 1
mov edi, esp ; restore pointer to brand string
  .Repeat
.Break .if byte ptr [edi]!=32 ; mode was 1, so show a string but skip leading blanks
inc edi
.Until 0
.if byte ptr [edi]<32 ; first non-blank byte is a control code or zero: no usable text
print chr$("pre-P4")
.else
print edi ; CpuBrand (presumably zero-terminated by the CPU - TODO confirm)
.endif
.if ebx
print chr$(32, 40, "SSE") ; info on SSE level, 40=(
print str$(ebx), 41, 13, 10 ; 41=)
.endif
  .endif
  add esp, 80 ; discard brand buffer (after printing!)
  mov [esp+32-4], ebx ; move ebx into eax stack position - returns eax to main for further use
  popad        ; eax now holds the count, everything else is restored
  ret 4        ; stdcall: remove the 4 byte mode argument
ShowCpu endp

^^ //End of Masm code


;//***********************************************************************************//


// Entry point of the demo: calls the masm routine ShowCpu once.
main()
{
   ShowCpu(1); // print brand string and SSE level
     
   system("pause");   // keep the console window open
 
}

Emil_halim

  • Member
  • **
  • Posts: 86
Re: memory-copy-benchmarks
« Reply #2 on: May 17, 2017, 04:20:02 AM »
Hi All,

Here is another example, from the PowerBasic forum, written by Steve Hutchesson.

I have converted his example to C-- & Masm: the procedures were converted to Masm, and the main code to C--.

Code: [Select]
/*************************************
*         New Sphinx Cmm             * 
*                                    *
*     memory-copy-benchmarks         *
*                                    *
*      from powerbasic Forum         *         
*                                    *
*************************************/


#pragma option w32c       //create Windows console EXE.
#pragma option OS         //speed optimization

#pragma option dbg
#pragma option lst

//#pragma option upx-

#Entry  main
#includelib  win32.lib MSVCRT.lib ole32.lib

// $ will replaced with SphinxC-- main path
#includepath "$\winlib" 
 
#include <windows.h> 
#include <MSVCRT.H-->

#pragma option ia

// tells SphinxC about the masm SSEcopy and SSEcopy2 functions
// (both cdecl, both taking three dword arguments: src, dst, blen)
extern {
   cdecl SSEcopy(dword src,dword dst,dword blen);
   cdecl SSEcopy2(dword src,dword dst,dword blen);
}
//start of Masm code
^^
  .MODEL flat, c   
  .nolist
.data

pflead dd 0

.code
;//  masm code taken from the next link and cereated by Steve Hutchesson.
;//  https://forum.powerbasic.com/forum/user-to-user-discussions/powerbasic-inline-assembler/43459-memory-copy-benchmarks

;-----------------------------------------------------------------------
; SSEcopy(src, dst, blen) - cdecl
; Copies blen bytes from src to dst: whole 64 byte blocks go through
; XMM0-XMM3 (aligned loads, non temporal stores), the bytes left over are
; copied one at a time.
; In:    src, dst = buffer addresses; both must be 16 byte aligned because
;                   movdqa and movntdq fault on unaligned memory operands
;        blen     = number of bytes to copy (0 is allowed)
; Out:   no return value is set
; Saves: ESI, EDI, EBX (USES).  Changes: EAX, EDX, XMM0-XMM3, flags
;-----------------------------------------------------------------------
SSEcopy proc c uses esi edi ebx, src:DWORD, dst:DWORD, blen:DWORD

     mov esi, src
     mov edi, dst
     mov ebx, blen
     shr ebx, 6                        ; EBX = number of whole 64 byte blocks
     xor edx, edx                      ; zero EDX and use as INDEX
     test ebx, ebx
     jz ssec_tail                      ; fewer than 64 bytes: the block loop must not run

  align 4
  ssec_block:
     movdqa xmm0, [esi+edx]            ; 16 byte aligned reads
     movdqa xmm1, [esi+edx+16]
     movdqa xmm2, [esi+edx+32]
     movdqa xmm3, [esi+edx+48]

     movntdq [edi+edx], xmm0           ; non temporal writes
     movntdq [edi+edx+16], xmm1
     movntdq [edi+edx+32], xmm2
     movntdq [edi+edx+48], xmm3

     add edx, 64                       ; add block copy size to INDEX

     sub ebx, 1                        ; decrement loop counter
     jnz ssec_block

  ssec_tail:
     mov ebx, blen                     ; test for remainder
     and ebx, 63                       ; EBX = blen mod 64 = bytes still to copy
     jz ssec_done

  align 4
  ssec_byte:
     movzx eax, BYTE PTR [esi+edx]     ; copy remainder
     mov [edi+edx], al
     add edx, 1                        ; increment the INDEX
     sub ebx, 1                        ; decrement the loop counter
     jnz ssec_byte

  ssec_done:
    ret
SSEcopy endp

;//---------------------------------------------------------------------
;// SSEcopy2(src, dst, blen) - cdecl
;// Copies blen bytes from src to dst: whole 128 byte blocks go through
;// XMM0-XMM7 (aligned loads, non temporal stores), the bytes left over are
;// copied one at a time.
;// In:    src, dst = buffer addresses; both must be 16 byte aligned because
;//                   movdqa and movntdq fault on unaligned memory operands
;//        blen     = number of bytes to copy (0 is allowed)
;// Out:   no return value is set
;// Saves: ESI, EDI, EBX (USES).  Changes: EAX, EDX, XMM0-XMM7, flags
;//---------------------------------------------------------------------
SSEcopy2 proc c uses esi edi ebx, src:DWORD, dst:DWORD, blen:DWORD

     mov esi, src
     mov edi, dst
     mov ebx, blen
     shr ebx, 7                        ;// EBX = number of whole 128 byte blocks
     xor edx, edx                      ;// zero EDX and use as INDEX
     test ebx, ebx
     jz ssec2_tail                     ;// fewer than 128 bytes: the block loop must not run

  align 4
  ssec2_block:
    ; prefetchnta BYTE PTR [esi+edx+pflead]
    ;//  prefetcht0 BYTE PTR [esi+edx+%pflead]
    ;//  prefetcht1 BYTE PTR [esi+edx+%pflead]
    ;// prefetcht2 BYTE PTR [esi+edx+%pflead]

     movdqa xmm0, [esi+edx]            ;// 16 byte aligned reads
     movdqa xmm1, [esi+edx+16]
     movdqa xmm2, [esi+edx+32]
     movdqa xmm3, [esi+edx+48]

     movdqa xmm4, [esi+edx+64]
     movdqa xmm5, [esi+edx+80]
     movdqa xmm6, [esi+edx+96]
     movdqa xmm7, [esi+edx+112]

     movntdq [edi+edx], xmm0           ;// non temporal writes
     movntdq [edi+edx+16], xmm1
     movntdq [edi+edx+32], xmm2
     movntdq [edi+edx+48], xmm3

     movntdq [edi+edx+64], xmm4
     movntdq [edi+edx+80], xmm5
     movntdq [edi+edx+96], xmm6
     movntdq [edi+edx+112], xmm7

     add edx, 128                      ;// add block copy size to INDEX

     sub ebx, 1                        ;// decrement loop counter
     jnz ssec2_block

  ssec2_tail:
     mov ebx, blen                     ;// test for remainder
     and ebx, 127                      ;// EBX = blen mod 128 = bytes still to copy
     jz ssec2_done

  align 4
  ssec2_byte:
     movzx eax, BYTE PTR [esi+edx]     ;// copy remainder
     mov [edi+edx], al
     add edx, 1                        ;// increment the INDEX
     sub ebx, 1                        ;// decrement the loop counter
     jnz ssec2_byte

  ssec2_done:
    ret
SSEcopy2 endp

^^
//End of Masm code


;//***********************************************************************************//

// bytes allocated for each buffer (129 meg)
#define MEMLEN  1024*1024*129
// bytes copied per pass (128 meg); the spare meg of each allocation leaves
// room to move the start address up to the next 16 byte boundary
#define COPYLEN 1024*1024*128

// Benchmark: copies 128 meg between two 16 byte aligned buffers 100 times
// with SSEcopy2 and prints the elapsed time in milliseconds.
main()
{
   dword hMem,tMem,aMem,dMem,tc;
   
   hMem = GlobalAlloc(GMEM_FIXED | GMEM_ZEROINIT,MEMLEN); // allocate 129 meg
   tMem = GlobalAlloc(GMEM_FIXED,MEMLEN);                 // allocate 129 meg
 
   if (hMem == 0) {
      puts("GlobalAlloc failed for the source buffer");
   }
   else if (tMem == 0) {
      puts("GlobalAlloc failed for the destination buffer");
   }
   else {
  // ----------------------------------------
  // align the source to a 16 byte boundary
  // ----------------------------------------
      MOV esi, hMem
      add esi, 15
      and esi, -16
      mov aMem, esi

  // ---------------------------------------------
  // align the destination to a 16 byte boundary
  // (SSEcopy2 writes it with movntdq, which needs
  // 16 byte aligned addresses)
  // ---------------------------------------------
      mov esi, tMem
      add esi, 15
      and esi, -16
      mov dMem, esi
     
      puts("please wait......");
     
      tc = GetTickCount();
     
      mov ecx, 100                     // number of passes
  next_pass:
      push ecx                         // ECX is not preserved across a cdecl call
      SSEcopy2(aMem,dMem,COPYLEN);
      pop ecx
      sub ecx, 1
      jnz next_pass

      tc = GetTickCount() - tc;
     
      printf("XMM copy 12.8 gig memory copy in %d ms\n",tc);
   }
     
   // release whichever buffers were actually allocated
   if (hMem != 0) {
      GlobalFree(hMem);
   }
   if (tMem != 0) {
      GlobalFree(tMem);
   }
   system("pause");   
   
}