Hi all,
Here is another example showing how to mix C-- and MASM code.
NoteWell:
=======
1- NewSphinxC extends the extern keyword so that it accepts a block of external declarations.
extern {
cdecl GetStrLen(); // declares external function
cdecl Ten(); // declares external function
byte buffer; // declares external data
byte hw; // declares external data
}
2- Use the EXTERNDEF keyword inside the MASM block to reference data or functions that are declared outside the MASM code block and that you intend to use inside it.
3- the Masm code block started with .model directive with term 'c' which is a calling convention.
4- MasmPlgIn allows some C-- features inside the MASM code block, for example:
EAX := -1
EAX++
here the demo code
/*************************************
* New Sphinx Cmm *
* *
* masm test2 *
* *
* by Emil_halim *
* *
*************************************/
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#pragma option dbg
#pragma option lst
#Entry main
#includelib win32.lib MSVCRT.lib
// $ will replaced with SphinxC-- main path
#includepath "$\winlib"
#include <windows.h>
#include <MSVCRT.H-->
// tells Cmm about data & code in masm section
//-------------------------------------------
extern {
cdecl StCpy();
cdecl GetStrLen();
cdecl Ten();
byte buffer;
byte hw;
}
// declare some Cmm variables
// cmm_buffer is the destination of StCpy(#cmm_buffer,cmm_hw) in main(), so it
// must hold the whole source string plus its NUL terminator.
// "Hello World from cmm" needs 21 bytes; the former 12-byte buffer was overrun.
cmm_buffer: $DB 32 dup 0
char* cmm_hw = "Hello World from cmm"; // source string copied by StCpy
int cmm_val = 100; // imported into the masm block with EXTERNDEF and added by Ten
// masm code start here
^^
.MODEL flat, c
.nolist
EXTERNDEF cmm_val:SDWORD
.data?
buffer db 32 dup(?) ; destination of StCpy(#buffer,#hw); must hold hw (22 bytes with its terminator) - 12 bytes was too small
.data
hw db "Hello World from masm", 0 ; source
.code
// Gets the length of a string (not including the NUL terminator).
// In : _str = pointer to a zero-terminated byte string
// Out: EAX  = number of bytes that precede the terminator
// ECX holds the base pointer; EAX is both the running index and the result.
GetStrLen Proc ,_str:PTR
MOV ECX,_str // Move source pointer to ECX
EAX := -1 // Start at -1 so the loop can increment before testing (C-- style assignment)
next_char:
EAX++ // advance to the next byte index (C-- style increment)
CMP byte PTR[ECX+EAX],0
JNE next_char // If the byte is not the NUL terminator, process the next one.
RET // Returns string length in EAX
GetStrLen EndP
// Ten: returns 10 + cmm_val in EAX.
// cmm_val is the C-- variable made visible to this block with EXTERNDEF.
Ten:
mov EAX,10
add EAX,cmm_val
ret
// StCpy(dest, src): copies the zero-terminated string at src to dest,
// including the terminator. Both arguments are read directly from the stack.
// ESI and EDI are saved and restored. No length check is performed, so dest
// must be large enough for the whole string.
FASTPROC StCpy
push esi
push edi
mov edi, [esp+8+4] // dest: skip the two saved registers (8) and the return address (4)
mov esi, [esp+8+8] // src: the next stack argument
.Repeat
lodsb // AL = byte at [ESI]; ESI advances (direction flag assumed clear)
stosb // byte at [EDI] = AL; EDI advances
.Until al==0 // stop once the terminator itself has been copied
pop edi
pop esi
ret
END FASTPROC
^^
// masm code end here
// Demo entry point: exercises the three routines defined in the masm block.
main()
{
// length of a literal, computed by the masm GetStrLen
printf("string length = %d \n" , GetStrLen("CmmPro Is the best"));
// copy a C-- string into a C-- buffer using the masm StCpy
StCpy(#cmm_buffer,cmm_hw);
puts(#cmm_buffer);
// copy between two data items that live in the masm block (declared in the extern block)
StCpy(#buffer,#hw);
puts(#buffer);
// Ten() returns 10 + cmm_val
printf("Ten = %d\n",Ten());
system("pause");
}
Hi All,
Here is another Example that mix SphinxC and HJWasm.
Some rules you have to take care of:
1- A MASM block of code must start with the token '^^' and end with the same token.
2- The MASM block is extracted from the SphinxC source, assembled, and later linked with alink.
3- This process is carried out by MasmPlgIn; you do not need to use the cpu directive.
4- MasmPlgIn specifies the path of the include directory.
5- Take care of the function calling convention; a mismatch may crash your code.
The following MASM code was taken from http://www.masmforum.com/board/index.php?topic=14696.105
It identifies the CPU of the computer.
/*************************************
* New Sphinx Cmm *
* *
* masm test *
* *
* by Emil_halim *
* *
*************************************/
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#pragma option dbg
#pragma option lst
#Entry main
#includelib win32.lib MSVCRT.lib ole32.lib
// $ will replaced with SphinxC-- main path
#includepath "$\winlib"
#include <windows.h>
#include <MSVCRT.H-->
// tells SphinxC about masm ShowCpu function
extern stdcall ShowCpu(dword v);
^^ //start of Masm code
.nolist
include masm32rt.inc
include m32lib/dwtoa.asm
include m32lib/stdout.asm
.code
;// masm code taken from the next link
;// http://www.masmforum.com/board/index.php?topic=14696.105
;// ShowCpu(mode): returns in EAX the number of set feature bits among the four
;// tested below (0..4). When mode is 1 it also prints the CPU brand string and
;// that count. The single DWORD argument is read straight from the stack and
;// removed with "ret 4"; all registers except EAX are preserved by pushad/popad.
ShowCpu proc stdcall ; mode:DWORD
COMMENT @ Usage:
push 0, call ShowCpu ; simple, no printing, just returns SSE level
push 1, call ShowCpu ; prints the brand string and returns SSE level @
pushad
sub esp, 80 ; create a buffer for the brand string
mov edi, esp ; point edi to it
xor ebp, ebp
.Repeat
lea eax, [ebp+80000002h]
db 0Fh, 0A2h ; cpuid 80000002h-80000004h
stosd ; store EAX, then EBX, ECX, EDX: 16 bytes of brand string per leaf
mov eax, ebx
stosd
mov eax, ecx
stosd
mov eax, edx
stosd
inc ebp
.Until ebp>=3
push 1
pop eax
db 0Fh, 0A2h ; cpuid 1
xor ebx, ebx ; CpuSSE
xor esi, esi ; add zero plus the carry flag
bt edx, 25 ; edx bit 25, SSE1
adc ebx, esi
bt edx, 26 ; edx bit 26, SSE2
adc ebx, esi
bt ecx, esi ; ecx bit 0, SSE3
adc ebx, esi
bt ecx, 9 ; ecx bit 9, SSSE3 (the original comment said SSE4)
adc ebx, esi
dec dword ptr [esp+4+32+80] ; dec mode in stack (return address 4 + pushad 32 + buffer 80)
.if Zero?
mov edi, esp ; restore pointer to brand string
.Repeat
.Break .if byte ptr [edi]!=32 ; mode was 1, so show a string but skip leading blanks
inc edi
.Until 0
.if byte ptr [edi]<32
print chr$("pre-P4")
.else
print edi ; CpuBrand
.endif
.if ebx
print chr$(32, 40, "SSE") ; info on SSE level, 40=(
print str$(ebx), 41, 13, 10 ; 41=)
.endif
.endif
add esp, 80 ; discard brand buffer (after printing!)
mov [esp+32-4], ebx ; move ebx into eax stack position - returns eax to main for further use
popad
ret 4
ShowCpu endp
^^ //End of Masm code
;//***********************************************************************************//
// Entry point: calls the masm ShowCpu routine once in printing mode.
main()
{
ShowCpu(1); // print brand string and SSE level
system("pause");
}
Hi All,
Here is another example from the PowerBasic forum, written by Steve Hutchesson.
I have converted his example to C-- and MASM: the procedures were converted to MASM and the main code to C--.
/*************************************
* New Sphinx Cmm *
* *
* memory-copy-benchmarks *
* *
* from powerbasic Forum *
* *
*************************************/
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#pragma option dbg
#pragma option lst
//#pragma option upx-
#Entry main
#includelib win32.lib MSVCRT.lib ole32.lib
// $ will replaced with SphinxC-- main path
#includepath "$\winlib"
#include <windows.h>
#include <MSVCRT.H-->
#pragma option ia
// tells SphinxC about masm ShowCpu function
extern {
cdecl SSEcopy(dword src,dword dst,dword blen);
cdecl SSEcopy2(dword src,dword dst,dword blen);
}
//start of Masm code
^^
.MODEL flat, c
.nolist
.data
pflead dd 0
.code
;// masm code taken from the next link and cereated by Steve Hutchesson.
;// https://forum.powerbasic.com/forum/user-to-user-discussions/powerbasic-inline-assembler/43459-memory-copy-benchmarks
; SSEcopy(src, dst, blen) - copy blen bytes from src to dst.
;   Whole 64-byte blocks are moved with movdqa loads and movntdq (non temporal)
;   stores, so src and dst must both be 16-byte aligned when blen >= 64.
;   Any remaining bytes (blen mod 64) are copied one at a time.
;   EBX/ESI/EDI are preserved through the USES clause; EAX and EDX are scratch.
; Fixes over the first version:
;   * a block count of zero (blen < 64) no longer wraps the block loop
;   * the remainder is blen - copied (it was copied - blen, which wrapped)
;   * the callee-saved registers are no longer clobbered
SSEcopy proc c uses ebx esi edi, src:DWORD, dst:DWORD, blen:DWORD
    mov esi, src
    mov edi, dst
    xor edx, edx                    ; EDX = byte index = bytes copied so far
    mov ebx, blen
    shr ebx, 6                      ; EBX = number of whole 64-byte blocks
    jz tail                         ; none: go straight to the byte loop
    align 4
lbl0:
    movdqa xmm0, [esi+edx]          ; 16 byte aligned reads
    movdqa xmm1, [esi+edx+16]
    movdqa xmm2, [esi+edx+32]
    movdqa xmm3, [esi+edx+48]
    movntdq [edi+edx], xmm0         ; non temporal writes
    movntdq [edi+edx+16], xmm1
    movntdq [edi+edx+32], xmm2
    movntdq [edi+edx+48], xmm3
    add edx, 64                     ; add block copy size to INDEX
    sub ebx, 1                      ; decrement loop counter
    jnz lbl0
tail:
    mov ebx, blen
    sub ebx, edx                    ; EBX = bytes still to copy (0..63)
    jz lbl2
    align 4
lbl1:
    movzx eax, BYTE PTR [esi+edx]   ; copy remainder
    mov [edi+edx], al
    add edx, 1                      ; increment the INDEX
    sub ebx, 1                      ; decrement the loop counter
    jnz lbl1
lbl2:
    ret
SSEcopy endp
;// SSEcopy2(src, dst, blen) - copy blen bytes from src to dst.
;//   Same scheme as SSEcopy but unrolled to 128 bytes per pass using xmm0-xmm7.
;//   src and dst must both be 16-byte aligned when blen >= 128 (movdqa/movntdq).
;//   Any remaining bytes (blen mod 128) are copied one at a time.
;//   EBX/ESI/EDI are preserved through the USES clause; EAX and EDX are scratch.
;// Fixes over the first version:
;//   * a block count of zero (blen < 128) no longer wraps the block loop
;//   * the remainder is blen - copied (it was copied - blen, which wrapped)
;//   * the callee-saved registers are no longer clobbered
SSEcopy2 proc c uses ebx esi edi, src:DWORD, dst:DWORD, blen:DWORD
    mov esi, src
    mov edi, dst
    xor edx, edx                    ;// EDX = byte index = bytes copied so far
    mov ebx, blen
    shr ebx, 7                      ;// EBX = number of whole 128-byte blocks
    jz tail                         ;// none: go straight to the byte loop
    align 4
lbl0:
    ; prefetchnta BYTE PTR [esi+edx+pflead]
    ;// prefetcht0 BYTE PTR [esi+edx+%pflead]
    ;// prefetcht1 BYTE PTR [esi+edx+%pflead]
    ;// prefetcht2 BYTE PTR [esi+edx+%pflead]
    movdqa xmm0, [esi+edx]          ;// 16 byte aligned reads
    movdqa xmm1, [esi+edx+16]
    movdqa xmm2, [esi+edx+32]
    movdqa xmm3, [esi+edx+48]
    movdqa xmm4, [esi+edx+64]
    movdqa xmm5, [esi+edx+80]
    movdqa xmm6, [esi+edx+96]
    movdqa xmm7, [esi+edx+112]
    movntdq [edi+edx], xmm0         ;// non temporal writes
    movntdq [edi+edx+16], xmm1
    movntdq [edi+edx+32], xmm2
    movntdq [edi+edx+48], xmm3
    movntdq [edi+edx+64], xmm4
    movntdq [edi+edx+80], xmm5
    movntdq [edi+edx+96], xmm6
    movntdq [edi+edx+112], xmm7
    add edx, 128                    ;// add block copy size to INDEX
    sub ebx, 1                      ;// decrement loop counter
    jnz lbl0
tail:
    mov ebx, blen
    sub ebx, edx                    ;// EBX = bytes still to copy (0..127)
    jz lbl2
    align 4
lbl1:
    movzx eax, BYTE PTR [esi+edx]   ;// copy remainder
    mov [edi+edx], al
    add edx, 1                      ;// increment the INDEX
    sub ebx, 1                      ;// decrement the loop counter
    jnz lbl1
lbl2:
    ret
SSEcopy2 endp
^^
//End of Masm code
;//***********************************************************************************//
#define MEMLEN 1024*1024*129
// Benchmark driver: copies a MEMLEN-byte block 100 times with SSEcopy2 and
// prints the elapsed GetTickCount() milliseconds.
// Changes over the first version:
//  * both blocks are allocated 16 bytes larger than MEMLEN, so rounding the
//    start address up to a 16-byte boundary never runs past the allocation
//  * the destination is aligned too (movntdq needs an aligned address, and
//    only the source used to be aligned)
//  * the pass counter is a local variable instead of ECX, which a cdecl
//    callee is free to overwrite
//  * allocation failures are reported instead of being passed to the copy
main()
{
    dword hMem,tMem,aSrc,aDst,tc,pass;
    hMem = GlobalAlloc(GMEM_FIXED | GMEM_ZEROINIT,MEMLEN+16); // source, zero filled
    tMem = GlobalAlloc(GMEM_FIXED,MEMLEN+16);                 // destination
    if(hMem == 0)
    {
        puts("GlobalAlloc failed");
    }
    else
    {
        if(tMem == 0)
        {
            puts("GlobalAlloc failed");
        }
        else
        {
            // -----------------------------------------
            // round both pointers up to 16 byte boundary
            // -----------------------------------------
            mov esi, hMem
            add esi, 15
            and esi, -16
            mov aSrc, esi
            mov esi, tMem
            add esi, 15
            and esi, -16
            mov aDst, esi
            puts("please wait......");
            tc = GetTickCount();
            pass = 100;
            do
            {
                SSEcopy2(aSrc,aDst,MEMLEN);
                pass--;
            } while( pass != 0 );
            tc = GetTickCount() - tc;
            printf("XMM copy 12.8 gig memory copy in %d ms\n",tc);
        }
    }
    // release whatever was allocated
    if(hMem != 0) GlobalFree(hMem);
    if(tMem != 0) GlobalFree(tMem);
    system("pause");
}
Hi All,
This is pure SphinxC-- code.
It is a shuffle-array example created by hutch and converted by me.
http://masm32.com/board/index.php?PHPSESSID=a641fd02d84ff11e8eb1ce754b88cbda&topic=5367.0 (http://masm32.com/board/index.php?PHPSESSID=a641fd02d84ff11e8eb1ce754b88cbda&topic=5367.0)
/*************************************
* New Sphinx Cmm *
* *
* shuffle array *
* *
* from Masm Forum *
* *
*************************************/
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#jumptomain NEAR
#parsecommandline TRUE
#pragma option dbg
#pragma option lst
#pragma option upx-
#Entry __startupproc
#includelib win32.lib MSVCRT.lib ole32.lib
// $ will replaced with SphinxC-- main path
#includepath "$\winlib"
#include <windows.h>
#include <MSVCRT.H-->
#pragma option ia
// Seed / state of the generator; main() initialises it from GetTickCount().
dword nrandom_seed = 0;
// nrandom(rbase): advances nrandom_seed and returns (new seed) mod rbase in EAX.
// The constants 16807, 127773 and 2836 match the "minimal standard" multiplicative
// generator evaluated as 16807*(seed mod 127773) - 2836*(seed div 127773).
// rbase must be non-zero: it is the divisor of the final DIV.
// Modifies EAX, ECX and EDX.
nrandom Proc c rbase:DWORD
mov eax, nrandom_seed
// ****************************************
// if the stored seed has its top bit set, add 7fffffffh before using it
test eax, 80000000h
jz nxt
add eax, 7fffffffh
nxt:
// ****************************************
xor edx, edx
mov ecx, 127773
div ecx
// EAX = seed div 127773, EDX = seed mod 127773
mov ecx, eax
mov eax, 16807
mul edx
// EAX = 16807 * (seed mod 127773)
mov edx, ecx
mov ecx, eax
mov eax, 2836
mul edx
// EAX = 2836 * (seed div 127773)
sub ecx, eax
xor edx, edx
mov eax, ecx
mov nrandom_seed, ecx
div rbase
mov eax, edx
ret
nrandom endp
// shuffle_array(arr, cnt): makes one pass over the cnt DWORD entries of arr,
// swapping entry i with a randomly chosen entry (index = nrandom(cnt)).
// EBX/ESI/EDI are preserved; EAX, ECX and EDX are modified.
// A count of zero now returns immediately: it used to reach nrandom(0), which
// divides by zero, and the down-counter would have wrapped around.
shuffle_array proc arr:DWORD,cnt:DWORD
    LOCAL lcnt :DWORD
    lcnt = cnt;                 // copy cnt to lcnt (loop counter, counts down)
    push ebx,esi,edi
    cmp lcnt, 0
    je sh_done                  // nothing to shuffle
    mov esi, arr
    mov edi, arr
    xor ebx, ebx                // EBX = incremental index
@@:
    invoke nrandom,cnt          // get the random number within "cnt" range
    mov ecx, [esi+ebx*4]        // get the incremental pointer
    mov edx, [edi+eax*4]        // get the random pointer
    mov [esi+ebx*4], edx        // write random pointer back to incremental location
    mov [edi+eax*4], ecx        // write incremental pointer back to random location
    add ebx, 1                  // increment the original pointer
    sub lcnt, 1                 // decrement the loop counter
    jnz @B
sh_done:
    pop edi,esi,ebx
    ret
shuffle_array endp
// ltok(src, pArray): splits the zero-terminated text at src into lines IN PLACE
// (each CR or LF that ends a recorded line is overwritten with 0) and builds a
// GlobalAlloc'ed array holding one pointer per non-blank line.
// The array address is stored at the location pArray points to.
// ESI and EDI are modified and not restored.
ltok Proc c src : DWORD, pArray : DWORD
dword pTxt, pmem , bcnt;
// ---------------------------------------------------------------
// tokenise lines in a text source writing an array of pointers
// to the address of "pArray" and returning the line count in EAX.
//
// The address written to the variable "pArray" should be released
// within the same scope as the variable with a call to GlobalFree()
// when the pointer array is no longer required.
//
// EXAMPLE
// cnt = ltok(ptxt,ByVal VarPtr(harr)) ' tokenise source lines
// dim tline(cnt) as ASCIIZ PTR at harr ' treat it as an ASCIIZ PTR array.
// .....
// GlobalFree harr ' deallocate memory from "ltok"
// ---------------------------------------------------------------
pTxt = src;
mov edi, 1 // set counter to 1 in case of no trailing CRLF
mov esi, pTxt
sub esi, 1
// ----------------
// count line feeds
// ----------------
@@:
add esi, 1
movzx edx, BYTE PTR [esi]
test edx, edx // test for terminator
jz @F
cmp edx, 10 // test for line feed
jne @B
add edi, 1 // lf count in EDI
jmp @B
@@:
// --------------------
// multiply result by 4
// --------------------
add edi, edi
add edi, edi
mov bcnt, edi
// one DWORD slot per possible line: (line feed count + 1) * 4 bytes
pmem = GlobalAlloc(GMEM_FIXED | GMEM_ZEROINIT,bcnt);
mov edi, pmem // copy allocated memory address into EDI
mov esi, pTxt
xor eax, eax // zero arg counter
sub esi, 1
jmp Ftrim
// ---------------------------------
Terminate:
mov BYTE PTR [esi], 0 // terminate end of current line
Ftrim: // scan to find next acceptable character
add esi, 1
movzx edx, BYTE PTR [esi] // zero extend byte
test edx, edx // test for zero terminator
jz Lout
cmp edx, 32
jbe Ftrim // scan again for 32 or less
// ¤=÷=¤=÷=¤=÷=¤=÷=¤
mov [edi], esi // write current location to pointer
add edi, 4 // set next pointer location
add eax, 1 // increment arg count return value
// ¤=÷=¤=÷=¤=÷=¤=÷=¤
Ttrim: // scan to find the next CR or LF
add esi, 1
movzx edx, BYTE PTR [esi] // zero extend byte
cmp edx, 13
jg Ttrim //short loop on normal case
je Terminate
cmp edx, 10 // extra test for ascii 10
je Terminate
test edx, edx
jnz Ttrim // loop back if not zero, IE TAB.
// ---------------------------------
Lout:
mov esi, pArray // load passed handle address into ESI
mov ecx, pmem // local memory handle into ECX
mov [esi], ecx // store local array handle at address of passed handle
// NOTE(review): the value returned is one LESS than the number of pointers
// written above, so callers that loop over the returned count skip the last
// recorded line (and get 0 for a one-line text). Confirm this is intended.
dec eax
ret // return the line count
ltok endp
// FASTPROC
// str_len: returns in EAX the length of the zero-terminated string whose
// address is the single stack argument. The argument is removed by the
// routine itself ("ret 4"). Only EAX is modified.
.code
str_len:
mov eax, [esp+4]
sub eax, 1
lbl:
add eax, 1
cmp BYTE PTR [eax], 0
jne lbl
sub eax, [esp+4] // address of the terminator minus the start address = length
ret 4
.data
// Exist(szFilePath): TRUE when GetFileAttributes() can report attributes for
// the path, FALSE when it returns the failure value 0xffffffff.
BOOL Exist(char *szFilePath)
{
    if (GetFileAttributes(szFilePath) == 0xffffffff) return FALSE;
    return TRUE;
}
// help(): prints the program name, its purpose and the command-line syntax.
help()
{
puts( "\nSHFLARR : shuffle text file lines to random order\n" );
puts( "Syntax : shflarr inputfile outputfile" );
}
// load_file(fname): reads the whole file into a malloc'ed buffer and returns
// its address, or NULL when the file cannot be opened, is empty, cannot be
// read completely, or memory is unavailable. Release the buffer with free().
// Changes over the first version:
//  * the buffer is now zero-terminated (the extra byte was allocated but never
//    written, while ltok() scans the text up to a 0 byte)
//  * the buffer is freed when the read comes up short (it used to leak)
//  * a failed malloc is detected before fread is called
dword load_file(dword fname)
{
    unsigned long size;
    dword filehandle;
    dword buf;
    filehandle=fopen(fname,"rb");
    if(filehandle==0)return NULL;
    // obtain file size:
    fseek (filehandle , 0 , SEEK_END);
    size = ftell (filehandle);
    rewind (filehandle);
    if(size==0){
        fclose(filehandle);
        return NULL;
    }
    buf=malloc(size+1);             // +1 for the terminator
    if(buf==0){
        fclose(filehandle);
        return NULL;
    }
    if(fread(buf,1,size,filehandle)!= size)
    {
        free(buf);
        fclose(filehandle);
        return NULL;
    }
    fclose(filehandle);
    EAX = buf + size;
    DSBYTE[EAX] = 0;                // terminate the text
    return buf;
}
//***********************************************************************************//
// shflarr inputfile outputfile
// Loads the input file, tokenises it into an array of line pointers, shuffles
// the array 100 times and writes the lines to the output file.
// Changes over the first version:
//  * every line is followed by a real CR LF pair; fwrite("\n",1,2,..) wrote the
//    line feed plus the string literal's terminating 0 byte
//  * a failed load_file() or fopen() is reported instead of being used
//  * a line count of 0 (or 0xFFFFFFFF, which ltok() yields when it records no
//    line at all) ends the program instead of wrapping the write loop counter
main()
{
    char *ifile,*ofile;
    dword psrc,lcnt,parr,pstr,hFil;
    ifile = PARAMSTR(1);
    ofile = PARAMSTR(2);
    if(!Exist(ifile))
    {
        puts("Cannot find input file");
        help();
        @EXIT(0);
    }
    if(ofile==NULL)
    {
        puts("No output file specified");
        help();
        @EXIT(0);
    }
    // ----------------------------------------------
    // seed the random algo with a near unique number
    // ----------------------------------------------
    nrandom_seed = GetTickCount();
    psrc = load_file(ifile);
    if(psrc==NULL)
    {
        puts("Cannot read input file");
        @EXIT(0);
    }
    // -------------------------------
    // tokenise file into memory array
    // -------------------------------
    lcnt = ltok(psrc,#parr);
    printf("Name of input file = %s\n",ifile);
    printf("Text file line count = %d\n", lcnt);
    printf("Array shuffle count = 100\n");
    printf("Name of output file = %s\n",ofile);
    if( lcnt == 0 ) || ( lcnt == 0xFFFFFFFF )
    {
        puts("Nothing to shuffle");
        GlobalFree( parr );
        free(psrc);
        @EXIT(0);
    }
    // -----------------
    // shuffle 100 times
    // -----------------
    mov esi, 100
@@:
    shuffle_array(parr,lcnt);
    sub esi, 1
    jnz @B
    // --------------------
    // write result to disk
    // --------------------
    hFil = fopen(ofile,"wb");
    if(hFil==0)
    {
        puts("Cannot create output file");
        GlobalFree( parr );
        free(psrc);
        @EXIT(0);
    }
    mov esi, lcnt                   // ESI = lines left to write
    mov ebx, parr                   // EBX = current slot in the pointer array
@@:
    mov eax, [ebx]
    mov pstr, eax
    fwrite(pstr,1,str_len(pstr),hFil); // the line of text
    fwrite("\r\n",1,2,hFil); // crlf
    add ebx, 4
    sub esi, 1
    jnz @B
    fclose(hFil);
    // ------------------------------------------
    // free the memory allocated in the tokeniser
    // ------------------------------------------
    GlobalFree( parr );
    free(psrc);
}
:biggrin:
Hi All,
Example from msdn microsoft.
/*************************************
* New Sphinx Cmm *
* *
* sample multithread program *
* *
* from msdn microsoft *
* *
*************************************/
// https://msdn.microsoft.com/en-us/library/esszf9hw.aspx
// Bounce - Creates a new thread each time the letter 'a' is typed.
// Each thread bounces a happy face of a different color around
// the screen. All threads are terminated when the letter 'Q' is
// entered.
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#pragma option dbg
#pragma option lst
#pragma option upx-
#includelib win32.lib MSVCRT.lib ole32.lib
// $ will replaced with SphinxC-- main path
#includepath "$\winlib"
#include <windows.h>
#include <MSVCRT.H-->
#pragma option ia
#define MAX_THREADS 32
// The function getrandom returns a random number between
// min and max, which must be in integer range.
// NOTE(review): this macro expands to several statements. When it is used as
// `x = getrandom(a,b);` only the first statement (`EAX=max`) is textually part
// of the assignment, and rand() presumably returns its value in EAX, which
// would overwrite the divisor prepared just before it. Check the expansion in
// the listing file before relying on the values it produces.
#define getrandom( min, max ) EAX=max; EAX++; EAX-=min; AX = rand() % EAX; AX+=min;
int main( void ); // Thread 1: main
void KbdFunc( void ); // Keyboard input, thread dispatch
void BounceProc( dword MyID ); // Threads 2 to n: display
void ClearScreen( void ); // Screen clear
void ShutDown( void ); // Program shutdown
void WriteTitle( int ThreadNum ); // Display title bar information
HANDLE hConsoleOut; // Handle to the console
HANDLE hRunMutex; // "Keep Running" mutex
HANDLE hScreenMutex; // "Screen update" mutex
int ThreadNr; // Number of threads started
CONSOLE_SCREEN_BUFFER_INFO csbiInfo; // Console information
// Program entry (thread 1): prepares the console and the two mutexes, then
// hands control to KbdFunc() until the user quits.
int main() // Thread One
{
// Get display screen information & clear the screen.
hConsoleOut = GetStdHandle( STD_OUTPUT_HANDLE );
GetConsoleScreenBufferInfo( hConsoleOut, #csbiInfo );
ClearScreen();
WriteTitle( 0 );
// Create the mutexes and reset thread count.
// hRunMutex is created already owned by this thread; BounceProc threads keep
// running while their wait on it times out.
hScreenMutex = CreateMutex( NULL, FALSE, NULL ); // Cleared
hRunMutex = CreateMutex( NULL, TRUE, NULL ); // Set
ThreadNr = 0;
// Start waiting for keyboard input to dispatch threads or exit.
KbdFunc();
// All threads done. Clean up handles.
CloseHandle( hScreenMutex );
CloseHandle( hRunMutex );
CloseHandle( hConsoleOut );
}
// ShutDown(): releases hRunMutex once per started thread while counting
// ThreadNr down to zero, then takes the screen mutex and clears the display.
void ShutDown( void ) // Shut down threads
{
while ( ThreadNr > 0 )
{
// Tell thread to die and record its death.
ReleaseMutex( hRunMutex );
ThreadNr--;
}
// Clean up display when done
WaitForSingleObject( hScreenMutex, INFINITE );
ClearScreen();
}
// KbdFunc(): polls the keyboard. Each time 'A' is down (and fewer than
// MAX_THREADS threads exist) a new BounceProc thread is started; the loop ends
// when 'Q' is down, after which ShutDown() is called.
// Changes over the first version:
//  * the thread number is passed BY VALUE. BounceProc uses its parameter
//    directly as the ID, so passing #ThreadNr handed every thread the same
//    address instead of its own number
//  * the 'Q' test is written as an explicit "masked value == 0" comparison
//    instead of `!GetAsyncKeyState(VK_Q) & 0x8000`, whose meaning depends on
//    how `!` and `&` are grouped
//  * the handle returned by CreateThread is closed; the thread keeps running
void KbdFunc( void ) // Dispatch and count threads.
{
    dword tID;
    do
    {
        if ( GetAsyncKeyState(VK_A) & 0x8000 ) && ( ThreadNr < MAX_THREADS )
        {
            ThreadNr++;
            CreateThread(0,0,#BounceProc,ThreadNr,0,#tID);
            if( EAX != 0 ) CloseHandle(EAX);   // EAX = handle of the new thread
            WriteTitle( ThreadNr );
            Sleep(100);
        }
        EAX = GetAsyncKeyState(VK_Q) & 0x8000; // bit 15 set = key is down
    } while( EAX == 0 );
    ShutDown();
}
// BounceProc(pMyID): thread body. Moves one "happy face" character around the
// console until a wait on hRunMutex stops timing out.
// pMyID is used directly as the numeric ID: it seeds rand() and selects the
// face character and the colour attribute.
// Change over the first version: the lower edge test uses `>=` like the right
// edge test. With `>` a face could be drawn at row csbiInfo.dwSize.Y, one row
// past the screen buffer.
void BounceProc( dword pMyID )
{
    char MyCell, OldCell;
    WORD MyAttrib, OldAttrib;
    char BlankCell;
    COORD Coords, Delta;
    COORD Old;
    DWORD Dummy;
    dword MyID;
    BlankCell = 0x20;
    Old.X = Old.Y = 0;
    MyID = pMyID;
    // Generate update increments and initial
    // display coordinates.
    srand( MyID * 3 );
    Coords.X = getrandom( 0, csbiInfo.dwSize.X - 1 );
    Coords.Y = getrandom( 0, csbiInfo.dwSize.Y - 1 );
    Delta.X = getrandom( -3, 3 );
    Delta.Y = getrandom( -3, 3 );
    // Set up "happy face" & generate color
    // attribute from thread number.
    if( MyID > 16)
        MyCell = 0x01; // outline face
    else
        MyCell = 0x02; // solid face 3
    MyAttrib = MyID & 0x0F; // force black background
    do
    {
        // Wait for display to be available, then lock it.
        WaitForSingleObject( hScreenMutex, INFINITE );
        // If we still occupy the old screen position, blank it out.
        ReadConsoleOutputCharacter( hConsoleOut, #OldCell, 1, DSDWORD[#Old], #Dummy );
        ReadConsoleOutputAttribute( hConsoleOut, #OldAttrib, 1, DSDWORD[#Old], #Dummy );
        if (( OldCell == MyCell ) && (OldAttrib == MyAttrib))
            WriteConsoleOutputCharacter( hConsoleOut, #BlankCell, 1, DSDWORD[#Old], #Dummy );
        // Draw new face, then clear screen lock
        WriteConsoleOutputCharacter( hConsoleOut, #MyCell, 1, DSDWORD[#Coords], #Dummy );
        WriteConsoleOutputAttribute( hConsoleOut, #MyAttrib, 1, DSDWORD[#Coords], #Dummy );
        ReleaseMutex( hScreenMutex );
        // Increment the coordinates for next placement of the block.
        Old.X = Coords.X;
        Old.Y = Coords.Y;
        Coords.X += Delta.X;
        Coords.Y += Delta.Y;
        // If we are about to go off the screen, reverse direction
        if( Coords.X < 0 ) || ( Coords.X >= csbiInfo.dwSize.X )
        {
            Delta.X = -Delta.X;
            Beep( 400, 50 );
        }
        if( Coords.Y < 0 ) || ( Coords.Y >= csbiInfo.dwSize.Y )
        {
            Delta.Y = -Delta.Y;
            Beep( 600, 50 );
        }
    }
    // Repeat while RunMutex is still taken.
    while ( WaitForSingleObject( hRunMutex, 75L ) == WAIT_TIMEOUT );
}
// WriteTitle(ThreadNum): formats the running-thread count and the key help
// into a local 80-byte buffer and sets it as the console window title.
void WriteTitle( int ThreadNum )
{
char NThreadMsg[80];
sprintf( #NThreadMsg, "Threads running: %02d. Press 'A' " "to start a thread,'Q' to quit.", ThreadNum );
SetConsoleTitle( #NThreadMsg );
}
// ClearScreen(): fills the whole screen buffer (dwSize.X * dwSize.Y cells,
// starting at coordinate 0,0) with blanks.
void ClearScreen( void )
{
DWORD dummy;
COORD Home;
Home.X = Home.Y = 0 ;
// the COORD is passed by value as one DWORD, hence DSDWORD[#Home]
FillConsoleOutputCharacter( hConsoleOut, ' ', csbiInfo.dwSize.X * csbiInfo.dwSize.Y, DSDWORD[#Home], #dummy );
}
Hi all
Here is the cmm version of Michael Webster's code timing macros.http://masm32.com/board/index.php?topic=49.0 (http://masm32.com/board/index.php?topic=49.0)
I have converted them to CMM; there was one small problem when I tried to do that.
The problem is how to put the code under test between the two CMM macros and have it work just like the MASM code. I used a trick: at the end of the first macro I put this code
call label
@label: /* Start test loop */
?aligncode 16 /* Optimal loop alignment for P6 */
It pushes the current address (the start of the test loop) onto the stack. At the beginning of the second macro I put this
dec __counter__loop__counter__
jz @F
jmp DSDWORD[ESP];
@@:
ESP += 4;
so it jumps back to the end of the first macro as long as the counter is still greater than zero.
Here is the macros [ cntrcmm.inc ]
============
/*************************************
* New Sphinx Cmm *
* *
* counter *
* *
* from Masm Forum *
* *
*************************************/
//Michael Webster's code timing macros
//http://masm32.com/board/index.php?topic=49.0
/* ---------------------------------------------------------------------
; These two macros perform the grunt work involved in measuring the
; processor clock cycle count for a block of code. These macros must
; be used in pairs, and the block of code must be placed in between
; the counter_begin and counter_end macro calls. The counter_end macro
; returns the clock cycle count for a single pass through the block of
; code, corrected for the test loop overhead, in EAX.
;
; These macros require a .586 or higher processor directive.
;
;
; The loopcount parameter should be set to a relatively high value to
; produce repeatable results.
;
; Note that setting the priority parameter to REALTIME_PRIORITY_CLASS
; involves some risk, as it will cause your process to preempt *all*
; other processes, including critical Windows processes. Setting the
; priority parameter to HIGH_PRIORITY_CLASS instead will significantly
; reduce the risk, and in most cases will produce the same cycle count.
; --------------------------------------------------------------------- */
#pragma option ia
dword _loop_count_;
dword _process_priority_class_;
int _thread_priority_;
dword __counter__loop__counter__=0;
qword tmp1,tmp2,__counter__qword__count__;
// counter_begin(loopcount): first half of the cycle-count pair. It must be
// followed by the code under test and then by counter_end().
// Steps performed here:
//   1. save the current process/thread priorities and raise both
//   2. time an EMPTY loop of loopcount passes -> tmp1 (loop overhead)
//   3. read the start count for the real loop  -> tmp2
//   4. push the address of the test-loop start with "call label", so that
//      counter_end() can jump back to it through DSDWORD[ESP]
// The inline code leaves one extra DWORD on the stack until counter_end()
// removes it. cpuid and rdtsc modify EAX, EBX, ECX and EDX.
#define counter_begin( arg ) EAX = arg; _counter_begin();
inline _counter_begin( )
{
_loop_count_ = EAX;
_process_priority_class_ = GetPriorityClass(GetCurrentProcess());
_thread_priority_ = GetThreadPriority(GetCurrentThread());
SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
xor eax, eax /* Use same CPUID input value for each call*/
cpuid /* Flush pipe & wait for pending ops to finish*/
rdtsc /* Read Time Stamp Counter*/
tmp1 = EDX:EAX;
__counter__loop__counter__ = _loop_count_;
xor eax, eax
cpuid /* Make sure loop setup instructions finish */
?aligncode 16 /* Optimal loop alignment for P6 */
call Lab
@Lab: /* Start an empty reference loop */
dec __counter__loop__counter__
jz @F
jmp DSDWORD[ESP]; /* back to Lab: its address was pushed by the call */
@@:
ESP += 4; /* drop the pushed loop address */
xor eax, eax
cpuid /* Make sure loop instructions finish */
rdtsc /* Read end count */
EDX:EAX -= tmp1;
tmp1 = EDX:EAX; /* tmp1 = cycles used by the empty loop */
xor eax, eax
cpuid
rdtsc
tmp2 = EDX:EAX; /* tmp2 = start count of the test loop */
__counter__loop__counter__ = _loop_count_;
xor eax, eax
cpuid /* Make sure loop setup instructions finish */
call label
@label: /* Start test loop */
?aligncode 16 /* Optimal loop alignment for P6 */
}
// counter_end(): second half of the cycle-count pair; closes the loop opened
// by counter_begin(). While the pass counter is not exhausted it jumps back to
// the address counter_begin() left on the stack. Afterwards it computes
//   ((end count - tmp2) - tmp1) / _loop_count_
// restores the saved priorities and returns the result in EAX.
// finit resets the FPU state.
inline counter_end()
{
dec __counter__loop__counter__
jz @F
jmp DSDWORD[ESP];
@@:
ESP += 4; // drop the loop address pushed by counter_begin
xor eax, eax
cpuid // Make sure loop instructions finish
rdtsc // Read end count
EDX:EAX -= tmp2;
__counter__qword__count__ = EDX:EAX - tmp1; // subtract the empty-loop overhead
SetPriorityClass(GetCurrentProcess(),_process_priority_class_);
SetThreadPriority(GetCurrentThread(),_thread_priority_);
finit
fild DSQWORD [# __counter__qword__count__ ]
fild dword [# _loop_count_ ]
fdiv // total cycles / loop count
fistp dword [# __counter__qword__count__ ]
mov eax, dword [# __counter__qword__count__ ]
}
//--------------------------------------------------------------------------------------
/* ---------------------------------------------------------------------
; These two macros perform the grunt work involved in measuring the
; execution time in milliseconds for a specified number of loops
; through a block of code. These macros must be used in pairs, and
; the block of code must be placed in between the timer_begin and
; timer_end macro calls. The timer_end macro returns the elapsed
; milliseconds for the entire loop in EAX.
;
; These macros utilize the high-resolution performance counter.
; The return value will be zero if the high-resolution performance
; counter is not available.
;
; The loopcount parameter should be set to a relatively high value to
; produce repeatable results.
;
; Note that setting the priority parameter to REALTIME_PRIORITY_CLASS
; involves some risk, as it will cause your process to preempt *all*
; other processes, including critical Windows processes. Setting the
; priority parameter to HIGH_PRIORITY_CLASS instead will significantly
; reduce the risk, and in most cases will produce very nearly the same
; result.
; --------------------------------------------------------------------- */
__timer__pc__frequency__ : dq 0
__timer__pc__count__ : dq 0
__timer__loop__counter__ : dd 0
__timer__dw_count__ : dd 0
// timer_begin(loopcount): first half of the millisecond-timer pair. It must be
// followed by the code under test and then by timer_end().
// When QueryPerformanceFrequency() succeeds it:
//   1. saves the current process/thread priorities and raises both
//   2. times an EMPTY loop of loopcount passes (overhead count)
//   3. pushes, in this order: overhead high, overhead low, start high,
//      start low, and finally the test-loop address (via "call label")
// These five DWORDs stay on the stack until timer_end() consumes them.
// When QueryPerformanceFrequency() fails nothing is pushed and no loop is set up.
#define timer_begin( arg ) EAX = arg; _timer_begin();
inline _timer_begin()
{
_loop_count_ = EAX;
QueryPerformanceFrequency( # __timer__pc__frequency__ );
if( EAX != 0 )
{
_process_priority_class_ = GetPriorityClass(GetCurrentProcess());
_thread_priority_ = GetThreadPriority(GetCurrentThread());
SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
QueryPerformanceCounter( # __timer__pc__count__ );
push DSDWORD [#__timer__pc__count__ + 4]
push DSDWORD [#__timer__pc__count__]
DSDWORD [#__timer__loop__counter__] = _loop_count_;
call Lab
@Lab:
?aligncode 8 // Optimal loop alignment for P6
// Start an empty reference loop
sub DSDWORD [#__timer__loop__counter__], 1
jz @F
goto DSDWORD[ESP]; // back to Lab: its address was pushed by the call
@@:
ESP += 4; // drop the pushed loop address
QueryPerformanceCounter( # __timer__pc__count__ );
pop ecx // Recover low-order 32 bits of start count
sub DSDWORD [# __timer__pc__count__ ], ecx
pop ecx // Recover high-order 32 bits of start count
sbb DSDWORD [# __timer__pc__count__ + 4 ], ecx
push DSDWORD [# __timer__pc__count__ + 4 ] // Overhead count
push DSDWORD [# __timer__pc__count__ ] // Overhead count
QueryPerformanceCounter( # __timer__pc__count__ );
push DSDWORD [# __timer__pc__count__ + 4 ] // Start count
push DSDWORD [# __timer__pc__count__ ] // Start count
DSDWORD [#__timer__loop__counter__] = _loop_count_;
call label
@label: // Start test loop
?aligncode 16 // Optimal loop alignment for P6
}
}
// timer_end(): second half of the millisecond-timer pair; closes the loop
// opened by timer_begin(). While the pass counter is not exhausted it jumps
// back to the address left on the stack. Afterwards it subtracts the start
// count and the overhead count (both popped from the stack) from the current
// performance counter, restores the saved priorities and returns
//   count / frequency * 1000
// in EAX. finit resets the FPU state.
// NOTE(review): the loop-back at the top runs unconditionally, while
// timer_begin() only pushes the loop address when the performance counter is
// available - confirm the behaviour on systems without one.
inline timer_end()
{
dec DSDWORD [#__timer__loop__counter__]
jz @F
jmp DSDWORD[ESP];
@@:
ESP += 4; // drop the loop address pushed by timer_begin
QueryPerformanceFrequency( # __timer__pc__frequency__ );
if( EAX != 0)
{
QueryPerformanceCounter( # __timer__pc__count__ );
pop ecx // Recover low-order 32 bits of start count
sub DSDWORD [# __timer__pc__count__ ], ecx
pop ecx // Recover high-order 32 bits of start count
sbb DSDWORD [# __timer__pc__count__ + 4 ], ecx
pop ecx // Recover low-order 32 bits of overhead count
sub DSDWORD [# __timer__pc__count__ ], ecx
pop ecx // Recover high-order 32 bits of overhead count
sbb DSDWORD [# __timer__pc__count__ + 4 ], ecx
}
SetPriorityClass(GetCurrentProcess(),_process_priority_class_);
SetThreadPriority(GetCurrentThread(),_thread_priority_);
finit
fild DSQWORD[# __timer__pc__count__]
fild DSQWORD[# __timer__pc__frequency__]
fdiv // elapsed counts / counts per second
mov DSDWORD[#__timer__dw_count__], 1000
fild dword [# __timer__dw_count__]
fmul // seconds * 1000
fistp dword [# __timer__dw_count__ ]
mov eax, [# __timer__dw_count__]
}
Here is a test program
===============
/*************************************
* New Sphinx Cmm *
* *
* counter *
* *
* from Masm Forum *
* *
*************************************/
//Michael Webster's code timing macros
//http://masm32.com/board/index.php?topic=49.0
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#pragma option dbg
#pragma option lst
#pragma option upx-
#includelib win32.lib MSVCRT.lib ole32.lib
// $ will replaced with SphinxC-- main path
#includepath "$\winlib"
#include <windows.h>
#include <MSVCRT.H-->
#include "cntrcmm.inc"
//#include <math64.h-->
#define LOOP_COUNT 10000000
// Test program for cntrcmm.inc: pins the process to the first CPU, then
// measures "mov eax,2 / cpuid" once with the cycle counter macros and once
// with the millisecond timer macros.
// Changes over the first version:
//  * the affinity masks are DWORDs printed with %u. GetProcessAffinityMask
//    writes pointer-sized values, so the former 64-bit variables were only
//    half initialised and %I64d printed garbage in the upper part
//  * the second message reports milliseconds: timer_end() returns the elapsed
//    time of the whole loop in ms, not a cycle count
//  * the unused locals tsc and pid are gone
main()
{
    dword pam, sam;     // process / system affinity masks
    SetProcessAffinityMask(GetCurrentProcess(), 1);
    GetProcessAffinityMask(GetCurrentProcess(), #pam, #sam);
    printf("%u\t%u\n", pam, sam);
    counter_begin(LOOP_COUNT);
    mov eax, 2
    cpuid
    counter_end();
    printf("the process takes: %d cycles\n", EAX );
    timer_begin(LOOP_COUNT);
    mov eax, 2
    cpuid
    timer_end();
    printf("the loop takes: %d ms\n", EAX );
    system("pause");
}
Hi All
MOV vs PUSH ticks comparison from http://masm32.com/board/index.php?topic=6324.0 (http://masm32.com/board/index.php?topic=6324.0)
So this converted example shows how easy it is to convert MASM code to NewSphinxCmm and then use CMM's stunning features.
/*************************************
* New Sphinx Cmm *
* *
* timeit *
* *
* from Masm Forum *
* *
*************************************/
/*
; MOV vs PUSH ticks comparison
; MOV wins only for 7 strikes. After that,
; PUSH wins.
http://masm32.com/board/index.php?topic=6324.0
*/
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#pragma option dbg
#pragma option lst
#pragma option upx-
#includelib win32.lib MSVCRT.lib ole32.lib
// $ will replaced with SphinxC-- main path
#includepath "$\winlib"
#include <windows.h>
#include <MSVCRT.H-->
#pragma option ia
loops equ 50000000 //50 mil
disply1: db "MOV = %u ticks\n",0ah,0
disply2: db "PUSH = %u ticks\n",0ah,0
disply3: db "%u*2*6\n",0dh,0ah,0
// mytest(MainLoops): prints MainLoops, then times two ways of saving and
// restoring six registers and prints the elapsed GetTickCount() values:
//   MOV  test - "sub esp" plus six mov stores, then six mov loads plus "add esp"
//   PUSH test - six pushes, then six pops
// Each test runs `loops` outer iterations. Every inner .repeat block runs until
// "dec MainLoop" makes the counter negative, i.e. MainLoops+1 times, so the
// save and the restore sequences are executed equally often and ESP ends
// where it started.
// ECX is the outer counter and ESI holds the start tick; both are among the
// registers being saved/restored, so they come back unchanged.
// cpuid is executed with whatever EAX holds at that point and modifies
// EAX, EBX, ECX and EDX.
mytest(dword MainLoops)
{
dword MainLoop;
//;;===========MOV==============
push MainLoops
push offset disply3
call printf
add esp,8
cpuid
call GetTickCount
mov esi,eax
mov ecx,loops
@@:
MainLoop = MainLoops;
.repeat
sub esp,4*6
mov [esp],eax
mov [esp+4],ebx
mov [esp+8],ecx
mov [esp+12],edx
mov [esp+16],edi
mov [esp+20],esi
dec MainLoop
.until MINUSFLAG
MainLoop = MainLoops;
.repeat
mov eax,[esp]
mov ebx,[esp+4]
mov ecx,[esp+8]
mov edx,[esp+12]
mov edi,[esp+16]
mov esi,[esp+20]
add esp,4*6
dec MainLoop
.until MINUSFLAG
sub ecx,1
jnz @B
call GetTickCount
sub eax,esi
push eax
push offset disply1
call printf
add esp,8
//;;============PUSH==============
cpuid
call GetTickCount
mov esi,eax
mov ecx,loops
@@:
MainLoop = MainLoops;
.repeat
push eax
push ebx
push ecx
push edx
push edi
push esi
dec MainLoop
.until MINUSFLAG
MainLoop = MainLoops;
.repeat
pop esi
pop edi
pop edx
pop ecx
pop ebx
pop eax
dec MainLoop
.until MINUSFLAG
sub ecx,1
jnz @B
call GetTickCount
sub eax,esi
push eax
push offset disply2
call printf
add esp,8
}
// Entry point: runs mytest() for several nesting depths, then repeats the
// MOV / PUSH comparison inline with SEVEN registers (EBP included) saved and
// restored once per outer iteration, printing the elapsed GetTickCount()
// values with the same format strings.
main()
{
mytest( 1 );
mytest( 1 );
mytest( 2 );
mytest( 2 );
mytest( 4 );
mytest( 8 );
mytest( 16 );
mytest( 32 );
//;;===========MOV==============
cpuid
call GetTickCount
mov esi,eax
mov ecx,loops
@@:
sub esp,4*7
mov [esp],eax
mov [esp+4],ebx
mov [esp+8],ecx
mov [esp+12],edx
mov [esp+16],edi
mov [esp+20],esi
mov [esp+24],ebp
mov eax,[esp]
mov ebx,[esp+4]
mov ecx,[esp+8]
mov edx,[esp+12]
mov edi,[esp+16]
mov esi,[esp+20]
mov ebp,[esp+24]
add esp,4*7
sub ecx,1
jnz @B
call GetTickCount
sub eax,esi
push eax
push offset disply1
call printf
add esp,8
//;;============PUSH==============
cpuid
call GetTickCount
mov esi,eax
mov ecx,loops
@@:
push eax
push ebx
push ecx
push edx
push edi
push esi
push ebp
pop ebp
pop esi
pop edi
pop edx
pop ecx
pop ebx
pop eax
sub ecx,1
jnz @B
call GetTickCount
sub eax,esi
push eax
push offset disply2
call printf
add esp,8
system("pause");
}
Hi all,
The code is still close to MASM syntax.
This part of the code was ported to CMM from http://masm32.com/board/index.php?topic=4940.msg53093#msg53093
/*************************************
* New Sphinx Cmm *
* *
* CPU detection *
* *
* from Masm Forum *
* *
*************************************/
/*
this is part of Fast memory allocation
written by nidud , converted by Me.
http://masm32.com/board/index.php?topic=4940.msg53093#msg53093
*/
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#pragma option dbg
#pragma option lst
#pragma option upx-
#includelib win32.lib MSVCRT.lib ole32.lib
// $ will replaced with SphinxC-- main path
#includepath "$\winlib"
#include <windows.h>
#include <MSVCRT.H-->
#pragma option ia
SSE_MMX equ 00000000001B
SSE_SSE equ 00000000010B
SSE_SSE2 equ 00000000100B
SSE_SSE3 equ 00000001000B
SSE_SSSE3 equ 00000010000B
SSE_SSE41 equ 00000100000B
SSE_SSE42 equ 00001000000B
SSE_XGETBV equ 00010000000B
SSE_AVX equ 00100000000B
SSE_AVX2 equ 01000000000B
SSE_AVXOS equ 10000000000B
dword sselevel=0;
// main: detects the CPU's SIMD feature level, stores it as a bit mask of
// SSE_* flags in `sselevel`, prints the CPU brand string and the highest
// detected instruction-set level, then exits the process.
// The program refuses to continue when SSE2 is not available.
main()
{
    //-------------------------------------------------------------------------------
    // CPU detection
    //-------------------------------------------------------------------------------
    // CPUID is available when bit 21 (ID, 200000h) of EFLAGS can be toggled.
    pushfd
    pop eax
    mov ecx,200000h
    mov edx,eax             // EDX = original EFLAGS
    xor eax,ecx             // flip the ID bit
    push eax
    popfd
    pushfd
    pop eax
    xor eax,edx             // bits that actually changed
    and eax,ecx             // ZF=1 -> ID bit stuck -> no CPUID; EAX=0 in that case
    push ebx                // cpuid overwrites EBX (push leaves the flags intact)
    .if ! ZEROFLAG
        xor eax,eax
        cpuid               // EAX = highest basic leaf
        .if EAX
            .if AH == 5
                xor eax,eax
            .else
                // NOTE(review): leaf 7 is queried without checking that the
                // highest basic leaf returned above is >= 7.
                mov eax,7
                xor ecx,ecx
                cpuid           // check AVX2 support
                xor eax,eax
                bt ebx,5        // AVX2
                rcl eax,1       // into bit 9
                push eax
                mov eax,1
                cpuid
                pop eax
                bt ecx,28       // AVX support by CPU
                rcl eax,1       // into bit 8
                bt ecx,27       // XGETBV supported
                rcl eax,1       // into bit 7
                bt ecx,20       // SSE4.2
                rcl eax,1       // into bit 6
                bt ecx,19       // SSE4.1
                rcl eax,1       // into bit 5
                bt ecx,9        // SSSE3
                rcl eax,1       // into bit 4
                bt ecx,0        // SSE3
                rcl eax,1       // into bit 3
                bt edx,26       // SSE2
                rcl eax,1       // into bit 2
                bt edx,25       // SSE
                rcl eax,1       // into bit 1
                bt edx,23       // MMX is CPUID.1:EDX bit 23 (ECX bit 0 is SSE3)
                rcl eax,1       // into bit 0
                mov sselevel,eax
            .endif
        .endif
    .endif
    // EAX is 0 on every path that did not store sselevel, so this is skipped then.
    .if EAX & SSE_XGETBV
        push eax
        xor ecx,ecx             // XCR0
        db 0x0F,0x01,0xD0       // xgetbv
        and eax,6               // XCR0 bit 1 (SSE state) and bit 2 (AVX state)
        cmp eax,6               // the OS supports AVX only when BOTH are set
        pop eax                 // pop does not change the flags
        .if ZEROFLAG
            or sselevel,SSE_AVXOS
        .endif
    .endif
    pop ebx
    .if ! EAX = sselevel & SSE_SSE2
        printf( "CPU error: Need SSE2 level\n" );
        system("pause");
        ExitProcess( 0 );
    .endif
    // Read the brand string (extended leaves 80000002h..80000004h, 16 bytes
    // each) into an 80-byte scratch area on the stack.
    sub esp,80
    mov edi,esp
    xor esi,esi
    .repeat
        lea eax,[esi+80000002h]
        cpuid
        mov [edi],eax
        mov [edi+4],ebx
        mov [edi+8],ecx
        mov [edi+12],edx
        add edi,16
        inc esi
    .until ESI == 3
    // Skip leading blanks in the brand string.
    mov eax,esp
    .while DSBYTE [EAX] == ' '
        inc eax
    .endw
    // Print through "%s" so the brand string is never parsed as a format string.
    printf( "%s", EAX );
    add esp,80
    printf( " (" );
    // Report the highest level the CPU advertises.
    // NOTE(review): SSE_AVXOS is recorded in sselevel but not consulted here.
    .if EAX = sselevel & SSE_AVX2
        printf( "AVX2" );
    .elseif EAX = sselevel & SSE_AVX
        printf( "AVX" );
    .elseif EAX = sselevel & SSE_SSE42
        printf( "SSE4.2" );
    .elseif EAX = sselevel & SSE_SSE41
        printf( "SSE4.1" );
    .elseif EAX = sselevel & SSE_SSSE3
        printf( "SSSE3" );
    .elseif EAX = sselevel & SSE_SSE3
        printf( "SSE3" );
    .else
        printf( "SSE2" );
    .endif
    printf( ")\n----------------------------------------------\n" );
    system("pause");
    ExitProcess( 0 );
}
Hi All,
This demo shows how easy it is to create a function address table, which holds the addresses of functions so that you can later call them in a certain order.
It also shows how to code the same function in many different syntax styles, such as C style, asm style, C-- style, and so on.
Finally, it shows how easy it is to write a benchmark test to compare the speed of different pieces of code.
/***************************************
* New Sphinx Cmm *
* *
* strlen demo By Emil Halim *
* *
***************************************/
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#includepath "$\winlib"
#include <windows.h>
#include <MSVCRT.H-->
#includelib win32.lib , msvcrt.lib
#pragma option upx-
#pragma option LST
#pragma option ia
// strlen0: C-style string length.
//   pStr    - pointer to a zero-terminated string
//   returns - number of bytes that precede the terminating zero
dword strlen0( char * pStr )
{
    char* scan;
    scan = pStr;
    // Walk forward until the terminator is reached.
    while ( *scan != 0 )
    {
        scan++;
    }
    // Distance travelled is the length.
    return scan - pStr;
}
// strlen1: C-- style string length that counts directly in the EAX register.
//   pStr - pointer to a zero-terminated string (advanced in place)
// There is no return statement: the result is whatever is left in EAX when
// the loop ends, i.e. the number of bytes stepped over.
int strlen1(char* pStr)
{
EAX=0;
while(byte *pStr !=0 )
{
pStr++;
EAX++;
}
}
// strlen2: register-call string length in C-- syntax.
//   In:  EAX = pointer to a zero-terminated string
//   Out: EAX = length in bytes
//   Clobbers: ECX. The start pointer is kept in ECX (a volatile register)
//   rather than EBX, so the callee-saved EBX is left untouched.
int fastcall strlen2(EAX)
{
    ECX=EAX;                    // remember where the string starts
    while(DSBYTE[EAX] !=0 )
    {
        EAX++;
    }
    EAX -= ECX;                 // end - start = length
}
// strlen3: register-call string length written as plain assembly.
//   In:  EAX = pointer to a zero-terminated string
//   Out: EAX = length in bytes
//   Clobbers: ECX. The start pointer is kept in ECX (a volatile register)
//   rather than EBX, so the callee-saved EBX is left untouched.
int fastcall strlen3(EAX) // pure ASM code
{
    MOV ECX,EAX                 // remember where the string starts
@lop:
    CMP DSBYTE[EAX],0
    JE near @fin                // terminator found
    INC EAX
    JMP lop
@fin:
    SUB EAX,ECX                 // end - start = length
}
?aligncode 4
// strlen4: string length that scans four bytes per iteration.
//   item    - pointer to a zero-terminated string
//   returns - length in EAX
// EBX is saved and restored. The loop reads whole dwords, so it may read up
// to three bytes past the terminator.
strlen4 Proc item:DWORD
    push ebx
    mov eax, item               // get pointer to string
    lea edx, [eax+3]            // pointer+3 used in the end
@@:
    mov ebx, [eax]              // read first 4 bytes
    add eax, 4                  // increment pointer
    lea ecx, [ebx-01010101h]    // subtract 1 from each byte
    not ebx                     // invert all bytes
    and ecx, ebx                // and these two
    and ecx, 80808080h
    jz @B                       // no zero bytes, continue loop
    test ecx, 00008080h         // test first two bytes
    jnz @F
    shr ecx, 16                 // not in the first 2 bytes
    add eax, 2
@@:
    shl cl, 1                   // use carry flag to avoid branch
    sbb eax, edx                // compute length
    pop ebx
    ret                         // explicit return, so execution cannot run past Endp into the next function
strlen4 Endp
// *** SSE2 version from MASM forum***
// strlen5: register-call string length that compares 16 bytes per iteration.
//   In:  EAX = pointer to a zero-terminated string
//   Out: EAX = length in bytes
//   Uses EBX, ECX, EDX and XMM0.
//   NOTE(review): EBX is overwritten and not restored - confirm callers do
//   not depend on it.
// The pointer is rounded down to a 16-byte boundary, so the first block may
// contain bytes that lie before the string; their mask bits are shifted out
// before the result is used.
? aligncode 16
int fastcall strlen5(EAX)
{
EBX = EAX ; // get the string pointer
LEA ECX, DSDWORD[EAX+16] // save pointer to string, on par with eax after first loop
EAX &= 0xFFFFFFF0; // align for use with SSE2
@shiftOK:
XORPS XMM0, XMM0 // zero xmm0 for finding zero bytes
@a1:
PCMPEQB XMM0, DSQWORD[EAX] // ---- inner loop -----
PMOVMSKB EDX, XMM0 // set byte mask in edx
EAX += 16; // len counter (best position here)
TEST EDX,EDX
JE a1
// ECX > EAX only on the first block of a misaligned string; otherwise the
// mask needs no correction.
if(ECX<=EAX) goto a2;
ECX -= EAX; // get difference, and cancel "misalign flag"
SHR EDX, CL // shift invalid
SHL EDX, CL // bits out
// Mask became empty: the only zero bytes were in front of the string.
JE shiftOK
@a2:
BSF EDX, EDX // bit scan for the index
SUB EAX, EBX // subtract original src pointer
LEA EAX, DSDWORD[EAX+EDX-16] // add scan index
}
? aligncode 4
.code
// strlen6: string length as a bare label with no stack frame.
//   In:  [esp+4] = pointer to a zero-terminated string
//   Out: EAX = length in bytes
//   Removes its own 4-byte argument from the stack (ret 4).
strlen6:
mov eax, [esp+4]
// Start one byte early so the loop can increment before it tests.
sub eax, 1
lbl:
add eax, 1
cmp BYTE PTR [eax], 0
jne lbl
// end - start = length
sub eax, [esp+4]
ret 4
// NOTE(review): the following procedure and table come after this .data
// directive - confirm this is intended.
.data
// strlen7: string length using a base pointer (ECX) plus an index (EAX).
//   mstr    - pointer to a zero-terminated string
//   returns - length in EAX
//   Clobbers: ECX
strlen7 Proc mstr:DWORD
MOV ECX,mstr // Move source pointer to ECX
EAX = -1; // Start of at -1 so we can build a faster loop
next_char:
EAX++; // EAX==NULL
CMP DSBYTE[ECX+EAX],0
JNE next_char // If BYTE is not equal to NULL process next .
RET // Returns string length in EAX
strlen7 Endp
//-----------function address table-----------------
// One dword per strlen variant, in index order 0..7, followed by a zero
// dword. main walks the table until it reads the zero entry.
FunTbl : dd # strlen0,
# strlen1,
# strlen2,
# strlen3,
# strlen4,
# strlen5,
# strlen6,
# strlen7,
0
/*-------------------------------------------------------------------------------*/
char* testStr = "NewSphinxCmm is a stunning program language";
qword temp_1;
qword temp_2;
// main: prints the length of testStr as computed by the library strlen and by
// each strlen0..strlen7 variant, calls every variant again through FunTbl,
// then times strlen1 against strlen5 with rdtsc and prints the results.
main()
{
int i;
double reslt1, reslt2;
int count;
// Direct calls.
printf("strlen = %d\n",strlen ( testStr ));
printf("strlen0 = %d\n",strlen0( testStr ));
printf("strlen1 = %d\n",strlen1( testStr ));
printf("strlen2 = %d\n",strlen2( testStr ));
printf("strlen3 = %d\n",strlen3( testStr ));
printf("strlen4 = %d\n",strlen4( testStr ));
printf("strlen5 = %d\n",strlen5( testStr ));
printf("strlen6 = %d\n",strlen6( testStr ));
printf("strlen7 = %d\n",strlen7( testStr ));
// Indirect calls through the address table; a zero entry ends the walk.
i = 0;
while(DSDWORD[i*4+#FunTbl])
{
EDX=DSDWORD[i*4+#FunTbl];
// Entries 2, 3 and 5 take their argument in EAX; the others take it as a
// normal call argument.
if (i==2) || (i==3) || (i==5) // fast functions they have no stack frame
{
EAX=testStr;
EDX();
}
else
EDX( testStr );
// The result of either call form is read from EAX.
printf("len[%d] = %d\n",i, EAX );
i++;
}
count = 1000000;
SetPriorityClass( GetCurrentProcess(), HIGH_PRIORITY_CLASS);
// Time `count` calls of strlen1: temp_1 = end timestamp - start timestamp.
rdtsc
temp_1 = EDX:EAX;
for(i=0; i < count; i++)
{
strlen1( testStr );
}
rdtsc
temp_1 = EDX:EAX - temp_1;
// Same measurement for strlen5 into temp_2.
rdtsc
temp_2 = EDX:EAX;
for(i=0; i < count; i++)
{
strlen5( testStr );
}
rdtsc
temp_2 = EDX:EAX - temp_2;
SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
// Average per call, computed on the FPU stack and stored as doubles.
ST(0) = temp_1 / count;
fstp reslt1
ST(0) = temp_2 / count;
fstp reslt2
printf("strlen1 is %f\nstrlen5 is %f\n", reslt1 , reslt2 );
system("pause");
}
Hi ALL,
To use Borland's ilink32, first download and install a trial version of Borland C++ on your system.
Then try the next example; it needs NewSphinxCmm version 256.
/***************************************
* New Sphinx Cmm *
* *
* using ilink By Emil Halim *
* *
***************************************/
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#pragma option obj //tells NewSphinxCmm to do not link
#OnExit "ilink32 /ap /m -s -Gt -Gn $OFILE$.obj c0x32.OBJ , , , import32.lib cw32.lib , , "
#includepath "$\winlib"
#include <windows.h>
#pragma option LST
#pragma option ia
dword dumy; // disable error of missing a section of class 2 by ilink
/*------------------------*/
extern cdecl _printf();
#define printf _printf
extern cdecl _strlen();
#define strlen _strlen
extern cdecl _system();
#define system _system
// _main: demo entry point. printf, strlen and system are the macros defined
// above, which map to the external _printf, _strlen and _system.
// The body also defines a label named `main` at its first statement.
_main()
{
main:
printf("hello world ......\n\n");
printf("using ilink Borland linker ......\n\n");
printf("the length of welcome = %d\n\n" , strlen("welcome"));
system("pause");
}
/*------------------------*/
Hi ALL,
In this demo I have changed the __acrtused procedure in c0nt.asm to look like this:
;----------------------------------------------------------------------
; Startup code
EXTRN cmmstratup:NEAR ; Added By Emil Halim
_TEXT SEGMENT DWORD USE32 PUBLIC 'CODE'
public __acrtused
__acrtused PROC NEAR
jmp cmmstratup ; Added By Emil Halim
__acrtused ENDP
Then the OnExit directive will assemble it with tasm32; see below.
I also removed the underscore from _main so that it will call the CMM main.
So the cmmstratup CMM code holds the startup code; you can modify it as you wish.
Here is the CMM demo
==============
/***************************************
* New Sphinx Cmm *
* *
* using ilink By Emil Halim *
* *
***************************************/
/* borland C++ console starup code */
#pragma option w32c //create Windows console EXE.
#pragma option OS //speed optimization
#pragma option obj //tells NewSphinxCmm to do not link
#OnExit "tasm32 /ml c0nt.asm"
#OnExit "ilink32 /ap /m -s -Gt -Gn -Gl $OFILE$.obj c0nt.obj, , ,vcl.lib import32.lib cw32.lib , , "
#includepath "$\winlib"
#include <windows.h>
#pragma option LST
#pragma option ia
/*------------------------*/
extern cdecl _printf();
#define printf _printf
extern cdecl _strlen();
#define strlen _strlen
extern cdecl _system();
#define system _system
dword hinst;
/*-----------startup code----------------*/
extern
{
dword ___CPPdebugHook_segment;
dword __TLS_index;
dword __TLS_index4;
dword __hInstance;
dword ___CPPdebugHook;
dword module_data;
___CRTL_VCL_Init();
___CRTL_MEM_UseBorMM();
___CRTL_VCLLIB_Linkage();
__ExceptInit();
__startup();
}
/* Borland C++ console startup code */
// cmmstratup: startup routine reached through the `jmp cmmstratup` placed in
// __acrtused (c0nt.asm). It calls the external runtime init routines declared
// above, stores the module handle in __hInstance and hinst, and finally jumps
// to __startup.
cmmstratup()
{
jmp skip_dbg_vector
db "fb:C++HOOK" // special signature
nop // alignment byte
db 0E9h // encode a jmp instruction so that the disassembler in the IDE can see past this address to the skip_dbg_vector
dd # ___CPPdebugHook_segment
skip_dbg_vector:
__TLS_index4 = __TLS_index << 2;
push edx
// The argument for GetModuleHandle is pushed by hand; the call is written without one.
push 0 // NULL returns current module
edx = GetModuleHandle();
___CRTL_VCL_Init(); // EDX now has hInstance in it
pop edx
___CRTL_MEM_UseBorMM(); // Call out to potentially re-vector the memory manager
___CRTL_VCLLIB_Linkage(); // Call out to touch a symbol that will be undefined if vcl.lib was used with any of the CW32xx forms of the RTL.
skip_CRTL_xxxx:
// One dword is pushed for __ExceptInit and removed again afterwards.
push 0
__ExceptInit();
pop ecx
not_process_attach:
// Pushed here and left on the stack for __startup.
push # module_data
push 0 // NULL returns current module
__hInstance = GetModuleHandle();
hinst = EAX; // Added by Emil Halim: you can put your own work here
push 0 // dummy return address
// Jump rather than call: the dummy return address was pushed just above.
goto __startup;
}
/*-----------end of startup code----------------*/
// main: demo body; prints a greeting, the length of "welcome", and the
// hinst value stored by cmmstratup.
main()
{
printf("hello world ......\n\n");
printf("using ilink borland linker ......\n\n");
printf("the length of welcome = %d\n\n" , strlen("welcome"));
// hinst is printed with %d, i.e. as a decimal number.
printf("the hInstance = %d\n\n" , hinst );
system("pause");
}