The MASM Forum
General => The Laboratory => Topic started by: Farabi on January 03, 2013, 06:23:28 PM
-
;-----------------------------------------------------------------------
; fsmVecSub - packed single-precision subtract: [lpVDest] = [lpVA] - [lpVB]
;
; In:    lpVDest -> 16-byte buffer receiving the 4 packed single results
;        lpVA    -> 16 bytes, 4 packed singles (minuend)
;        lpVB    -> 16 bytes, 4 packed singles (subtrahend)
;        No alignment is required: every access uses MOVUPS.
;        lpVDest may alias a source; both sources are loaded before the store.
; Out:   eax = lpVDest
; Clobb: eax, xmm0, xmm1
;
; Only eax is used for addressing, so no callee-saved register has to be
; pushed/popped (the previous version needed "uses esi edi").
;-----------------------------------------------------------------------
fsmVecSub proc lpVDest:dword,lpVA:dword,lpVB:dword
        mov     eax, lpVA
        movups  xmm0, [eax]             ; xmm0 = A
        mov     eax, lpVB
        movups  xmm1, [eax]             ; xmm1 = B
        subps   xmm0, xmm1              ; xmm0 = A - B (4 lanes)
        mov     eax, lpVDest            ; loaded last: eax = lpVDest on return
        movups  [eax], xmm0
        ret
fsmVecSub endp
;-----------------------------------------------------------------------
; fsmVecAdd - packed single-precision add: [lpVDest] = [lpVA] + [lpVB]
;
; In:    lpVDest -> 16-byte buffer receiving the 4 packed single results
;        lpVA    -> 16 bytes, 4 packed singles
;        lpVB    -> 16 bytes, 4 packed singles
;        No alignment is required: every access uses MOVUPS.
;        lpVDest may alias a source; both sources are loaded before the store.
; Out:   eax = lpVDest
; Clobb: eax, xmm0, xmm1
;
; Only eax is used for addressing, so no callee-saved register has to be
; pushed/popped (the previous version needed "uses esi edi").
;-----------------------------------------------------------------------
fsmVecAdd proc lpVDest:dword,lpVA:dword,lpVB:dword
        mov     eax, lpVA
        movups  xmm0, [eax]             ; xmm0 = A
        mov     eax, lpVB
        movups  xmm1, [eax]             ; xmm1 = B
        addps   xmm0, xmm1              ; xmm0 = A + B (4 lanes)
        mov     eax, lpVDest            ; loaded last: eax = lpVDest on return
        movups  [eax], xmm0
        ret
fsmVecAdd endp
;-----------------------------------------------------------------------
; fsmVecMul - packed single-precision lane-wise multiply:
;             [lpVDest] = [lpVA] * [lpVB]
;
; In:    lpVDest -> 16-byte buffer receiving the 4 packed single results
;        lpVA    -> 16 bytes, 4 packed singles
;        lpVB    -> 16 bytes, 4 packed singles
;        No alignment is required: every access uses MOVUPS.
;        lpVDest may alias a source; both sources are loaded before the store.
; Out:   eax = lpVDest
; Clobb: eax, xmm0, xmm1
;
; Only eax is used for addressing, so no callee-saved register has to be
; pushed/popped (the previous version needed "uses esi edi").
;-----------------------------------------------------------------------
fsmVecMul proc lpVDest:dword,lpVA:dword,lpVB:dword
        mov     eax, lpVA
        movups  xmm0, [eax]             ; xmm0 = A
        mov     eax, lpVB
        movups  xmm1, [eax]             ; xmm1 = B
        mulps   xmm0, xmm1              ; xmm0 = A * B (per lane)
        mov     eax, lpVDest            ; loaded last: eax = lpVDest on return
        movups  [eax], xmm0
        ret
fsmVecMul endp
I'm sorry for wasting your time, but can anyone time these functions for me? It seems I messed up my timer code, and it always yields a result of -1. I want to know how many microseconds they take for 100 million loops. It should be about half a second.
-
As always, you can get Michael Webster's timers.asm in the first thread of the Laboratory.
;###############################################################################################
.XCREF
.NoList
INCLUDE \Masm32\Include\Masm32rt.inc
.686p
.MMX
.XMM
INCLUDE \Masm32\Macros\Timers.asm
.List
;###############################################################################################
Loop_Count = 10000 ;adjust the loop count so that each pass takes about 0.5 seconds
;###############################################################################################
.DATA
;***********************************************************************************************
.DATA?
;###############################################################################################
.CODE
;***********************************************************************************************
;-----------------------------------------------------------------------
; _main - timing harness template.
;
; Pins the process to the CPU selected by affinity mask 1, sleeps 750 ms,
; then runs Pass_Count timed passes. Each pass wraps the code under test in
; the counter_begin / counter_end macros (from Timers.asm) and prints the
; value left in eax followed by a space. After the last pass it prints
; CR/LF, waits for a key and exits with code 0.
;-----------------------------------------------------------------------
_main PROC
Pass_Count = 5                          ; number of timed passes printed

        invoke  GetCurrentProcess
        invoke  SetProcessAffinityMask, eax, 1
        invoke  Sleep, 750

        mov     ecx, Pass_Count         ; ecx = passes remaining
NextPass:
        push    ecx                     ; keep the pass counter across the macros/print
        counter_begin Loop_Count,HIGH_PRIORITY_CLASS
;put your code to be timed here
        counter_end
        print   str$(eax),32            ; result of this pass, then a space
        pop     ecx
        dec     ecx
        jnz     NextPass

        print   chr$(13,10)
        inkey
        invoke  ExitProcess, 0
_main ENDP
;###############################################################################################
END _main
-
I did, but each time I use mul eax, the timer fails and returns -1.
-
Based on my timing, it took half a second for 100 million loops. Multiplying by 8 (4 floating-point operations per call, and two half-seconds per second) gives about 800 MFLOPS, almost 1 GFLOPS. Not bad. Making a 3D software raytracer should be possible.