In my tests there was no significant speed advantage of JAE/JBE over JA/JB (even though intuitively it seems to me that there should be), and no speed advantage of setting min/max to the first element over setting it to +/- FLT_MAX.
;==============================================================================
include \masm32\include\masm32rt.inc
.686
;==============================================================================
;-------------------------------------
; Floating-point limits, values taken from VC Toolkit 2003 float.h.
; In the code visible here they are referenced only by commented-out
; "fld4 +/-FLT_MAX" seeding experiments in fltmin/fltmax.
;-------------------------------------
FLT_MAX equ 3.402823466e+38
DBL_MAX equ 1.7976931348623158e+308
;==============================================================================
.data
; Ten-element REAL4 sample set; not referenced by any code visible in this file.
array real4 -8.8, -3.9, 111.5, 0.5, 3.6, 1.2, 4.9, 9.9, -98.2, 0.0
; Scratch slots used by the driver at "start" to pop ST(0) after each call:
; r4 receives frnd results, r8 receives the fltmin/fltmax results.
r4 real4 ?
r8 real8 ?
.code
;==============================================================================
;------------------------------------------------------------------------
; frnd - pseudo-random number generator.
;
; This is Abel's version of a Park-Miller-Carta generator, details here:
; http://www.masm32.com/board/index.php?topic=6558.0
; Modified to return a floating-point value in the interval [0,1) at
; the top of the FPU stack in ST(0), as per the normal convention.
;
; In:    nothing (state is kept in abel_rand_seed)
; Out:   ST(0) = new seed / 2^31; the caller must pop it
; Clobb: eax, ecx, edx, flags
;
; Original author's measurements: the period of the core generator is
; 2147483646 (tested), and it runs in 23 cycles on a P3, including the
; call overhead and a fstp to store the result to memory. Setting
; frnd_divider to the period instead of to a power of 2 caused a 2x
; slowdown.
;
; No stack frame is needed, so prologue/epilogue generation is switched
; off around the procedure and restored afterwards.
;------------------------------------------------------------------------
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
align 4
frnd proc
  .data
    align 8
    ; The qword comes first so that the "align 8" really applies to it;
    ; the dword seed that follows is then naturally 4-byte aligned.
    frnd_divider   dq 2147483648        ; 2^31
    abel_rand_seed dd 1                 ; generator state, never 0
  .code
    mov   eax, abel_rand_seed
    mov   ecx, 16807                    ; a = 7^5
    mul   ecx                           ; edx:eax == a*seed == D:A
    mov   ecx, 7fffffffh                ; ecx = m = 2^31 - 1
    add   edx, edx                      ; edx = 2*D
    cmp   eax, ecx
    jna   low_ok
    sub   eax, ecx                      ; if A > m, A = A - m
  low_ok:
    add   eax, edx                      ; eax = A + 2*D
    jns   sum_ok
    sub   eax, ecx                      ; if (A + 2*D) > m, subtract m
  sum_ok:
    mov   abel_rand_seed, eax           ; save new seed
    fild  abel_rand_seed                ; st(0) = seed
    fild  frnd_divider                  ; st(0) = 2^31, st(1) = seed
    fdivp st(1), st                     ; st(0) = seed / 2^31
    ret
frnd endp
OPTION PROLOGUE:PrologueDef
OPTION EPILOGUE:EpilogueDef
;==============================================================================
;------------------------------------------------------------------------
; fltmax - largest value in an array of REAL4.
;
; In:    p = address of the first element
;        n = number of elements to examine
; Out:   ST(0) = maximum of p[0] .. p[n-1]; the caller must pop it.
;        If n is 0 (or has its sign bit set) nothing is read through p
;        and -FLT_MAX is returned.
; Clobb: ecx, edx, flags
;
; The running maximum is seeded from the last element and the array is
; walked downwards from index n-2 to 0. Seeding from an element rather
; than from -FLT_MAX keeps the result right for arrays that hold only
; values below -FLT_MAX (i.e. -infinity).
;
; NaN handling matches fltmin: a NaN candidate is skipped, and if the
; seed element itself is NaN every compare is unordered, so NaN is
; returned.
;------------------------------------------------------------------------
fltmax proc p:dword, n:dword
    local _max:real4
    mov   dword ptr _max, 0FF7FFFFFh    ; bit pattern of -FLT_MAX: result for n == 0
    mov   ecx, n
    mov   edx, p
    sub   ecx, 1                        ; ecx = index of the last element
    js    maxDone                       ; empty array: do not touch p[-1]
    fld   real4 ptr [edx+ecx*4]
    fstp  _max                          ; seed with the last element
    sub   ecx, 1
    js    maxDone                       ; single element: nothing to compare
  L0:
    fld   real4 ptr [edx+ecx*4]         ; st(0) = candidate
    fld   _max                          ; st(0) = max, st(1) = candidate
    fcomip st, st(1)                    ; flags = max ? candidate; pops max
    jp    L1                            ; unordered (NaN involved): keep max
    ja    L1                            ; max > candidate: keep max
    fst   _max                          ; candidate >= max: take it
  L1:
    fstp  st                            ; discard candidate
    sub   ecx, 1
    jns   L0
  maxDone:
    fld   _max
    ret
fltmax endp
;==============================================================================
;------------------------------------------------------------------------
; fltmin - smallest value in an array of REAL4.
;
; In:    p = address of the first element
;        n = number of elements to examine
; Out:   ST(0) = minimum of p[0] .. p[n-1]; the caller must pop it.
;        If n is 0 (or has its sign bit set) nothing is read through p
;        and +FLT_MAX is returned.
; Clobb: ecx, edx, flags
;
; The running minimum is seeded from the last element and the array is
; walked downwards from index n-2 to 0. Seeding from an element rather
; than from +FLT_MAX keeps the result right for arrays that hold only
; values above +FLT_MAX (i.e. +infinity).
;
; NaN handling: an unordered compare sets CF, so the jb below also
; skips NaN candidates; if the seed element itself is NaN every compare
; is unordered and NaN is returned.
;------------------------------------------------------------------------
fltmin proc p:dword, n:dword
    local _min:real4
    mov   dword ptr _min, 07F7FFFFFh    ; bit pattern of +FLT_MAX: result for n == 0
    mov   ecx, n
    mov   edx, p
    sub   ecx, 1                        ; ecx = index of the last element
    js    minDone                       ; empty array: do not touch p[-1]
    fld   real4 ptr [edx+ecx*4]
    fstp  _min                          ; seed with the last element
    sub   ecx, 1
    js    minDone                       ; single element: nothing to compare
  L0:
    fld   real4 ptr [edx+ecx*4]         ; st(0) = candidate
    fld   _min                          ; st(0) = min, st(1) = candidate
    fcomip st, st(1)                    ; flags = min ? candidate; pops min
    jb    L1                            ; min < candidate (or unordered): keep min
    fst   _min                          ; candidate <= min: take it
  L1:
    fstp  st                            ; discard candidate
    sub   ecx, 1
    jns   L0
  minDone:
    fld   _min
    ret
fltmin endp
;==============================================================================
;------------------------------------------------------------------------
; Benchmark driver.
;
; Fills a heap buffer with ELEMENT_COUNT pseudo-random REAL4 values in
; the range [-498, 501), then times one fltmin pass plus one fltmax pass
; over the whole buffer with GetTickCount and prints the elapsed
; milliseconds.
;
; Register use: esi = buffer, ebx = loop counter,
;               edi = warm-up count, later the starting tick count.
;------------------------------------------------------------------------
ELEMENT_COUNT equ 10000000              ; number of REAL4 values in the test buffer

start:
;==============================================================================
    invoke Sleep, 3000                  ; NOTE(review): presumably lets the system settle before timing
    mov esi, alloc(ELEMENT_COUNT*4)
    .IF esi == 0                        ; never write through a failed allocation
        printf("alloc of %d bytes failed\n", ELEMENT_COUNT*4)
        inkey
        exit
    .ENDIF

    ; Step the generator a tick-count-dependent number of times (low 16
    ; bits of GetTickCount) so each run works on different data.
    xor ebx, ebx
    invoke GetTickCount
    movzx edi, ax
    .WHILE ebx < edi
        invoke frnd
        fstp r4                         ; discard the value, only the seed matters
        add ebx, 1
    .ENDW

    ; buffer[i] = frnd * 999.0 - 498.0
    xor ebx, ebx
    .WHILE ebx < ELEMENT_COUNT
        invoke frnd
        fld4 999.0
        fmul
        fld4 498.0
        fsub
        fstp real4 ptr [esi+ebx*4]
        add ebx, 1
    .ENDW

    ; Timed section. Both scans get the full element count so the last
    ; element is examined too. edi survives the calls: fltmin/fltmax do
    ; not touch it.
    invoke GetTickCount
    mov edi, eax
    invoke fltmin, esi, ELEMENT_COUNT
    fstp r8
    ;printf("%.1f\n",r8)
    invoke fltmax, esi, ELEMENT_COUNT
    fstp r8
    ;printf("%.1f\n",r8)
    invoke GetTickCount
    sub eax, edi                        ; elapsed milliseconds
    printf("%d\n",eax)

    free esi
    inkey
    exit
;==============================================================================
end start
The only advantage I can see of setting min/max to the first element (or any other particular element) instead of setting it to +/- FLT_MAX would be if there is a significant error in the value of FLT_MAX, and the array contains values that are close to +/- FLT_MAX.