Hi guga,
Here is a very fast real4 (single-precision) SSE2 exponential (e^x) routine.
It computes up to 4 exponentials at once.
Precision: 7 digits
.const
; Constant tables for SSE2_Exp. Every table holds the same real4 value in all
; four lanes so it can be used directly as a packed operand.
;
; SSE2_Exp reads these tables through movaps / addps memory operands, which
; require a 16-byte-aligned address. Pin the first table to a 16-byte boundary;
; each table is exactly 16 bytes (4 x real4), so the following ones stay aligned
; as long as nothing is inserted between them.
align 16
Chebylog2E real4 4 dup (1.44269504089)  ; log2(e): scales x so that e^x = 2^(x*log2(e))
; 5th Degree Chebyshev ( Remez Algorithm ) Polynomial coefficients.
; SSE2_Exp evaluates them in Horner form starting from Cheby5Exp0, so
; Cheby5Exp0 is the f^5 coefficient and Cheby5Exp5 is the constant term.
Cheby5Exp0 real4 4 dup (0.0018775767)   ; f^5
Cheby5Exp1 real4 4 dup (0.0089893397)   ; f^4
Cheby5Exp2 real4 4 dup (0.055826318)    ; f^3
Cheby5Exp3 real4 4 dup (0.24015361)     ; f^2
Cheby5Exp4 real4 4 dup (0.69315308)     ; f^1
Cheby5Exp5 real4 4 dup (0.99999994)     ; f^0 (constant term)
.code
;-----------------------------------------------------------------------
; SSE2_Exp - packed single-precision e^x, four lanes at once
;
; In:    xmm0 = 4 x real4 inputs (x)
; Out:   xmm0 = 4 x real4 results (approximately e^x)
; Clobb: xmm1, xmm2
;
; Method:
;   t = x * log2(e)
;   i = trunc(t) - signbit(x)       (integer part, one lower for negative x)
;   f = t - i                       (fraction, in [0, 1] for in-range inputs)
;   p = degree-5 polynomial in f    (Cheby5Exp0..5, Horner form)
;   result = bits(p) + (i << 23)    (adds i to the IEEE-754 exponent field)
;
; Limits: no range check or clamp is performed; the exponent is adjusted
; with a plain integer add, so inputs whose scaled value does not fit the
; real4 exponent field do not saturate to 0 / infinity.
;-----------------------------------------------------------------------
align 16
SSE2_Exp:
        ; --- t = x * log2(e) ------------------------------------------
        movaps      xmm2, oword ptr Chebylog2E
        mulps       xmm2, xmm0              ; xmm2 = t

        ; --- split t into integer part i and fraction f ---------------
        psrld       xmm0, 31                ; xmm0 = 1 where x has its sign bit set, else 0
        cvttps2dq   xmm1, xmm2              ; xmm1 = t truncated toward zero
        psubd       xmm1, xmm0              ; xmm1 = i (step down by one for negative x)
        movdqa      xmm0, xmm1              ; keep an integer copy of i
        pslld       xmm0, 23                ; xmm0 = i positioned at the real4 exponent field
        cvtdq2ps    xmm1, xmm1              ; xmm1 = i as real4
        subps       xmm2, xmm1              ; xmm2 = f = t - i

        ; --- p = ((((c0*f + c1)*f + c2)*f + c3)*f + c4)*f + c5 --------
        movaps      xmm1, oword ptr Cheby5Exp0
        mulps       xmm1, xmm2
        addps       xmm1, oword ptr Cheby5Exp1
        mulps       xmm1, xmm2
        addps       xmm1, oword ptr Cheby5Exp2
        mulps       xmm1, xmm2
        addps       xmm1, oword ptr Cheby5Exp3
        mulps       xmm1, xmm2
        addps       xmm1, oword ptr Cheby5Exp4
        mulps       xmm1, xmm2
        addps       xmm1, oword ptr Cheby5Exp5  ; xmm1 = p

        ; --- scale by 2^i: integer add onto the exponent bits of p ----
        paddd       xmm0, xmm1              ; xmm0 = result
        ret