Hi Jack,
Guga already compiled it for us.
Hi Guga,
Exp(x) = Exp2(x / Logn(2.0))
1/Logn(2.0) = 1.4426950408889634073599246810019
Now we can use ( x * 1.4426950408889634073599246810019 ) to calculate the Exp.
E = x * 1.442695
The polynomial is only valid on the interval [0, 1], so we evaluate it on the fractional part of E only.
When done, scale the result by adding the integer part to the exponent field of the floating point result.
Here is the degree-10 polynomial and the SSE2 routine.
lolremez -d 10 -r 0:1 "2**x" "2**x"
// Polynomial approximation of f(x) = 2**x (lolremez output),
// fitted with weight function g(x) = 2**x
// on the interval [ 0, 1 ],
// degree 10.
double f(double x)
{
    // Coefficients ordered from the x^10 term down to the constant term,
    // i.e. in the order Horner's scheme consumes them.
    static const double coeff[] = {
        9.9522144894077596e-9,
        9.4609455637620191e-8,
        1.331271998884894e-6,
        1.5244659656760603e-5,
        1.5403957490414841e-4,
        1.3333543730974749e-3,
        9.6181294091749148e-3,
        5.5504108628244985e-2,
        2.402265069613608e-1,
        6.9314718055989127e-1,
        1.0000000000000002
    };

    // Horner evaluation: one multiply and one add per remaining coefficient.
    double acc = coeff[0];
    for (unsigned k = 1; k < sizeof coeff / sizeof coeff[0]; ++k)
        acc = acc * x + coeff[k];
    return acc;
}
.const
; Constant table for SSE2_Exp_10. Every value is replicated into four real4
; lanes (16 bytes) so it can be used directly as a packed-single operand.
;
; The routine reads these with movaps / addps memory operands, which require
; a 16-byte aligned address. Force that alignment here instead of relying on
; the table happening to start the segment. Each entry is exactly 16 bytes,
; so aligning the first label keeps every following label aligned too.
align 16
Chebylog2E   real4 4 dup (1.44269504089) ; 1/Logn(2.0)
; Polynomial coefficients for 2**x on [0, 1], in Horner order:
; Cheby10Exp0 is the x^10 coefficient, Cheby10Exp10 the constant term.
Cheby10Exp0  real4 4 dup (9.9522144894077596e-9)
Cheby10Exp1  real4 4 dup (9.4609455637620191e-8)
Cheby10Exp2  real4 4 dup (1.331271998884894e-6)
Cheby10Exp3  real4 4 dup (1.5244659656760603e-5)
Cheby10Exp4  real4 4 dup (1.5403957490414841e-4)
Cheby10Exp5  real4 4 dup (1.3333543730974749e-3)
Cheby10Exp6  real4 4 dup (9.6181294091749148e-3)
Cheby10Exp7  real4 4 dup (5.5504108628244985e-2)
Cheby10Exp8  real4 4 dup (2.402265069613608e-1)
Cheby10Exp9  real4 4 dup (6.9314718055989127e-1)
Cheby10Exp10 real4 4 dup (1.0000000000000002)
.code
;-----------------------------------------------------------------------
; SSE2_Exp_10 - Exp(x) for four packed real4 values.
; In:    xmm0 = x (four real4 lanes)
; Out:   xmm1 = Exp(x) per lane; xmm0 holds the same result on return
; Clobb: xmm0, xmm1, xmm2
; Method:
;   E = x * (1/Logn(2.0)), so Exp(x) = 2^E.
;   n = integer part of E (one lower for negative x), f = E - n in [0, 1].
;   2^f comes from the degree-10 polynomial (Horner scheme); n is then
;   added to the exponent field of that result to scale it by 2^n.
; Needs: Chebylog2E and Cheby10Exp0..Cheby10Exp10 on 16-byte boundaries
;        (movaps / addps memory operands).
; NOTE(review): there is no range check. Lanes whose E does not fit a
;        signed dword, or whose result exponent leaves the real4 range,
;        are not handled - confirm callers keep x in range.
;-----------------------------------------------------------------------
align 16
SSE2_Exp_10: ; in: xmm0, out: xmm1 (result also left in xmm0)
    movaps    xmm2,oword ptr Chebylog2E
    mulps     xmm2,xmm0                     ; xmm2 = E = x * log2(e)
    psrld     xmm0,31                       ; xmm0 = sign bit of x (0 or 1)
    cvttps2dq xmm1,xmm2                     ; truncate E toward zero
    psubd     xmm1,xmm0                     ; negative x: one lower, keeps f >= 0
    movdqa    xmm0,xmm1                     ; xmm0 = n (integer part)
    cvtdq2ps  xmm1,xmm1
    subps     xmm2,xmm1                     ; xmm2 = f = E - n
    movaps    xmm1,oword ptr Cheby10Exp0    ; start Horner with the x^10 term
    pslld     xmm0,23                       ; move n into the exponent field
    mulps     xmm1,xmm2
    addps     xmm1,oword ptr Cheby10Exp1
    mulps     xmm1,xmm2
    addps     xmm1,oword ptr Cheby10Exp2
    mulps     xmm1,xmm2
    addps     xmm1,oword ptr Cheby10Exp3
    mulps     xmm1,xmm2
    addps     xmm1,oword ptr Cheby10Exp4
    mulps     xmm1,xmm2
    addps     xmm1,oword ptr Cheby10Exp5
    mulps     xmm1,xmm2
    addps     xmm1,oword ptr Cheby10Exp6
    mulps     xmm1,xmm2
    addps     xmm1,oword ptr Cheby10Exp7
    mulps     xmm1,xmm2
    addps     xmm1,oword ptr Cheby10Exp8
    mulps     xmm1,xmm2
    addps     xmm1,oword ptr Cheby10Exp9
    mulps     xmm1,xmm2
    addps     xmm1,oword ptr Cheby10Exp10   ; xmm1 = 2^f
    ; Scale by 2^n with an integer add on the exponent bits. The sum is
    ; formed in xmm1, the documented output register, and mirrored into
    ; xmm0 so that callers reading either register get the result.
    paddd     xmm1,xmm0
    movaps    xmm0,xmm1
    ret