Author Topic: Calculate a Pythagorean triple in C++ and assembly  (Read 788 times)

aw27

  • Member
  • ****
  • Posts: 605
Re: Calculate a Pythagorean triple in C++ and assembly
« Reply #15 on: August 17, 2017, 01:32:51 AM »
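This is a solution that performs its arithmetic without x87 instructions; the x87 stack is used only to hand the result back to the caller, which is how floats are returned in Win32. Calculations are performed on floats (real4), not doubles (real8). Of course, it could be done on doubles as well.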

Code: [Select]
.686
.xmm

.model flat, stdcall
option casemap :none 

includelib \masm32\lib\msvcrt.lib
printf proto C :vararg
includelib \masm32\lib\kernel32.lib
ExitProcess proto :dword

.const
; ---- printf format strings (CR, LF, NUL terminated) --------------------
format0 db "m=%f, n=%.2f",13,10,0,0
format1 db "a = 2 * m*n : %.2f", 13,10,0,0
format2 db "b = pow(m, 2) - pow(n, 2) : %.2f", 13,10,0,0
; m^2 + n^2 is the third member of the triple, so it is labelled "c"
; (it was previously printed with the same "b" label as the difference).
format3 db "c = pow(m, 2) + pow(n, 2) : %.2f", 13,10,0,0

; ---- constants for calcPow ---------------------------------------------
; Constants that calcPow reads with packed (128-bit) instructions
; (andps / orps / psubd / paddd) each start on a 16-byte boundary, because
; legacy-SSE packed memory operands must be 16-byte aligned. Only the low
; dword of each 128-bit read is meaningful to the scalar computation.

align 4
_Constant_ps_min_norm_pos_powf dd 800000h      ; bit pattern of the smallest positive normal float
align 16
_Constant_ps_inv_mant_mask_powf dd 807fffffh   ; keeps sign + mantissa bits, clears the exponent field
align 16
_Constant_ps_0p5_powf dd 0.500000000
align 16
_Constant_pi32_0x7f_powf dd 7fh                ; IEEE-754 single-precision exponent bias (127)
align 16
_Constant_one_powf dd 1.0
_Constant__ps_cephes_SQRTHF_powf dd 0.707106769 ; sqrt(0.5)

; polynomial coefficients used by the logarithm stage
_Constant_ps_cephes_log_p0_powf dd 0.0703768358
_Constant_ps_cephes_log_p1_powf dd -0.115146101
_Constant_ps_cephes_log_p2_powf dd 0.116769984
_Constant_ps_cephes_log_p3_powf dd -0.124201410
_Constant_ps_cephes_log_p4_powf dd 0.142493233
_Constant_ps_cephes_log_p5_powf dd -0.166680574
_Constant_ps_cephes_log_p6_powf dd 0.200007141
_Constant_ps_cephes_log_p7_powf dd -0.249999940
_Constant_ps_cephes_log_p8_powf dd 0.333333313
; q1 + q2 = ln(2), split into a small and a large part
_Constant_ps_cephes_log_q1_powf dd -0.000212194442
_Constant_ps_cephes_log_q2_powf dd 0.693359375

; constants used by the exponential stage
_Constant_ps_cephes_LOG2EF_powf dd 1.44269502  ; log2(e)
; C1 + C2 = ln(2), split into a large and a small part
_Constant_ps_cephes_exp_C1_powf dd 0.693359375
_Constant_ps_cephes_exp_C2_powf dd -0.000212194442
_Constant_ps_cephes_exp_p0_powf dd 0.000198756912
_Constant_ps_cephes_exp_p1_powf dd 0.00139819994
_Constant_ps_cephes_exp_p2_powf dd 0.00833345205
_Constant_ps_cephes_exp_p3_powf dd 0.0416657962
_Constant_ps_cephes_exp_p4_powf dd 0.166666657
_Constant_ps_cephes_exp_p5_powf dd 0.500000000

; ---- program inputs ----------------------------------------------------
; NOTE: with these values m < n, so m^2 - n^2 is negative (-10.00).
base1 REAL4 4.5        ; m
base2 REAL4 5.5        ; n
exp REAL4 2.0          ; exponent passed to calcPow
mulInt DWORD 2         ; integer factor in a = 2*m*n

.code

;-----------------------------------------------------------------------
; calcPow - single-precision pow(_base, _exp) without x87 arithmetic.
;
; Evaluates exp(_exp * ln(_base)); ln and exp are polynomial
; approximations driven by the _Constant_*_powf tables.
;
; ABI:   32-bit stdcall (see .model); two REAL4 stack arguments.
; In:    _base, _exp
; Out:   ST(0) = result (x87 is used only to return the value)
; Clobb: eax, xmm0-xmm7
; Notes: * _base <= 0 yields NaN: an all-ones mask is OR'ed into the
;          logarithm, so zero and negative bases are not supported.
;        * A NaN or infinite _exp*ln(_base) also yields NaN.
;        * _exp*ln(_base) is clamped to about +/-88.376 so that the
;          2^n scale factor built with integer shifts cannot overflow
;          the 8-bit exponent field; very large results saturate and
;          very small ones flush to zero instead of returning garbage.
;        * Packed instructions are applied to scalar data; only the low
;          lane of each register is meaningful.
;-----------------------------------------------------------------------
calcPow proc public _base: real4, _exp : real4
    LOCAL res : REAL4

    ; ---- stage 1: xmm7 = ln(_base) -----------------------------------
    movss xmm7, _base
    xorps xmm0, xmm0
    movss xmm6, xmm7
    cmpleps xmm6, xmm0                                      ; xmm6 = all ones if _base <= 0 (invalid-input mask)
    maxss xmm7, real4 ptr _Constant_ps_min_norm_pos_powf    ; clamp up to the smallest normal float
    movss xmm5, xmm7
    psrld xmm5, 17h                                         ; xmm5 = biased exponent field

    andps xmm7, real4 ptr _Constant_ps_inv_mant_mask_powf   ; strip the exponent bits
    orps xmm7, real4 ptr _Constant_ps_0p5_powf              ; mantissa rescaled into [0.5, 1)
    psubd xmm5, real4 ptr _Constant_pi32_0x7f_powf          ; remove the exponent bias

    cvtdq2ps xmm5, xmm5
    addss xmm5, real4 ptr _Constant_one_powf                ; e = exponent + 1
    movss xmm4, xmm7
    cmpltss xmm4, real4 ptr _Constant__ps_cephes_SQRTHF_powf ; mask = mantissa < sqrt(0.5)
    movss xmm0, xmm7
    andps xmm0, xmm4                                        ; tmp = mantissa where mask, else 0
    subss xmm7, real4 ptr _Constant_one_powf                ; x = mantissa - 1
    movss xmm1, real4 ptr _Constant_one_powf
    andps xmm1, xmm4
    subss xmm5, xmm1                                        ; e -= 1 where mask
    addss xmm7, xmm0                                        ; x += tmp
    movss xmm2, xmm7
    mulss xmm2, xmm2                                        ; z = x*x

    ; Horner evaluation of the log polynomial: y = ((p0*x + p1)*x + ... + p8)*x
    movss xmm3, xmm7
    mulss xmm3, real4 ptr _Constant_ps_cephes_log_p0_powf
    addss xmm3, real4 ptr _Constant_ps_cephes_log_p1_powf
    mulss xmm3, xmm7
    addss xmm3, real4 ptr _Constant_ps_cephes_log_p2_powf
    mulss xmm3, xmm7
    addss xmm3, real4 ptr _Constant_ps_cephes_log_p3_powf
    mulss xmm3, xmm7
    addss xmm3, real4 ptr _Constant_ps_cephes_log_p4_powf
    mulss xmm3, xmm7
    addss xmm3, real4 ptr _Constant_ps_cephes_log_p5_powf
    mulss xmm3, xmm7
    addss xmm3, real4 ptr _Constant_ps_cephes_log_p6_powf
    mulss xmm3, xmm7
    addss xmm3, real4 ptr _Constant_ps_cephes_log_p7_powf
    mulss xmm3, xmm7
    addss xmm3, real4 ptr _Constant_ps_cephes_log_p8_powf
    mulss xmm3, xmm7
    mulss xmm3, xmm2                                        ; y *= z
    movss xmm0, xmm5
    mulss xmm0, real4 ptr _Constant_ps_cephes_log_q1_powf
    addss xmm3, xmm0                                        ; y += e*q1
    movss xmm0, xmm2
    mulss xmm0, real4 ptr _Constant_ps_0p5_powf
    subss xmm3, xmm0                                        ; y -= z/2
    movss xmm0, xmm5
    mulss xmm0, real4 ptr _Constant_ps_cephes_log_q2_powf
    addss xmm7, xmm3                                        ; x += y
    addss xmm7, xmm0                                        ; x += e*q2  -> ln(_base)
    orps xmm7, xmm6                                         ; force NaN when _base <= 0

    ; ---- stage 2: xmm6 = _exp * ln(_base) ----------------------------
    movss xmm6, _exp
    mulss xmm6, xmm7
    xorps xmm0, xmm0
    movss xmm7, xmm6
    mulss xmm7, xmm0                                        ; x*0 is NaN only when x is NaN or infinite
    cmpneqps xmm7, xmm7                                     ; xmm7 = all ones in that case (result mask)

    ; Clamp x so that n + 127 below stays inside the exponent field.
    ; The mask in xmm7 was taken before clamping, so NaN still propagates.
    mov eax, 42B0C0A5h                                      ; about +88.376
    movd xmm0, eax
    minss xmm6, xmm0
    mov eax, 0C2B0C0A5h                                     ; about -88.376
    movd xmm0, eax
    maxss xmm6, xmm0

    ; ---- stage 3: xmm1 = exp(x) --------------------------------------
    movss xmm4, xmm6
    mulss xmm4, real4 ptr _Constant_ps_cephes_LOG2EF_powf
    addss xmm4, real4 ptr _Constant_ps_0p5_powf             ; fx = x*log2(e) + 0.5
    movss xmm3, xmm4
    cvttps2dq  xmm3, xmm3                                   ; truncate toward zero
    cvtdq2ps xmm0, xmm3
    movss xmm2, xmm4
    cmpltps xmm2, xmm0                                      ; truncation rounded up (negative fx)?
    andps xmm2, real4 ptr _Constant_one_powf
    movss xmm4, xmm0
    subss xmm4, xmm2                                        ; fx = floor(fx) = n
    movss xmm0, xmm4
    mulss xmm0, real4 ptr _Constant_ps_cephes_exp_C1_powf
    movss xmm5, xmm4
    mulss xmm5, real4 ptr _Constant_ps_cephes_exp_C2_powf
    subss xmm6, xmm0
    subss xmm6, xmm5                                        ; x -= n*ln(2), done in two parts
    movss xmm5, xmm6
    mulss xmm5, xmm5                                        ; z = x*x

    ; Horner evaluation of the exp polynomial
    movss xmm1, real4 ptr _Constant_ps_cephes_exp_p0_powf
    mulss xmm1, xmm6
    addss xmm1, real4 ptr _Constant_ps_cephes_exp_p1_powf
    mulss xmm1, xmm6
    addss xmm1, real4 ptr _Constant_ps_cephes_exp_p2_powf
    mulss xmm1, xmm6
    addss xmm1, real4 ptr _Constant_ps_cephes_exp_p3_powf
    mulss xmm1, xmm6
    addss xmm1, real4 ptr _Constant_ps_cephes_exp_p4_powf
    mulss xmm1, xmm6
    addss xmm1, real4 ptr _Constant_ps_cephes_exp_p5_powf
    mulss xmm1, xmm5
    addss xmm1, xmm6
    addss xmm1, real4 ptr _Constant_one_powf                ; y = poly*z + x + 1

    ; build 2^n directly in the float exponent field and scale by it
    cvttps2dq xmm3, xmm4
    paddd xmm3, real4 ptr _Constant_pi32_0x7f_powf
    pslld xmm3, 17h
    mulss xmm1, xmm3
    orps xmm1, xmm7                                         ; force NaN for NaN/infinite input to exp
    movd res, xmm1

    FLD res ; this is the way floats are returned in Win32
    ret
calcPow endp


;-----------------------------------------------------------------------
; main - program entry point.
;
; Prints m and n, then the three values 2*m*n, m^2 - n^2 and m^2 + n^2,
; where m = base1 and n = base2. The squares come from calcPow; every
; value is widened to REAL8 before being handed to printf.
; Does not return: finishes with ExitProcess(0).
;-----------------------------------------------------------------------
main PROC
    LOCAL mAsDouble : REAL8
    LOCAL nAsDouble : REAL8
    LOCAL mSquared : REAL8
    LOCAL nSquared : REAL8
    LOCAL twiceMN : REAL8
    LOCAL squareDiff : REAL8
    LOCAL squareSum : REAL8

    ; widen m and n from REAL4 to REAL8 and print them
    cvtss2sd xmm0, base1
    movsd mAsDouble, xmm0
    cvtss2sd xmm0, base2
    movsd nAsDouble, xmm0
    invoke printf, offset format0, mAsDouble, nAsDouble

    ; m^2: calcPow leaves its result in ST(0); store it as a double
    invoke calcPow, base1, exp
    fstp real8 ptr mSquared
    fwait

    ; n^2
    invoke calcPow, base2, exp
    fstp real8 ptr nSquared
    fwait

    ; (m * n) * 2, computed in single precision and then widened
    movss xmm0, base1
    mulss xmm0, base2
    cvtsi2ss xmm1, mulInt
    mulss xmm0, xmm1
    cvtss2sd xmm1, xmm0
    movsd twiceMN, xmm1
    invoke printf, offset format1, twiceMN

    ; m^2 - n^2
    movsd xmm0, mSquared
    subsd xmm0, nSquared
    movsd squareDiff, xmm0
    invoke printf, offset format2, squareDiff

    ; m^2 + n^2 (the third member of the triple)
    movsd xmm0, mSquared
    addsd xmm0, nSquared
    movsd squareSum, xmm0
    invoke printf, offset format3, squareSum

    invoke ExitProcess, 0
main ENDP
END main