News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests

Main Menu

Calculate a Pythagorean triple in C++ and assembly

Started by Ben756, February 22, 2017, 08:07:18 AM

Previous topic - Next topic

aw27

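This is a solution that performs the arithmetic with SSE instructions instead of x87 (x87 is touched only to return the result in ST(0), as the Win32 calling convention requires). Calculations are performed on floats (real4), not doubles (real8). Of course, it could be done on doubles as well.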


.686
.xmm

.model flat, stdcall
option casemap :none 

includelib \masm32\lib\msvcrt.lib
printf proto C :vararg
includelib \masm32\lib\kernel32.lib
ExitProcess proto :dword

.const
; ---------------------------------------------------------------------
; printf format strings used by main. Every value is printed with two
; decimals; the three results are labelled a, b and c.
; ---------------------------------------------------------------------
format0 db "m=%.2f, n=%.2f",13,10,0,0
format1 db "a = 2 * m*n : %.2f", 13,10,0,0
format2 db "b = pow(m, 2) - pow(n, 2) : %.2f", 13,10,0,0
format3 db "c = pow(m, 2) + pow(n, 2) : %.2f", 13,10,0,0

; ---------------------------------------------------------------------
; Constants consumed by calcPow.
; Several of them are used as memory operands of packed instructions
; (andps / orps / psubd / paddd), which read 16 bytes starting at the
; label; only the low dword is meaningful to calcPow. Keep the
; "align 16" directives and the declaration order as they are.
; ---------------------------------------------------------------------
align 16
_Constant_ps_min_norm_pos_powf dd 800000h      ; lower clamp applied to the base before the exponent is extracted
align 16
_Constant_ps_inv_mant_mask_powf dd 807fffffh   ; keeps the sign and mantissa bits, clears the exponent field
align 16
_Constant_ps_0p5_powf dd 0.500000000
align 16
_Constant_pi32_0x7f_powf dd 7fh                ; single-precision exponent bias (127)
align 16
_Constant_one_powf dd 1.0
_Constant__ps_cephes_SQRTHF_powf dd 0.707106769
; polynomial coefficients for the logarithm stage (applied p0 first, p8 last)
_Constant_ps_cephes_log_p0_powf dd 0.0703768358
_Constant_ps_cephes_log_p1_powf dd -0.115146101
_Constant_ps_cephes_log_p2_powf dd 0.116769984
_Constant_ps_cephes_log_p3_powf dd -0.124201410
_Constant_ps_cephes_log_p4_powf dd 0.142493233
_Constant_ps_cephes_log_p5_powf dd -0.166680574
_Constant_ps_cephes_log_p6_powf dd 0.200007141
_Constant_ps_cephes_log_p7_powf dd -0.249999940
_Constant_ps_cephes_log_p8_powf dd 0.333333313
; the two factors the binary exponent is multiplied by in the logarithm stage
_Constant_ps_cephes_log_q1_powf dd -0.000212194442
_Constant_ps_cephes_log_q2_powf dd 0.693359375
; constants for the exponential stage
_Constant_ps_cephes_LOG2EF_powf dd 1.44269502
_Constant_ps_cephes_exp_C1_powf dd 0.693359375
_Constant_ps_cephes_exp_C2_powf dd -0.000212194442
_Constant_ps_cephes_exp_p0_powf dd 0.000198756912
_Constant_ps_cephes_exp_p1_powf dd 0.00139819994
_Constant_ps_cephes_exp_p2_powf dd 0.00833345205
_Constant_ps_cephes_exp_p3_powf dd 0.0416657962
_Constant_ps_cephes_exp_p4_powf dd 0.166666657
_Constant_ps_cephes_exp_p5_powf dd 0.500000000

; ---------------------------------------------------------------------
; Program inputs: m, n, the exponent handed to calcPow, and the integer
; factor used for a = 2*m*n.
; ---------------------------------------------------------------------
base1 REAL4 4.5
base2 REAL4 5.5
exp REAL4 2.0
mulInt DWORD 2

.code

; ---------------------------------------------------------------------
; calcPow - single-precision approximation of _base raised to _exp,
;           evaluated as exp(_exp * ln(_base)) with SSE scalar code.
;
; In:    _base, _exp : REAL4 on the stack (stdcall, callee cleans up)
; Out:   result in ST(0) (loaded with FLD from the local "res")
; Clobb: xmm0-xmm7, x87 stack top
;
; Special cases visible in the code:
;   * _base <= 0 sets an all-ones mask that is ORed into the logarithm,
;     so the value returned has the all-ones (NaN) bit pattern.
;   * If _exp * ln(_base) is NaN or infinite, an all-ones mask is ORed
;     into the final result.
; NOTE(review): the product _exp * ln(_base) is not range-clamped before
; 2^n is built by shifting into the exponent field; very large
; magnitudes presumably produce garbage - confirm this is acceptable.
; Packed instructions are used on scalar data in several places; only
; the low lane of each register carries a meaningful value.
; ---------------------------------------------------------------------
calcPow proc public _base: real4, _exp : real4
LOCAL res : REAL4

; ---- logarithm stage: xmm7 = ln(_base) --------------------------------
movss xmm7, _base
xorps xmm0, xmm0
movss xmm6, xmm7
cmpleps xmm6, xmm0 ; xmm6 = all-ones mask where _base <= 0 (applied at the end of this stage)
maxss xmm7, real4 ptr _Constant_ps_min_norm_pos_powf ; clamp the base from below
movss xmm5, xmm7
psrld xmm5, 17h ; shift out the 23 mantissa bits -> biased exponent as integer

andps xmm7, real4 ptr _Constant_ps_inv_mant_mask_powf ; drop the exponent field
orps xmm7, real4 ptr _Constant_ps_0p5_powf ; give the mantissa the exponent of 0.5
psubd xmm5, real4 ptr _Constant_pi32_0x7f_powf ; remove the exponent bias

cvtdq2ps xmm5, xmm5 ; exponent as float
addss xmm5, real4 ptr _Constant_one_powf
; if mantissa < SQRTHF: exponent -= 1 and x = 2*mantissa - 1, else x = mantissa - 1
movss xmm4, xmm7
cmpltss xmm4, real4 ptr _Constant__ps_cephes_SQRTHF_powf ; xmm4 = mask (mantissa < SQRTHF)
movss xmm0, xmm7
andps xmm0, xmm4 ; mantissa where the mask is set, 0 otherwise
subss xmm7, real4 ptr _Constant_one_powf
movss xmm1, real4 ptr _Constant_one_powf
andps xmm1, xmm4 ; 1.0 where the mask is set, 0 otherwise
subss xmm5, xmm1
addss xmm7, xmm0 ; xmm7 = x
movss xmm2, xmm7
mulss xmm2, xmm2 ; xmm2 = x*x
; polynomial in x using coefficients p0..p8, accumulated in xmm3
movss xmm3, xmm7
mulss xmm3, real4 ptr _Constant_ps_cephes_log_p0_powf
addss xmm3, real4 ptr _Constant_ps_cephes_log_p1_powf
mulss xmm3, xmm7
addss xmm3, real4 ptr _Constant_ps_cephes_log_p2_powf
mulss xmm3, xmm7
addss xmm3, real4 ptr _Constant_ps_cephes_log_p3_powf
mulss xmm3, xmm7
addss xmm3, real4 ptr _Constant_ps_cephes_log_p4_powf
mulss xmm3, xmm7
addss xmm3, real4 ptr _Constant_ps_cephes_log_p5_powf
mulss xmm3, xmm7
addss xmm3, real4 ptr _Constant_ps_cephes_log_p6_powf
mulss xmm3, xmm7
addss xmm3, real4 ptr _Constant_ps_cephes_log_p7_powf
mulss xmm3, xmm7
addss xmm3, real4 ptr _Constant_ps_cephes_log_p8_powf
mulss xmm3, xmm7
mulss xmm3, xmm2 ; polynomial * x * x*x
; add the exponent contribution (split into q1 and q2 parts) and subtract x*x/2
movss xmm0, xmm5
mulss xmm0, real4 ptr _Constant_ps_cephes_log_q1_powf
addss xmm3, xmm0
movss xmm0, xmm2
mulss xmm0, real4 ptr _Constant_ps_0p5_powf
subss xmm3, xmm0
movss xmm0, xmm5
mulss xmm0, real4 ptr _Constant_ps_cephes_log_q2_powf
addss xmm7, xmm3
addss xmm7, xmm0
orps xmm7, xmm6 ; force all-ones bits where _base <= 0
; ---- xmm6 = _exp * ln(_base) -------------------------------------------
movss xmm6, _exp
mulss xmm6, xmm7
; xmm7 = mask where (product * 0) is not equal to itself, i.e. product is NaN or infinite
xorps xmm0, xmm0
movss xmm7, xmm6
mulss xmm7, xmm0
cmpneqps xmm7, xmm7
; ---- exponential stage: xmm1 = exp(xmm6) -------------------------------
; n = floor(product * LOG2EF + 0.5): truncate, then subtract 1 where truncation rounded up
movss xmm4, xmm6
mulss xmm4, real4 ptr _Constant_ps_cephes_LOG2EF_powf
addss xmm4, real4 ptr _Constant_ps_0p5_powf
movss xmm3, xmm4
cvttps2dq  xmm3, xmm3
cvtdq2ps xmm0, xmm3
movss xmm2, xmm4
cmpltps xmm2, xmm0 ; mask where the untruncated value < truncated value
andps xmm2, real4 ptr _Constant_one_powf ; 1.0 where the mask is set
movss xmm4, xmm0
subss xmm4, xmm2 ; xmm4 = n
; r = product - n*C1 - n*C2
movss xmm0, xmm4
mulss xmm0, real4 ptr _Constant_ps_cephes_exp_C1_powf
movss xmm5, xmm4
mulss xmm5, real4 ptr _Constant_ps_cephes_exp_C2_powf
subss xmm6, xmm0
subss xmm6, xmm5
movss xmm5, xmm6
mulss xmm5, xmm5 ; xmm5 = r*r
; polynomial in r using coefficients p0..p5, then result = poly*r*r + r + 1
movss xmm1, real4 ptr _Constant_ps_cephes_exp_p0_powf
mulss xmm1, xmm6
addss xmm1, real4 ptr _Constant_ps_cephes_exp_p1_powf
mulss xmm1, xmm6
addss xmm1, real4 ptr _Constant_ps_cephes_exp_p2_powf
mulss xmm1, xmm6
addss xmm1, real4 ptr _Constant_ps_cephes_exp_p3_powf
mulss xmm1, xmm6
addss xmm1, real4 ptr _Constant_ps_cephes_exp_p4_powf
mulss xmm1, xmm6
addss xmm1, real4 ptr _Constant_ps_cephes_exp_p5_powf
mulss xmm1, xmm5
addss xmm1, xmm6
addss xmm1, real4 ptr _Constant_one_powf
; build 2^n as a float bit pattern: (n + 7fh) shifted into the exponent field
movss xmm3, xmm4
cvttps2dq xmm3, xmm4 ; overwrites the copy made by the previous instruction
paddd xmm3, real4 ptr _Constant_pi32_0x7f_powf
pslld xmm3, 17h
movdqa xmm0, xmm3
mulss xmm1, xmm0 ; scale by 2^n
orps xmm1, xmm7 ; apply the NaN/infinity mask computed above
movd res, xmm1

FLD res ; the result is handed back in ST(0), where the caller pops it with fstp
ret
calcPow endp


; ---------------------------------------------------------------------
; main - program entry point (named by "END main").
;
; Reads m (base1) and n (base2) from the constant data, then prints:
;     a = 2*m*n
;     b = pow(m, 2) - pow(n, 2)
;     c = pow(m, 2) + pow(n, 2)
; Both squares come from calcPow, which returns its result in ST(0).
; printf is variadic, so every value is widened to REAL8 before it is
; passed. The process ends through ExitProcess; main never returns.
; Values are reloaded from the locals after each printf call rather
; than kept live in xmm registers across it.
; ---------------------------------------------------------------------
main PROC
LOCAL firstPow : REAL8
LOCAL secPow : REAL8
LOCAL aVal : REAL8
LOCAL bVal : REAL8
LOCAL cVal : REAL8
LOCAL base1Real8 : REAL8
LOCAL base2Real8 : REAL8

; widen m and n to double (scalar conversion straight from memory)
cvtss2sd xmm0, base1
movsd REAL8 ptr base1Real8, xmm0
cvtss2sd xmm0, base2
movsd REAL8 ptr base2Real8, xmm0

; print m and n as real8 (double)
invoke printf, offset format0, base1Real8, base2Real8

; firstPow = pow(m, 2); fstp widens the REAL4 result in ST(0) to REAL8
invoke calcPow, base1, exp
fstp real8 ptr firstPow

; secPow = pow(n, 2)
invoke calcPow, base2, exp
fstp real8 ptr secPow

; aVal = m * n * 2, computed in single precision and then widened
movss xmm0, base1
mulss xmm0, base2
cvtsi2ss xmm1, mulInt
mulss xmm0, xmm1
cvtss2sd xmm1, xmm0
movsd aVal, xmm1

; print a = 2 * m*n
invoke printf, offset format1, aVal

; bVal = firstPow - secPow
movsd xmm0, firstPow
subsd xmm0, secPow
movsd bVal, xmm0
; print b = pow(m, 2) - pow(n, 2)
invoke printf, offset format2, bVal

; cVal = firstPow + secPow
movsd xmm0, firstPow
addsd xmm0, secPow
movsd cVal, xmm0
; print c = pow(m, 2) + pow(n, 2)
invoke printf, offset format3, cVal

invoke ExitProcess, 0
main ENDP
END main
END main