I have long had stack-frame adjustments in place, and while setting up a YMM test piece I wanted LOCAL YMM-sized variables to store YMM registers in. The unaligned mnemonic vmovdqu works fine, but the aligned version vmovdqa crashes every time.
The macro below tests the stack alignment directly after the LOCALs; the alignment it reports is correct, but vmovdqa crashes anyway.
; -----------------------------------------------------------------------
CheckStackAlign MACRO anum
LOCAL pQuoTxt, pDivTxt, divTxt, quoTxt, divVal, quoVal
    ;; -------------------------------------------------------------------
    ;; Diagnostic: shows RSP / anum on the console so the reader can judge
    ;; by eye whether the stack pointer is a multiple of anum.
    ;;
    ;;   anum   divisor handed to the loadsd macro (e.g. the alignment
    ;;          being checked).
    ;;
    ;; Writes xmm0 and xmm1 directly. ptr$, loadsd, rcall, fptoa and
    ;; conout are defined outside this macro, so any further registers
    ;; they modify are not visible here - NOTE(review): confirm before
    ;; using this where live register values must survive.
    ;; Leaves the assembler in the .code section.
    ;; -------------------------------------------------------------------

  .data?
    quoTxt  db 64 dup (?)               ;; text buffer: RSP / anum
    divTxt  db 16 dup (?)               ;; text buffer: anum
    divVal  REAL8 ?                     ;; anum as a double
    quoVal  REAL8 ?                     ;; RSP / anum as a double

  .data
    pQuoTxt dq ?                        ;; holds address of quoTxt
    pDivTxt dq ?                        ;; holds address of divTxt

  .code
    ;; ---- divisor -> text ------------------------------------------------
    mov     pDivTxt, ptr$(divTxt)
    loadsd  xmm0, anum
    movsd   divVal, xmm0
    rcall   fptoa,divVal,pDivTxt        ;; presumably formats divVal into divTxt

    ;; ---- RSP / divisor -> text ------------------------------------------
    cvtsi2sd xmm0, rsp                  ;; stack pointer as a double
    loadsd  xmm1, anum                  ;; divisor again (xmm0 was reused above)
    divsd   xmm0, xmm1                  ;; xmm0 = RSP / anum
    movsd   quoVal, xmm0
    mov     pQuoTxt, ptr$(quoTxt)
    rcall   fptoa,quoVal,pQuoTxt        ;; presumably formats quoVal into quoTxt

    ;; ---- report -----------------------------------------------------------
    conout "Stack pointer RSP divided by ",pDivTxt," = ",pQuoTxt,lf, \
           "If number has no fraction, it is aligned by ",pDivTxt,lf,lf
ENDM
; -----------------------------------------------------------------------