; Seems to work OK and is easy enough to set up.
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
include \masm32\include64\masm64rt.inc
; ssealign - macro function that yields an aligned address derived from `buffer`.
;
; buffer : memory operand; its effective address is taken with LEA.
; Result : expands to the text `rax`, so it can be used as a source operand,
;          e.g. `mov dest, ssealign(buf)`.
; Clobb  : rax (overwritten with the address). Whatever else `memalign`
;          touches is not visible here.
; NOTE(review): `memalign` is not defined in this file (presumably supplied via
; masm64rt.inc). It presumably rounds rax UP to the next multiple of 16 - confirm.
; If so, `buffer` needs up to 15 spare bytes beyond the space actually used.
ssealign MACRO buffer
;; rax = address of the caller-supplied buffer
lea rax, buffer
;; adjust rax to a 16-byte boundary (SDK macro, see note above)
memalign rax, 16
;; the macro function's value is the register name holding the result
EXITM <rax>
ENDM
.code
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
; entry_point - runs the XMM save/restore test in `testit`, then prints a
; confirmation message.
;
; `conout`, `waitkey` and `.exit` are macros that are not defined in this file
; (presumably supplied via masm64rt.inc); the notes on them below are
; assumptions to confirm against the SDK.
entry_point proc
; exercise the save/restore of xmm12-xmm15
call testit
; only reached if testit returned normally
conout lf," If you can see me, it worked.",lf,lf
; presumably pauses until a key is pressed - confirm
waitkey
; presumably terminates the process - confirm
.exit
entry_point endp
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
; testit - template for preserving xmm12-xmm15 around a region of user code.
;
; The four registers are spilled to a 64-byte window at a 16-byte-aligned
; address inside a stack buffer, the user code runs, and the registers are
; reloaded from the same window before returning.
;
; In:    nothing
; Out:   nothing; xmm12-xmm15 hold their entry values on return
; Clobb: rax (through ssealign), r10
; Stack: one QWORD local plus a 256-byte buffer; 64 bytes of the buffer are
;        used, starting at the address ssealign returns.
testit proc
    LOCAL pmem :QWORD                   ; aligned save-area address; survives the user code
    LOCAL buff[256]:BYTE                ; raw storage the aligned window is taken from

    mov pmem, ssealign(buff)            ; remember the aligned address in a local
    mov r10, pmem                       ; r10 = save-area base

    ; movdqa faults on an unaligned address, hence the aligned window.
    movdqa XMMWORD PTR [r10], xmm12
    movdqa XMMWORD PTR [r10+16], xmm13
    movdqa XMMWORD PTR [r10+32], xmm14
    movdqa XMMWORD PTR [r10+48], xmm15

    nop
    nop ; your code goes here
    nop

    ; r10 is a volatile register in the Microsoft x64 calling convention, so
    ; any call made by the user code above may have destroyed it. Reload the
    ; base address from the local instead of trusting the register.
    mov r10, pmem

    ; Plain aligned loads: the save area is ordinary stack memory, where a
    ; non-temporal load (movntdqa) gives no benefit and would add an SSE4.1
    ; requirement that the rest of this routine does not have.
    movdqa xmm12, XMMWORD PTR [r10]
    movdqa xmm13, XMMWORD PTR [r10+16]
    movdqa xmm14, XMMWORD PTR [r10+32]
    movdqa xmm15, XMMWORD PTR [r10+48]
    ret
testit endp
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
end
nop ; your code goes here
push+pop not allowed here...