Which do you prefer:
1) load from memory,
.data
align 16
; Rows of the 4x4 single-precision identity matrix, one 128-bit constant each.
; 3F800000h is the IEEE-754 encoding of 1.0f.  Every literal is zero-padded to
; the full 32 hex digits so the four 32-bit lanes read, left to right, as
; lane3 lane2 lane1 lane0; lane 0 is the dword stored at the lowest address.
; NOTE(review): nothing visible here writes to these rows - a read-only
; segment may suit them better; confirm against the rest of the project.
IdentityMatrixRow0 oword 0000000000000000000000003F800000h    ; (1, 0, 0, 0)
IdentityMatrixRow1 oword 00000000000000003F80000000000000h    ; (0, 1, 0, 0)
IdentityMatrixRow2 oword 000000003F8000000000000000000000h    ; (0, 0, 1, 0)
IdentityMatrixRow3 oword 3F800000000000000000000000000000h    ; (0, 0, 0, 1)
.code
;-----------------------------------------------------------------------
; XMMatrixIdentity - store a 4x4 single-precision identity matrix,
;                    copying the rows from constants in memory.
; In:    rax = destination XMMATRIX (64 bytes); must be 16-byte aligned
;              because every row is written with movaps
; Out:   [rax+00h .. rax+3Fh] = rows 0..3
;        xmm0..xmm3 are also left holding rows 0..3
; Clobb: xmm0-xmm3 (rax and the flags are not modified)
; Note:  the pointer arrives in rax, not in an ABI argument register,
;        so callers must load rax themselves (e.g. with lea).
;-----------------------------------------------------------------------
XMMatrixIdentity proc
        ; Fetch all four constant rows before anything is written.
        movaps  xmm0, IdentityMatrixRow0
        movaps  xmm1, IdentityMatrixRow1
        movaps  xmm2, IdentityMatrixRow2
        movaps  xmm3, IdentityMatrixRow3

        ; Write the rows to the destination, lowest address first.
        movaps  xmmword ptr [rax],     xmm0
        movaps  xmmword ptr [rax+10h], xmm1
        movaps  xmmword ptr [rax+20h], xmm2
        movaps  xmmword ptr [rax+30h], xmm3
        ret
XMMatrixIdentity endp
2) or build the constants in registers?
.code
;-----------------------------------------------------------------------
; XMMatrixIdentity - store a 4x4 single-precision identity matrix,
;                    synthesising 1.0f in a register (no memory loads).
; In:    rax = destination XMMATRIX (64 bytes); must be 16-byte aligned
;              because every row is written with movaps
; Out:   [rax+00h .. rax+3Fh] = rows 0..3
;        xmm0..xmm3 are also left holding rows 0..3
; Clobb: xmm0-xmm3 (rax and the flags are not modified)
; Note:  the pointer arrives in rax, not in an ABI argument register,
;        so callers must load rax themselves (e.g. with lea).
;-----------------------------------------------------------------------
XMMatrixIdentity proc
        ; Row 0: build (1.0f, 0, 0, 0) from an all-ones compare result.
        pxor    xmm0, xmm0                      ; every lane = 0
        cmpeqss xmm0, xmm0                      ; 0.0 == 0.0 -> lane 0 = 0FFFFFFFFh, lanes 1-3 stay 0
        pslld   xmm0, 19h                       ; lane 0 = 0FE000000h
        psrld   xmm0, 2                         ; lane 0 = 3F800000h = 1.0f
        movaps  xmmword ptr [rax], xmm0

        ; Rows 1-3: each 2-bit field of the pshufd immediate picks a source
        ; lane (lowest field -> destination lane 0).  00 selects the 1.0f in
        ; lane 0 of xmm0, 01 selects the zero in lane 1.
        pshufd  xmm1, xmm0, 01010001y           ; (0, 1, 0, 0)
        movaps  xmmword ptr [rax+10h], xmm1

        pshufd  xmm2, xmm0, 01000101y           ; (0, 0, 1, 0)
        movaps  xmmword ptr [rax+20h], xmm2

        pshufd  xmm3, xmm0, 00010101y           ; (0, 0, 0, 1)
        movaps  xmmword ptr [rax+30h], xmm3
        ret
XMMatrixIdentity endp