; This is Jochen's data and code
align 16		; every row below is read as a 16-byte SSE memory operand (movaps / psubd), which requires 16-byte alignment
TestB_s:		; start marker, paired with TestB_endp after the procedure (presumably for size measurement - confirm against the harness)
;
; Constant rows for TestB. Each row is exactly 16 bytes = four dwords, and each
; dword is one right-aligned 4-character decimal field. x86 is little-endian,
; so inside a dword:
;	bits 24-31 = last character (ones digit)
;	bits 16-23 = tens position
;	bits  8-15 = hundreds position
; Keep every row 16 bytes long and keep the rows in this order: movaps
; alignment depends on it, and so does any code that reaches the rows as
; Src0123+16, +32, ... +112.
;
; Seed: the text for 0, 1, 2, 3. It must be exactly 16 bytes (three spaces in
; front of each digit); a shorter string shifts and misaligns every row below.
Src0123 db "   0   1   2   3"
; +4 on the ones digit of all four fields.
Add4444 dd 04000000h, 04000000h, 04000000h, 04000000h ; xmm1
; Fields 0-1: +4. Fields 2-3: ones digit -6 (0FAh wraps mod 256, the carry out
; of the dword is discarded) and tens position +11h, which turns a space (20h)
; into "1" (31h) - e.g. "   6" becomes "  10".
Add44xx dd 04000000h, 04000000h, 0FA110000h, 0FA110000h ; xmm2
; Mirror image of Add44xx: fields 0-1 cross into the tens, fields 2-3 get +4.
Addxx44 dd 0FA110000h, 0FA110000h, 04000000h, 04000000h ; xmm3
; All four fields: ones digit -6, tens digit +1 - e.g. "  16" becomes "  20".
Addxxxx dd 0FA010000h, 0FA010000h, 0FA010000h, 0FA010000h ; xmm4
; Second set, used once a tens digit already exists: the decade crossing is
; now tens digit +1 (01h) instead of space-to-"1" (11h).
Add244xx dd 04000000h, 04000000h, 0FA010000h, 0FA010000h ; xmm2
Add2xx44 dd 0FA010000h, 0FA010000h, 04000000h, 04000000h ; xmm3
Add2xxxx dd 0FA010000h, 0FA010000h, 0FA010000h, 0FA010000h ; xmm4 (same values as Addxxxx)
; Hundreds rollover fix-ups, applied with psubd after the tens digit has
; overflowed from "9" to ":" (3Ah):
;	Sub100a: tens ":" -> "0", hundreds space (20h) -> "1"	("  :x" -> " 10x")
;	Sub100b: tens ":" -> "0", hundreds digit +1		(" 1:x" -> " 20x")
Sub100a dd 00009EF00h, 00009EF00h, 00009EF00h, 00009EF00h
Sub100b dd 00009FF00h, 00009FF00h, 00009FF00h, 00009FF00h
NameB equ FA Jochen ; assign a descriptive name here
;-----------------------------------------------------------------------
; TestB - fill MyArray with consecutive 4-character decimal text fields
;         using packed dword adds on ASCII data.
;
; In:    nothing
; Out:   eax = offset MyArray
; Clobb: ecx, edx, xmm0-xmm4, flags (only volatile integer registers are
;        used, so esi/edi/ebx keep the caller's values)
;
; Each pass stores five 16-byte rows (20 fields) and advances the values
; in xmm0 by 4 between rows. 50 passes write TestB_Bytes = 4000 bytes.
; Assuming Src0123 holds the 16-byte text for 0..3, the result is the
; text for 0..999.
;
; Requirements not visible in this block - confirm against the rest of
; the file: MyArray must be 16-byte aligned and at least TestB_Bytes
; long, and every constant row (Src0123, Add*, Sub100*) must sit on a
; 16-byte boundary, because all accesses use movaps / psubd m128.
;-----------------------------------------------------------------------
TestB_Step	equ 80		; bytes written per pass: 5 rows * 16
TestB_Bytes	equ 4000	; total bytes written: 50 passes * TestB_Step
TestB proc
	mov eax, offset MyArray			; eax = write pointer
	lea edx, [eax+TestB_Bytes]		; edx = end of the output area
	movaps xmm0, oword ptr Src0123		; current four fields
	movaps xmm1, oword ptr Add4444		; +4 on every field
	movaps xmm2, oword ptr Add44xx		; first-decade increments, replaced after pass 1
	movaps xmm3, oword ptr Addxx44
	movaps xmm4, oword ptr Addxxxx
	mov ecx, -5				; pass counter: reaches 0 after the fifth pass (values 80..99)
	.Repeat
		movaps [eax], xmm0
		paddd xmm0, xmm1		; 4444
		movaps [eax+16], xmm0
		paddd xmm0, xmm2		; 44xx
		movaps [eax+32], xmm0
		paddd xmm0, xmm3		; xx44
		movaps [eax+48], xmm0
		paddd xmm0, xmm1		; 4444
		movaps [eax+64], xmm0
		paddd xmm0, xmm4		; xxxx
		inc ecx				; Zero? below tests the flags set by this inc
		.if Zero?
			; fifth pass done: the tens digit overflowed past "9",
			; fix it up and put "1" in the hundreds position
			psubd xmm0, oword ptr Sub100a
		.elseif ecx==-4
			; first pass done: a tens digit now exists, so switch to
			; the increments that bump a digit instead of a space
			movaps xmm2, oword ptr Add244xx
			movaps xmm3, oword ptr Add2xx44
			movaps xmm4, oword ptr Add2xxxx
		.elseif ecx==5
			; every further five passes (100 values): carry from the
			; tens into the hundreds digit and restart the count
			psubd xmm0, oword ptr Sub100b
			xor ecx, ecx
		.endif
		add eax, TestB_Step
	.Until eax>=edx
	mov eax, offset MyArray			; return the start of the filled array
	ret
TestB endp
TestB_endp:		; end marker, paired with TestB_s (presumably for size measurement - confirm against the harness)