News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests
NB: Posting URL's See here: Posted URL Change

Main Menu

Converting MMX to SSE2

Started by Elegant, August 14, 2015, 12:20:11 PM

Previous topic - Next topic

Elegant

Hi I'm trying to convert the following code from MMX to SSE2 but I seem to be having issues trying to get the same functionality. Any help in identifying what would be the cause or how I should really be approaching these types of instruction sets when converting would be greatly appreciated!


Data, etc:


.xmm
.model flat,c

.data

align 16

Keep01 qword 2 dup (0001000100010001h)
HiQ qword 2 dup (00FF00FF00FF00FFh)

.code


MMX Procedure:


sum_pixels_MMX proc public uses esi edi srcp:dword,dstp:dword,pitch:dword,diff:dword,width_:dword,height:dword,thresh:qword,sum:dword,count:dword,divres:dword,divin:dword

pxor mm7,mm7
inc width_
pxor mm6,mm6
pxor mm5,mm5
mov ecx,[srcp]
movq mm0,[ecx]
punpcklbw mm0,mm7
sub ecx,diff
mov eax,height
mov edx,[count] ;currently an unsigned short array[4]

loopy:
mov esi,0
loopx:
movq mm1,[ecx+esi]
movq mm3,mm0
punpcklbw mm1,mm7
psubusw mm3,mm1
movq mm4,thresh
movq mm2,mm1
psubusw mm1,mm0
por mm3,mm1
pcmpgtw mm4,mm3
pand mm2,mm4
paddusw mm6,mm2
pand mm4,Keep01
paddusw mm5,mm4

inc esi
cmp esi,width_
jne loopx

add ecx,pitch
dec eax
jnz loopy

movq [edx],mm5

;First word
mov esi,0
mov eax,[divin]
mov ebx,[divres]
mov si,[edx]
mov cx,[eax+2*esi]
mov [ebx],cx

;2nd word
add edx,2
add ebx,2
mov si,[edx]
mov cx,[eax+2*esi]
mov [ebx],cx

;3rd word
add edx,2
add ebx,2
mov si,[edx]
mov cx,[eax+2*esi]
mov [ebx],cx

;4th word
add edx,2
add ebx,2
mov si,[edx]
mov cx,[eax+2*esi]
mov [ebx],cx

;Address the signed multiply limitation
sub ebx,6
movq mm5,[ebx]
psllw mm6,1

;Now multiply (divres/65536)
mov eax,[dstp]
pmulhw mm6,mm5
packuswb mm6,mm5
movq [eax],mm6

ret

sum_pixels_MMX endp


SSE2 Procedure:


sum_pixels_SSE2 proc public uses esi edi srcp:dword,dstp:dword,pitch:dword,diff:dword,width_:dword,height:dword,thresh:qword,sum:dword,count:dword,divres:dword,divin:dword

pxor xmm7,xmm7
inc width_
pxor xmm6,xmm6
pxor xmm5,xmm5
mov ecx,srcp
movq xmm0,qword ptr [ecx]
punpcklbw xmm0,xmm7
sub ecx,diff
mov eax,height
mov edx,[count] ;currently an unsigned short array[4]

loopy:
mov esi,0
loopx:
movdqa xmm1,[ecx+esi]
movq xmm3,xmm0
punpcklbw xmm1,xmm7
psubusw xmm3,xmm1
movq xmm4,thresh
movdqa xmm2,xmm1
psubusw xmm1,xmm0
por xmm3,xmm1
pcmpgtw xmm4,xmm3
pand xmm2,xmm4
paddusw xmm6,xmm2
pand xmm4,oword ptr Keep01
paddusw xmm5,xmm4

inc esi
cmp esi,width_
jne loopx

add ecx,pitch
dec eax
jnz loopy

movdqa [edx],xmm5

;First word
mov esi,0
mov eax,divin
mov ebx,divres
mov si,[edx]
mov cx,[eax+2*esi]
mov [ebx],cx

;2nd word
add edx,4
add ebx,4
mov si,[edx]
mov cx,[eax+2*esi]
mov [ebx],cx

;3rd word
add edx,4
add ebx,4
mov si,[edx]
mov cx,[eax+2*esi]
mov [ebx],cx

;4th word
add edx,4
add ebx,4
mov si,[edx]
mov cx,[eax+2*esi]
mov [ebx],cx

;Address the signed multiply limitation
sub ebx,12
movdqa xmm5,[ebx]
psllw xmm6,1

;Now multiply (divres/65536)
mov eax,dstp
pmulhw xmm6,xmm5
packuswb xmm6,xmm5
movq qword ptr [eax],xmm6

ret

sum_pixels_SSE2 endp

jj2007

 inc width_
Changing arguments is considered bad programming practice.
For the rest, use Olly to step through the two versions of your code and see what doesn't work.