Wouldn't it be faster to just use SSE (16 bytes at a time)?
;-------------------------------------------------------------------------------
; SSEStrchr - Scan a null-terminated string for the first occurrence of a
; byte, examining 16 bytes per step with SSE2 compares.
;
; INVOKE SSEStrchr,pszSource,cbMatch,pMatchLoc
;
; In:   pszSource = address of the string to scan.
;       cbMatch   = match character (only the low BYTE is used).
;       pMatchLoc = address of the DWORD that receives the result.
; Out:  [pMatchLoc] = address of the first matching BYTE, or 0 when the
;       terminating null is reached first.  When cbMatch is 0 the address
;       of the terminating null itself is stored.
; Regs: eax, ecx and edx are saved and restored; xmm0, xmm1, xmm2 and the
;       flags are modified.  The 12 BYTES of arguments are removed by ret 12.
;
; Every 16-BYTE read is made from a 16-BYTE aligned address, so no read can
; straddle a page boundary.  BYTES of the first block that lie in front of
; pszSource are read but their compare results are discarded.
;
; NOTE(review): the arguments are addressed relative to esp, so this
; presumably relies on MASM prologue/epilogue generation being switched off
; and on oPointerSize being 4 - confirm against the rest of the file.
;-------------------------------------------------------------------------------
ALIGN OWORD
SSEStrchr PROC,
pszSource:PBYTE,
cbMatch:DWORD,
pMatchLoc:PDWORD
    push eax                                    ; Save regs (three slots, so the
    push ecx                                    ; first argument is now at
    push edx                                    ; esp+(4*oPointerSize)).
    mov eax,[esp+(4*oPointerSize)]              ; eax = source string pointer.
    movzx edx,BYTE PTR [esp+(5*oPointerSize)]   ; dl = match character, dh = 0.
    mov ecx,eax
    and eax,-16                                 ; eax = aligned block holding the first BYTE.
    and ecx,15                                  ; cl = BYTES in that block before the string.
    movdqa xmm2,[eax]                           ; Aligned read: cannot cross a page.
    add dh,dl                                   ; dh = dl (dh was 0); ZF set if the character is null.
    jnz FindFirstChar                           ; Not a null character search.
    pxor xmm0,xmm0                              ; xmm0 = 16 nulls.
    jmp FindFirstNull                           ; Simple scan for null.
;
; dh=dl=match character, xmm2=first aligned block, cl=leading BYTES to ignore.
;
ALIGN OWORD
FindFirstChar:
    movd xmm0,edx                               ; Low WORD of xmm0 = match character pair.
    pxor xmm1,xmm1                              ; xmm1 = 16 nulls.
    pshuflw xmm0,xmm0,0                         ; Replicate the low WORD across the low four WORDs.
    pcmpeqb xmm1,xmm2                           ; xmm1 = null hits in the block.
    pshufd xmm0,xmm0,0                          ; Replicate the low DWORD across all DWORDs.
    pcmpeqb xmm2,xmm0                           ; xmm2 = match hits in the block.
    por xmm2,xmm1                               ; Combine match and null hits.
    pmovmskb edx,xmm2                           ; One BIT per BYTE hit in the low 16 BITS of edx.
    shr edx,cl                                  ; Drop hits that precede the string...
    shl edx,cl                                  ; ...keeping BIT numbers relative to eax.
    test edx,edx                                ; Shifts by 0 leave flags untouched, so test explicitly.
    jnz FoundChar                               ; A match or the terminator is in the first block.
    pxor xmm1,xmm1                              ; Leading BYTES may have left null hits; the loop needs xmm1 = 0.
    movdqa xmm2,xmm0                            ; xmm2 = 16 match characters.
;
; Loop invariant: xmm0=xmm2=match characters, xmm1=0, eax=last block scanned.
;
ALIGN OWORD
FindNextChar:
    lea eax,[eax+16]                            ; Advance to the next aligned block.
    pcmpeqb xmm2,[eax]                          ; xmm2 = match hits.
    pcmpeqb xmm1,[eax]                          ; xmm1 = null hits.
    por xmm1,xmm2                               ; Combine; all zero again when nothing was hit.
    movdqa xmm2,xmm0                            ; Reload the 16 match characters.
    pmovmskb edx,xmm1                           ; One BIT per BYTE hit in the low 16 BITS of edx.
    test edx,edx                                ; Any match or null found?
    jz FindNextChar                             ; No, scan the next block.
    jmp FoundChar                               ; Skip the alignment padding.
;
; eax=aligned block, edx=hit mask (non-zero).
;
ALIGN OWORD
FoundChar:
    bsf edx,edx                                 ; edx = index of the first hit in the block.
    lea eax,[eax+edx]                           ; eax = address of the first hit.
    xor edx,edx                                 ; edx = 0, the "not found" result.
    cmp BYTE PTR [eax],dl                       ; Was the hit the terminating null?
    cmove eax,edx                               ; If so the character is absent: return 0.
    jmp Exit                                    ; Go to save match, restore regs, and return.
;
; Null search: xmm0=16 nulls, xmm2=first aligned block, cl=leading BYTES to ignore.
;
ALIGN OWORD
FindFirstNull:
    pcmpeqb xmm2,xmm0                           ; xmm2 = null hits; xmm0 stays all null.
    pmovmskb edx,xmm2                           ; One BIT per BYTE hit in the low 16 BITS of edx.
    shr edx,cl                                  ; Drop hits that precede the string...
    shl edx,cl                                  ; ...keeping BIT numbers relative to eax.
    test edx,edx                                ; Shifts by 0 leave flags untouched, so test explicitly.
    jnz FoundNull                               ; The terminator is in the first block.
;
; Loop invariant: xmm0=0 on entry to each pass, eax=last block scanned.
;
ALIGN OWORD
FindNextNull:
    pcmpeqb xmm0,[eax+16]                       ; xmm0 = null hits in the next aligned block.
    lea eax,[eax+16]                            ; Advance to that block.
    pmovmskb edx,xmm0                           ; One BIT per BYTE hit in the low 16 BITS of edx.
    test edx,edx                                ; Terminator found?
    jz FindNextNull                             ; No (xmm0 is all zero again), scan on.
FoundNull:
    bsf edx,edx                                 ; edx = index of the terminator in the block.
    add eax,edx                                 ; eax = address of the terminator.
;
; Exit SSEStrchr: eax = result (match address or 0).
;
Exit:
    mov edx,[esp+(6*oPointerSize)]              ; edx = pMatchLoc.
    mov [edx],eax                               ; Store the result where the caller asked.
    pop edx                                     ; Restore regs
    pop ecx
    pop eax
;   cmp dErrorPkg,0                             ; Return any error.
    ret 12                                      ; Exit PROC SSEStrchr, removing the three arguments.
SSEStrchr ENDP
SSEStrchrLen EQU ($-SSEStrchr)
;-------------------------------------------------------------------------------
; End of PROC SSEStrchr.
;-------------------------------------------------------------------------------
Dave.