Current version of BinSearch code in MASM32.lib is buggy: it doesn't check the matching of a tail bytes of a instring and substring, i.e., it will find a match in the pos 2 in the instring like "12345" for a substring "346", more complex test cases below - first testinstring1 contains NO match, last char of the substring, starting at pos 127 in the instring, is not equal to the last byte of the matched substring, but code doesn't check this. For a second teststring2 - there IS a real match, but it lies below that a false match, so the code returns the wrong results as well.
The test piece below contains fixed version BinSearch_fix, the fix is:
align 4
Pre_Match:
lea ebp, [esi+ecx] ; put current scan address in EBP
mov edx, [esp+20+16] ; put pattern length into EDX
inc edx ; <<< ADD THIS LINE
or remove displacement from a memory access and change the loop conditions to a sign check instead of zero check:
Test_Match:
REPEAT 7
movzx ebx, BYTE PTR [ebp+edx] ; load last byte of pattern length in main string
cmp bl, [edi+edx] ; compare it with last byte in pattern
jne Pre_Scan ; exit loop on mismatch
sub edx, 1
js Match
ENDM
movzx ebx, BYTE PTR [ebp+edx] ; load last byte of pattern length in main string
cmp bl, [edi+edx] ; compare it with last byte in pattern
jne Pre_Scan ; exit loop on mismatch
sub edx, 1
jns Test_Match
jmp Match
Results for a test piece below:
Test #1, there is no match, pos sould be -1, pos: 127
Test #2, there IS a match, pos sould be 201, pos: 127
Fix Test #3, there is no match, pos sould be -1, pos: -1
Fix Test #4, there IS a match, pos sould be 201, pos: 201
include \masm32\include\masm32rt.inc
.686
.mmx
.xmm
.code
align 16
testinstr1 db "01234567890ABCDEFGHIJKLM OPQRSTUVWXYZ 01234567890ABCDEFGHIJKLMNOPQRSTUVWXY 01234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890023434567890ABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890123534567890ABCDEFGHIJKLMNO"
testinstr2 db "01234567890ABCDEFGHIJKLM OPQRSTUVWXYZ 01234567890ABCDEFGHIJKLMNOPQRSTUVWXY 01234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890023434567890ABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890123534567890ABCDEFGHIJKLMNO >34567890ABCDEFGHIJKLMNOPQRSTUVWXYZ012345678901234<"
align 16
substring db "34567890ABCDEFGHIJKLMNOPQRSTUVWXYZ012345678901234"
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
align 16
BinSearch_fix proc startpos:DWORD,lpSource:DWORD,sLen:DWORD,
lpPattern:DWORD,pLen:DWORD
push ebx
push esi
push edi
push ebp
; ----------------
; setup loop code
; ----------------
mov esi, [esp+8+16] ; lpSource
mov edi, [esp+16+16] ; lpPattern
mov al, [edi] ; get 1st char in pattern
mov ecx, [esp+12+16] ; sLen
sub ecx, [esp+20+16] ; pLen ; sub pattern len to avoid searching past end of src
add ecx, 1
add esi, ecx ; add source length
neg ecx ; invert sign
add ecx, [esp+4+16] ; startpos ; add starting offset
sub DWORD PTR [esp+20+16], 1
jmp Scan_Loop
; ----------------------------------------
align 4
Pre_Match:
lea ebp, [esi+ecx] ; put current scan address in EBP
mov edx, [esp+20+16] ; put pattern length into EDX
inc edx
Test_Match:
REPEAT 7
movzx ebx, BYTE PTR [ebp+edx-1] ; load last byte of pattern length in main string
cmp bl, [edi+edx-1] ; compare it with last byte in pattern
jne Pre_Scan ; exit loop on mismatch
sub edx, 1
jz Match
ENDM
movzx ebx, BYTE PTR [ebp+edx-1] ; load last byte of pattern length in main string
cmp bl, [edi+edx-1] ; compare it with last byte in pattern
jne Pre_Scan ; exit loop on mismatch
sub edx, 1
jnz Test_Match
jmp Match
align 4
Pre_Scan:
add ecx, 1 ; start on next byte
Scan_Loop:
REPEAT 7
cmp al, [esi+ecx] ; scan for 1st byte of pattern
je Pre_Match ; test if it matches
add ecx, 1
jns No_Match ; exit on sign inversion
ENDM
cmp al, [esi+ecx] ; scan for 1st byte of pattern
je Pre_Match ; test if it matches
add ecx, 1
js Scan_Loop ; exit on sign inversion
; ----------------------------------------
No_Match: ; fall through here on no match
mov eax, -1
jmp isOut
Match:
add ecx, [esp+12+16]
sub ecx, [esp+20+16]
mov eax, ecx
isOut:
pop ebp
pop edi
pop esi
pop ebx
ret 20
BinSearch_fix endp
OPTION PROLOGUE:PrologueDef
OPTION EPILOGUE:EpilogueDef
start proc
invoke BinSearch,0,offset testinstr1,sizeof testinstr1,offset substring,sizeof substring
invoke crt_printf,CTXT("Test #1, there is no match, pos sould be -1, pos: %d",13,10),eax
invoke BinSearch,0,offset testinstr2,sizeof testinstr2,offset substring,sizeof substring
invoke crt_printf,CTXT("Test #2, there IS a match, pos sould be 201, pos: %d",13,10),eax
invoke BinSearch_fix,0,offset testinstr1,sizeof testinstr1,offset substring,sizeof substring
invoke crt_printf,CTXT("Fix Test #3, there is no match, pos sould be -1, pos: %d",13,10),eax
invoke BinSearch_fix,0,offset testinstr2,sizeof testinstr2,offset substring,sizeof substring
invoke crt_printf,CTXT("Fix Test #4, there IS a match, pos sould be 201, pos: %d",13,10),eax
invoke crt__getch
invoke crt_exit,0
start endp
end start
Hi Alex,
Thanks for reporting this. Just one question first, is this the version from MASM32 v11 ? Saves me looking through older versions.
Hi Hutch, yes, I entangled it with an older one, from MASMv10 package, the current one is ok.