News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests

Main Menu

Bug in BinSearch MASM32 Lib algo

Started by Antariy, February 04, 2013, 05:43:03 PM

Previous topic - Next topic

Antariy

Current version of BinSearch code in MASM32.lib is buggy: it doesn't check the matching of a tail bytes of a instring and substring, i.e., it will find a match in the pos 2 in the instring like "12345" for a substring "346", more complex test cases below - first testinstring1 contains NO match, last char of the substring, starting at pos 127 in the instring, is not equal to the last byte of the matched substring, but code doesn't check this. For a second teststring2 - there IS a real match, but it lies below that a false match, so the code returns the wrong results as well.

The test piece below contains fixed version BinSearch_fix, the fix is:



  align 4
  Pre_Match:
    lea ebp, [esi+ecx]      ; put current scan address in EBP
    mov edx, [esp+20+16]    ; put pattern length into EDX
    inc edx ; <<< ADD THIS LINE

   
or remove displacement from a memory access and change the loop conditions to a sign check instead of zero check:


  Test_Match:
  REPEAT 7
    movzx ebx, BYTE PTR [ebp+edx] ; load last byte of pattern length in main string
    cmp bl, [edi+edx]     ; compare it with last byte in pattern
    jne Pre_Scan            ; exit loop on mismatch
    sub edx, 1
    js Match
  ENDM

    movzx ebx, BYTE PTR [ebp+edx] ; load last byte of pattern length in main string
    cmp bl, [edi+edx]     ; compare it with last byte in pattern
    jne Pre_Scan            ; exit loop on mismatch
    sub edx, 1
    jns Test_Match

    jmp Match


Results for a test piece below:

Test #1, there is no match, pos sould be -1, pos: 127
Test #2, there IS a match, pos sould be 201, pos: 127
Fix Test #3, there is no match, pos sould be -1, pos: -1
Fix Test #4, there IS a match, pos sould be 201, pos: 201



include \masm32\include\masm32rt.inc

.686
.mmx
.xmm




.code

align 16
testinstr1 db "01234567890ABCDEFGHIJKLM OPQRSTUVWXYZ 01234567890ABCDEFGHIJKLMNOPQRSTUVWXY 01234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890023434567890ABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890123534567890ABCDEFGHIJKLMNO"

testinstr2 db "01234567890ABCDEFGHIJKLM OPQRSTUVWXYZ 01234567890ABCDEFGHIJKLMNOPQRSTUVWXY 01234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890023434567890ABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890123534567890ABCDEFGHIJKLMNO >34567890ABCDEFGHIJKLMNOPQRSTUVWXYZ012345678901234<"

align 16
substring db    "34567890ABCDEFGHIJKLMNOPQRSTUVWXYZ012345678901234"


OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE

align 16

BinSearch_fix proc startpos:DWORD,lpSource:DWORD,sLen:DWORD,
                              lpPattern:DWORD,pLen:DWORD
    push ebx
    push esi
    push edi
    push ebp

  ; ----------------
  ; setup loop code
  ; ----------------
    mov esi, [esp+8+16]     ; lpSource
    mov edi, [esp+16+16]    ; lpPattern
    mov al, [edi]           ; get 1st char in pattern

    mov ecx, [esp+12+16]    ; sLen
    sub ecx, [esp+20+16]    ; pLen           ; sub pattern len to avoid searching past end of src
    add ecx, 1
    add esi, ecx            ; add source length
    neg ecx                 ; invert sign
    add ecx, [esp+4+16]     ; startpos       ; add starting offset
    sub DWORD PTR [esp+20+16], 1
    jmp Scan_Loop

  ; ----------------------------------------

  align 4
  Pre_Match:
    lea ebp, [esi+ecx]      ; put current scan address in EBP
    mov edx, [esp+20+16]    ; put pattern length into EDX
    inc edx
   
  Test_Match:
  REPEAT 7
    movzx ebx, BYTE PTR [ebp+edx-1] ; load last byte of pattern length in main string
    cmp bl, [edi+edx-1]     ; compare it with last byte in pattern
    jne Pre_Scan            ; exit loop on mismatch
    sub edx, 1
    jz Match
  ENDM

    movzx ebx, BYTE PTR [ebp+edx-1] ; load last byte of pattern length in main string
    cmp bl, [edi+edx-1]     ; compare it with last byte in pattern
    jne Pre_Scan            ; exit loop on mismatch
    sub edx, 1
    jnz Test_Match

    jmp Match

  align 4
  Pre_Scan:
    add ecx, 1              ; start on next byte

  Scan_Loop:
  REPEAT 7
    cmp al, [esi+ecx]       ; scan for 1st byte of pattern
    je Pre_Match            ; test if it matches
    add ecx, 1
    jns No_Match            ; exit on sign inversion
  ENDM

    cmp al, [esi+ecx]       ; scan for 1st byte of pattern
    je Pre_Match            ; test if it matches
    add ecx, 1
    js Scan_Loop            ; exit on sign inversion

  ; ----------------------------------------
   
  No_Match:                 ; fall through here on no match
    mov eax, -1
    jmp isOut

  Match:
    add ecx, [esp+12+16]
    sub ecx, [esp+20+16]
    mov eax, ecx

  isOut:
    pop ebp
    pop edi
    pop esi
    pop ebx

    ret 20

BinSearch_fix endp

OPTION PROLOGUE:PrologueDef
OPTION EPILOGUE:EpilogueDef



start proc

invoke BinSearch,0,offset testinstr1,sizeof testinstr1,offset substring,sizeof substring
invoke crt_printf,CTXT("Test #1, there is no match, pos sould be -1, pos: %d",13,10),eax

invoke BinSearch,0,offset testinstr2,sizeof testinstr2,offset substring,sizeof substring
invoke crt_printf,CTXT("Test #2, there IS a match, pos sould be 201, pos: %d",13,10),eax

invoke BinSearch_fix,0,offset testinstr1,sizeof testinstr1,offset substring,sizeof substring
invoke crt_printf,CTXT("Fix Test #3, there is no match, pos sould be -1, pos: %d",13,10),eax

invoke BinSearch_fix,0,offset testinstr2,sizeof testinstr2,offset substring,sizeof substring
invoke crt_printf,CTXT("Fix Test #4, there IS a match, pos sould be 201, pos: %d",13,10),eax

invoke crt__getch
invoke crt_exit,0



start endp

end start


hutch--

Hi Alex,

Thanks for reporting this. Just one question first, is this the version from MASM32 v11 ? Saves me looking through older versions.

Antariy

Hi Hutch, yes, I entangled it with an older one, from MASMv10 package, the current one is ok.