Follow 2 codes, first was optimized in logic and code. Second was optimized in fact that lookup table does not need be created at each call to BM procedure, only in first call that table will be created and sucessive calls will skip table creation.
Now code prints position, and second version print ocurrences found:
;version 0.01
;ML.EXE /c /coff /Cp main2.asm
;LINK.EXE /SUBSYSTEM:CONSOLE main2.obj
;main
include \masm32\include\masm32rt.inc
.686
Boyer_Moore PROTO :dword,:dword,:dword,:dword,:dword
printf proto C :dword, :vararg ; msvcrt
.data
format_string db "%d",13,10,0
file db "\masm32\include\windows.inc",0
string db "Duplicate"
align 4
Text_test db 'xx includix'
String_test db 'did'
.code
start:
;-----------------------------------
call main ;will search for string "Duplicate" in file \masm32\include\windows.inc
;-----------------------------------
;if you want to check specific cases, comment call above and uncoment line below to Text_test and String_test cases
; invoke Boyer_Moore,0,addr Text_test,sizeof Text_test,addr String_test,sizeof String_test
;-----------------------------------
invoke ExitProcess,eax
main proc uses esi edi ebx
LOCAL plc:DWORD ;cmd line pointer
LOCAL saida:DWORD ;output handle
LOCAL manipulador_arquivo:dword ;file handle
LOCAL tamanho:DWORD ;source file size
LOCAL mapeado:DWORD ;ptr source file
LOCAL mem_aloc:DWORD ;ptr mem alloc
LOCAL nada:dword
LOCAL arq_mape:DWORD ;mapped file handle
LOCAL pos:DWORD ;initial position
LOCAL return_value:DWORD ;return result in eax register
invoke GetStdHandle,STD_OUTPUT_HANDLE
mov saida,eax
invoke GetCommandLine
mov plc,eax
invoke CreateFile,addr file,GENERIC_READ,0,0,OPEN_EXISTING,FILE_FLAG_SEQUENTIAL_SCAN,0 ;open source file
mov manipulador_arquivo,eax ;handle
invoke GetFileSize, manipulador_arquivo,NULL ;sizeof source file
mov tamanho,eax
invoke CreateFileMapping,manipulador_arquivo,NULL,PAGE_READONLY,0,0,0 ;mapped file handle
mov arq_mape,eax
invoke MapViewOfFile,eax,FILE_MAP_READ,0,0,0 ;map it, file contents pointer
mov mapeado,eax
invoke CloseHandle,manipulador_arquivo ;don't need this anymore
mov pos,0
@@:
invoke Boyer_Moore,pos,mapeado,tamanho,addr string,sizeof string
mov return_value,eax
invoke printf,addr format_string,return_value
.if return_value == -1
jmp done
.elseif return_value == -2
jmp done
.else
inc return_value
m2m pos,return_value
jmp @B
.endif
done:
invoke UnmapViewOfFile,mapeado
mov eax,return_value
ret
main endp
Boyer_Moore proc uses ebx esi edi startpos:dword,lpSource:dword,srcLngth:dword,lpSubStr:dword,subLngth:dword
LOCAL bad_chars[256]:dword
lea edi,bad_chars ;create a lookup table filled with substring size
mov ecx,256
mov eax,subLngth
rep stosd
lea edi,bad_chars
mov esi,lpSubStr
mov ecx,subLngth
add esi,subLngth
dec esi ;point to last char of substring, zero index based
;-------------------------
;eg: string = "batata", a=1, t=1, b=5, any other chars will be = 6 (sizeof string)
;eg: string = "done", e=1, n=1, o=2, d=3, any other chars will be = 4
;-------------------------
movzx eax,byte ptr [esi] ;read last char
mov dword ptr [edi+eax*4],1 ;its size will be one displacement, insert in bad_chars table
mov ebx,1 ;second char will be one too, third char will be 2, fourth char will be 3, ...
dec esi
dec ecx
mov edx,subLngth
.while ecx != 0 ;while exist chars in substring
movzx eax,byte ptr [esi] ;read that char
.if dword ptr [edi+eax*4] == edx ;actual char have a sizeof subLngth?
mov dword ptr [edi+eax*4],ebx ;yes, so overwrite this char displacement substring in lookup table
inc ebx ;increase displacement ;<<<<<
.endif
dec esi
dec ecx
.endw
;------------------------------
mov eax,-2 ;assume error in parameters data
mov esi,lpSubStr
mov edi,lpSource
mov ebx,srcLngth
mov ecx,subLngth
test ebx,ebx
jz done
test ecx,ecx
jz done
cmp ecx,ebx
ja done
add ebx,edi ;ebx=lpSource+srcLngth
add edi,startpos ;edi=lpSource+startpos
mov edx,subLngth
align 4
try_again:
mov eax,-1
cmp ebx,edi
jbe done
mov ecx,edx
@@:
movzx eax,byte ptr [edi+ecx-1] ;read one char from source
cmp byte ptr [esi+ecx-1],al
jne @F
dec ecx
jnz @B
sub edi,lpSource
mov eax,edi
jmp done
@@:
mov eax,dword ptr [bad_chars+eax*4]
cmp eax,edx ;cmp eax,edx
cmove eax,ecx
add edi,eax
jmp try_again
align 4
done:
ret
Boyer_Moore endp
end start
--------------------------------------
;version 0.02
;ML.EXE /c /coff /Cp main.asm
;LINK.EXE /SUBSYSTEM:CONSOLE main.obj
;main
include \masm32\include\masm32rt.inc
.686
Boyer_Moore PROTO :dword,:dword,:dword,:dword,:dword,:dword
printf proto C :dword, :vararg ; msvcrt
.data
format_string db "pos: %d",13,10,0
occurrences db "found %d ocurrence(s)",13,10,0
file db "\masm32\include\windows.inc",0
string db "Duplicate"
align 4
Text_test db 'xx includix'
String_test db 'did'
.data?
bad_chars dd 256 dup (?)
.code
start:
;-----------------------------------
call main ;will search for string "Duplicate" in file \masm32\include\windows.inc
;-----------------------------------
;if you want to check specific cases, comment call above and uncoment lines below to Text_test and String_test cases
; invoke Boyer_Moore,0,addr Text_test,sizeof Text_test,addr String_test,sizeof String_test,0
; push eax
; invoke printf,addr format_string,eax
; pop eax
;-----------------------------------
invoke ExitProcess,eax
main proc uses esi edi ebx
LOCAL plc:DWORD ;cmd line pointer
LOCAL saida:DWORD ;output handle
LOCAL manipulador_arquivo:dword ;file handle
LOCAL tamanho:DWORD ;source file size
LOCAL mapeado:DWORD ;ptr source file
LOCAL mem_aloc:DWORD ;ptr mem alloc
LOCAL nada:dword
LOCAL arq_mape:DWORD ;mapped file handle
LOCAL pos:DWORD ;initial position
LOCAL return_value:DWORD ;return result in eax register
LOCAL first_r:DWORD
invoke GetStdHandle,STD_OUTPUT_HANDLE
mov saida,eax
invoke GetCommandLine
mov plc,eax
invoke CreateFile,addr file,GENERIC_READ,0,0,OPEN_EXISTING,FILE_FLAG_SEQUENTIAL_SCAN,0 ;open source file
mov manipulador_arquivo,eax ;handle
invoke GetFileSize, manipulador_arquivo,NULL ;sizeof source file
mov tamanho,eax
invoke CreateFileMapping,manipulador_arquivo,NULL,PAGE_READONLY,0,0,0 ;mapped file handle
mov arq_mape,eax
invoke MapViewOfFile,eax,FILE_MAP_READ,0,0,0 ;map it, file contents pointer
mov mapeado,eax
invoke CloseHandle,manipulador_arquivo ;don't need this anymore
mov pos,0
mov first_r,0 ;inserted one more parameter to Boyer_Moore procedure
;if first_run is zero, so table creation is done
;if first_run is not zero, so table creation is skipped, gaining time execution
;so, table bad_chars was moved to data section to be globally accessed
@@:
invoke Boyer_Moore,pos,mapeado,tamanho,addr string,sizeof string,first_r
mov return_value,eax
.if return_value == -1
jmp done
.elseif return_value == -2
jmp done
.else
invoke printf,addr format_string,return_value
inc return_value
m2m pos,return_value
inc first_r ;this is a occurrences count, and will trigger to not recreate lookup table
jmp @B
.endif
done:
invoke UnmapViewOfFile,mapeado
invoke printf,addr occurrences,first_r
mov eax,return_value
ret
main endp
Boyer_Moore proc uses ebx esi edi startpos:dword,lpSource:dword,srcLngth:dword,lpSubStr:dword,subLngth:dword,first_run:dword
cmp first_run,0
jnz @F
lea edi,bad_chars ;create a lookup table filled with substring size
mov ecx,256
mov eax,subLngth
rep stosd
lea edi,bad_chars
mov esi,lpSubStr
mov ecx,subLngth
add esi,subLngth
dec esi ;point to last char of substring, zero index based
;-------------------------
;eg: string = "batata", a=1, t=1, b=5, any other chars will be = 6 (sizeof string)
;eg: string = "done", e=1, n=1, o=2, d=3, any other chars will be = 4
;-------------------------
movzx eax,byte ptr [esi] ;read last char
mov dword ptr [edi+eax*4],1 ;its size will be one displacement, insert in bad_chars table
mov ebx,1 ;second char will be one too, third char will be 2, fourth char will be 3, ...
dec esi
dec ecx
mov edx,subLngth
.while ecx != 0 ;while exist chars in substring
movzx eax,byte ptr [esi] ;read that char
.if dword ptr [edi+eax*4] == edx ;actual char have a sizeof subLngth?
mov dword ptr [edi+eax*4],ebx ;yes, so overwrite this char displacement substring in lookup table
inc ebx ;increase displacement
.endif
dec esi
dec ecx
.endw
;------------------------------
@@:
mov eax,-2 ;assume error in parameters data
mov esi,lpSubStr
mov edi,lpSource
mov ebx,srcLngth
mov ecx,subLngth
test ebx,ebx
jz done
test ecx,ecx
jz done
cmp ecx,ebx
ja done
add ebx,edi ;ebx=lpSource+srcLngth
add edi,startpos ;edi=lpSource+startpos
mov edx,subLngth
align 4
try_again:
mov eax,-1
cmp ebx,edi
jbe done
mov ecx,edx
@@:
movzx eax,byte ptr [edi+ecx-1] ;read one char from source
cmp byte ptr [esi+ecx-1],al
jne @F
dec ecx
jnz @B
sub edi,lpSource
mov eax,edi
jmp done
@@:
mov eax,dword ptr [bad_chars+eax*4]
cmp eax,edx ;test eax,edx
cmove eax,ecx
add edi,eax
jmp try_again
align 4
done:
ret
Boyer_Moore endp
end start
Code can be optimized more, but I need travel tomorrow, so, ... .
---edited---
Corrected code. First fault was optimization used "test" instead of "cmp", second was a logic problem.