Hello sir jj2007,
Thanks for testing. I have repaired some faults in that code.
;version 0.02
;ML.EXE /c /coff /Cp main.asm
;LINK.EXE /SUBSYSTEM:CONSOLE main.obj
;main
include \masm32\include\masm32rt.inc
.686
Boyer_Moore PROTO :dword,:dword,:dword,:dword,:dword,:dword
printf proto C :dword, :vararg ; msvcrt
.data
format_string db "pos: %d",13,10,0 ;printf format used by main for each match position
occurrences db "found %d ocurrence(s)",13,10,0 ;printf format used by main for the final match count
file db "\masm32\include\windows.inc",0 ;zero-terminated path of the file that main searches
string db "Duplicate" ;search pattern; no terminating zero, its length is taken with sizeof
;string db "Duplicat"
align 4
Text_test db 'xx includix' ;sample text for the optional manual test at start
String_test db 'did' ;sample pattern for that test; stored immediately after Text_test
.data?
bad_chars dd 256 dup (?) ;shift table, one dword per byte value; written and read by Boyer_Moore
.code
start:
;-----------------------------------
;Program entry point (named by "end start" at the bottom of the file).
call main ;searches for the bytes at "string" in the file named by "file"; result is returned in eax
;-----------------------------------
;To check a specific case instead, comment out the call above and uncomment
;the lines below; they search Text_test for String_test and print the result.
; invoke Boyer_Moore,0,addr Text_test,sizeof Text_test,addr String_test,sizeof String_test,0
; push eax
; invoke printf,addr format_string,eax
; pop eax
;-----------------------------------
invoke ExitProcess,eax ;eax is handed to ExitProcess as its exit-code argument
main proc uses esi edi ebx
;-----------------------------------------------------------------------
; main - map the file named by "file" into memory, print the offset of
;        every occurrence of "string" in it, then print the match count.
; In:    nothing
; Out:   eax = last value returned by Boyer_Moore (-1 = no further match,
;              -2 = bad parameters), or -2 if the file could not be
;              opened, sized or mapped (in that case nothing is printed).
; Notes: every handle/view that was successfully created is released
;        before returning.
;-----------------------------------------------------------------------
    LOCAL manipulador_arquivo:DWORD ;file handle
    LOCAL tamanho:DWORD             ;source file size (low dword)
    LOCAL mapeado:DWORD             ;pointer to the mapped file contents
    LOCAL arq_mape:DWORD            ;file-mapping handle
    LOCAL pos:DWORD                 ;offset at which the next search starts
    LOCAL return_value:DWORD        ;value handed back in eax
    LOCAL first_r:DWORD             ;match counter; non-zero also tells Boyer_Moore to reuse its table

    mov     return_value,-2         ;assume failure until a search has actually run
    mov     first_r,0
    mov     pos,0

    ;open the source file read-only; FILE_SHARE_READ lets other readers keep it open
    invoke  CreateFile,addr file,GENERIC_READ,FILE_SHARE_READ,0,OPEN_EXISTING,FILE_FLAG_SEQUENTIAL_SCAN,0
    cmp     eax,INVALID_HANDLE_VALUE
    je      finished
    mov     manipulador_arquivo,eax

    invoke  GetFileSize,manipulador_arquivo,NULL
    mov     tamanho,eax

    invoke  CreateFileMapping,manipulador_arquivo,NULL,PAGE_READONLY,0,0,0
    mov     arq_mape,eax
    invoke  CloseHandle,manipulador_arquivo ;the mapping object keeps the file open
    cmp     arq_mape,0              ;NULL: mapping failed (this includes an empty file)
    je      finished
    cmp     tamanho,-1              ;GetFileSize failure value
    je      close_mapping

    invoke  MapViewOfFile,arq_mape,FILE_MAP_READ,0,0,0
    mov     mapeado,eax
    test    eax,eax
    jz      close_mapping

next_match:
    ;first_r is zero only on the first call, so the shift table is built once
    invoke  Boyer_Moore,pos,mapeado,tamanho,addr string,sizeof string,first_r
    mov     return_value,eax
    cmp     eax,-1                  ;no further occurrence
    je      search_done
    cmp     eax,-2                  ;parameters rejected
    je      search_done
    invoke  printf,addr format_string,eax
    mov     eax,return_value        ;printf does not preserve eax
    inc     eax
    mov     pos,eax                 ;resume one byte after this match
    inc     first_r
    jmp     next_match

search_done:
    invoke  UnmapViewOfFile,mapeado
    invoke  printf,addr occurrences,first_r
close_mapping:
    invoke  CloseHandle,arq_mape
finished:
    mov     eax,return_value
    ret
main endp
Boyer_Moore proc uses ebx esi edi startpos:dword,lpSource:dword,srcLngth:dword,lpSubStr:dword,subLngth:dword,first_run:dword
;-----------------------------------------------------------------------
; Boyer_Moore - find the first occurrence of a byte pattern in a byte
;               buffer, comparing each window from right to left and
;               skipping ahead with the global bad_chars shift table.
; In:    startpos  = offset in the source at which to start searching
;        lpSource  = address of the source buffer
;        srcLngth  = source length in bytes
;        lpSubStr  = address of the pattern
;        subLngth  = pattern length in bytes
;        first_run = 0: (re)build bad_chars for this pattern first
;                    non-zero: reuse the table left by an earlier call
; Out:   eax = offset of the match from lpSource,
;              -1 if there is no match at or after startpos,
;              -2 if a pointer is null, a length is zero, or the pattern
;                 is longer than the source.
; Uses:  ebx, esi, edi are preserved (USES); eax, ecx, edx are modified.
; Notes: only windows that lie completely inside the source are compared,
;        so no byte past lpSource+srcLngth is ever read.
;        The pattern is validated before the table is built, so an empty
;        pattern can no longer make the table loop run off the buffer.
;-----------------------------------------------------------------------
    mov     eax,-2                  ;assume error in parameters
    mov     esi,lpSubStr
    mov     edx,subLngth            ;edx = pattern length for the whole routine
    test    esi,esi
    jz      done
    test    edx,edx
    jz      done

    cmp     first_run,0
    jne     table_ready

    ;--- build the shift table ------------------------------------------
    ;every entry starts as subLngth, which marks "byte not in pattern"
    mov     edi,offset bad_chars
    mov     ecx,256
    mov     eax,edx
    rep     stosd
    ;the last pattern char gets 1; walking right to left, each char seen
    ;for the first time gets the running value of ebx, which grows by one
    ;per NEW char (not per position)
    ;eg: string = "batata", a=1, t=1, b=2, any other char = 6 (sizeof string)
    ;eg: string = "done", e=1, n=1, o=2, d=3, any other char = 4
    movzx   eax,byte ptr [esi+edx-1]
    mov     dword ptr [bad_chars+eax*4],1
    mov     ebx,1                   ;shift for the next new char
    lea     ecx,[edx-1]             ;ecx = chars still to visit
    jmp     build_check
build_next:
    movzx   eax,byte ptr [esi+ecx-1]
    cmp     dword ptr [bad_chars+eax*4],edx ;char not seen yet?
    jne     build_skip
    mov     dword ptr [bad_chars+eax*4],ebx
    inc     ebx                     ;must stay inside the "new char" case: the original
                                    ;author notes a fault when it is done for every position
build_skip:
    dec     ecx
build_check:
    test    ecx,ecx
    jnz     build_next

    ;--- validate the source and set up the window limits ---------------
table_ready:
    mov     eax,-2
    mov     edi,lpSource
    mov     ebx,srcLngth
    test    edi,edi
    jz      done
    cmp     edx,ebx
    ja      done                    ;pattern longer than source (covers srcLngth = 0)

    mov     eax,-1                  ;from here on the only failure is "not found"
    sub     ebx,edx                 ;ebx = offset of the last window that still fits
    mov     ecx,startpos
    cmp     ecx,ebx
    ja      done
    add     ebx,edi                 ;ebx = address of the last valid window
    add     edi,ecx                 ;edi = address of the current window

    align 4
try_again:
    cmp     edi,ebx
    ja      not_found               ;window would extend past the source
    mov     ecx,edx                 ;ecx = 1-based index of the char being compared
compare_next:
    movzx   eax,byte ptr [edi+ecx-1] ;source char
    cmp     byte ptr [esi+ecx-1],al
    jne     mismatch
    dec     ecx
    jnz     compare_next
    mov     eax,edi                 ;all chars matched
    sub     eax,lpSource
    jmp     done

mismatch:
    mov     eax,dword ptr [bad_chars+eax*4] ;shift for the mismatching source char
    cmp     eax,edx
    cmove   eax,ecx                 ;char not in pattern: move the window just past it
    add     edi,eax
    jmp     try_again

not_found:
    mov     eax,-1
done:
    ret
Boyer_Moore endp
end start
Hmm, the timing results don't look good. Well, ...