Author Topic: another tokenizer  (Read 2081 times)


  • Member
  • ****
  • Posts: 872
another tokenizer
« on: September 24, 2015, 09:49:14 PM »
In one of my projects I needed a tokenizer that both retained formatting and kept empty lines.
This is the result of my work.

I need someone to compare the timing between this one and ltok in the masm32 library.

I don't need it to be very fast, just as fast as ltok.

I would time it myself, but my computer gives a lot of false results. (x4, x2, etc...)

Code: [Select]
align 16
nops 5       ; to align first loop
ztok proc zedd:DWORD, ddez:DWORD
    mov edi, [esp+4]
    push esi
    push edi

; - count lines ------

    mov ebx, 1
    mov al, [edi]
    cmp al, 0   ; if zero, we're done
    jz outta
    cmp al, 0Dh ; -- eol found?
    jnz nope 
    inc ebx     ; -- if yes, inc ctr
    inc edi     ; -- inc ptr
    jmp @b      ; -- jmp top

; - allocate array memory ------

    mov edx, ebx
    shl edx, 2
    invoke GlobalAlloc, GPTR, edx
    mov ecx, [esp+10h]
    mov dword ptr [ecx], eax

; - tokenize lines of text, retaining formatting & empty lines

    mov ecx, eax
    mov edi, [esp+4]
    xor esi, esi
    nops 3      ; - for alignment ---
addptr:         ; add pointer to current line, to the array

    mov dword ptr [ecx+esi*4], edi
    nop          ; for alignment
    mov al, [edi]
    cmp al, 0
    jz done                     ; if zero found, job done.
    cmp al, 0Dh                 ; Cr found?
    jnz notlf
    cmp byte ptr [edi+1], 0Ah   ; is it followed by Lf?
    jnz lffy
    mov byte ptr [edi], 0       ; zero the Cr
    inc edi                     ; increment buffer
    mov byte ptr [edi], 0       ; zero the Lf
    inc edi                     ; increment buffer
    inc esi                     ; increment index
    jmp addptr                  ; if we got this far, it's a new line!
notlf:                          ; if we jumped here, not a new line :(
    inc edi                     ; increment buffer
    jmp top                     ; jump to top
    mov eax, ebx                ; put line count into eax
    pop edi
    pop esi
    ret 8
ztok endp


Thanks for the help, guys  ::)
« Last Edit: September 25, 2015, 10:52:01 PM by zedd151 »