Author Topic: 2 text stripper algos for ascii only.  (Read 271 times)

hutch--

  • Administrator
  • Member
  • ******
  • Posts: 4886
  • Mnemonic Driven API Grinder
    • The MASM32 SDK
2 text stripper algos for ascii only.
« on: June 23, 2017, 11:14:24 PM »
The test piece has 2 algos in it. The first is a simple one that throws away anything over ascii 126, with the option of either removing the high ascii character outright or replacing it with a space.

The second algo is more powerful in that it runs off a table and can accept or reject any character apart from the zero terminator. While it is slightly slower, it is probably fast enough in most contexts.

This is the test code, zip file attached.

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    include \masm32\include64\masm64rt.inc

    ; ----------------------------------------------------------------
    ; Test fixtures. Each sample string exists twice because both
    ; routines overwrite their input in place and every string is run
    ; with a different option value (0 = remove, 1 = replace by space).
    ; Each "p..." QWORD holds the address of the string declared just
    ; above it and is what gets passed to the routines and to conout.
    ; ----------------------------------------------------------------
    .data
      ; inputs for low_ascii: digits mixed with bytes 174, 175 and 215 (all > 126)
      tst0 db "1234",174,175,"5678",215,"9012",0
      ptst0 dq tst0
      tst1 db "1234",174,175,"5678",215,"9012",0
      ptst1 dq tst1

      ; inputs for str_strip: digits mixed with bytes 247 and 156 (all > 126)
      text0 db "123",247,"456",156,"789",0
      ptxt0 dq text0
      text1 db "123",247,"456",156,"789",0
      ptxt1 dq text1

    .code

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

; ------------------------------------------------------------------
; entry_point - console demo driver for low_ascii and str_strip.
;
; For each routine the same sample text is processed twice: once with
; option 0 (unwanted bytes removed) and once with option 1 (unwanted
; bytes replaced by a space). The string is displayed before the call
; and again afterwards so the in-place change can be compared.
;
; NOTE(review): stdout, conout, rcall, chr$, lf, waitkey and .exit come
; from masm64rt.inc, which is not visible here. They are presumably the
; SDK's console output, register-call, key-wait and process-exit
; helpers - confirm against the MASM64 SDK macro reference.
; ------------------------------------------------------------------
entry_point proc

    ; ---- banner for the fixed-range routine (13,10 = CR,LF) ----
    stdout " ********************",13,10
    stdout " fixed low ascii only",13,10
    stdout " ********************",13,10

    ; show original, strip with option 0, show result with a caption
    conout ptst0,lf
    rcall low_ascii,ptst0,0
    conout ptst0,"    ",chr$(174)," Characters removed",lf

    ; same input text, option 1: rejected bytes become spaces
    conout ptst1,lf
    rcall low_ascii,ptst1,1
    conout ptst1," ",chr$(174)," Replaced with space",lf

    ; ---- banner for the table driven routine ----
    stdout " ***********************************************",13,10
    stdout " flexible character range removal or replacement",13,10
    stdout " ***********************************************",13,10

    conout ptxt0,lf
    rcall str_strip,ptxt0, 0                ; strip text by removal of unwanted characters
    conout ptxt0,"       ",chr$(174)," Characters removed",lf

    conout ptxt1,lf
    rcall str_strip,ptxt1, 1                ; modify text by replacing unwanted characters with spaces
    conout ptxt1,"     ",chr$(174)," Replaced with space",lf

    stdout "-----------------------------------",13,10

    ; presumably holds the console open until a key is pressed, then ends the process
    waitkey
    .exit

entry_point endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

NOSTACKFRAME                                ; leaf procedure of plain mnemonics, no stack frame wanted

 low_ascii proc
  ; ------------------------------------------------------------------
  ; low_ascii - strip every byte >= 127 from a zero terminated string.
  ;
  ; The string is rewritten in place. Bytes 1 to 126 (control codes
  ; included) are kept as they are; bytes 127 to 255 are either dropped
  ; or replaced by a space depending on the option argument.
  ;
  ; ARGUMENTS
  ;   1.  rcx = address of the zero terminated string (must be writable)
  ;   2.  rdx = what to do with a rejected byte
  ;         0 = write nothing in its place (string gets shorter)
  ;     other = write a space (32) in its place (length unchanged)
  ; RETURNS
  ;   rax = the address that was passed in rcx
  ; CLOBBERS
  ;   r9, r10, r11, flags (rcx and rdx are left untouched)
  ;
  ; In-place use is safe because a byte is always read before anything
  ; is written at or beyond its position, so the write address never
  ; runs ahead of the read address.
  ; ------------------------------------------------------------------
    mov r10, rcx                            ; r10 = write cursor
    mov r9,  rdx                            ; r9  = option, 0 means remove
    lea r11, [rcx-1]                        ; r11 = read cursor, one short because it is
                                            ;       incremented before every read
    jmp la_next                             ; enter the scan loop

  la_skip:
  ; reached for every rejected byte
    test r9, r9                             ; removal requested ?
    jz la_next                              ; yes, emit nothing
    mov BYTE PTR [r10], 32                  ; no, a space takes its place
    add r10, 1

  la_next:
  ; main loop unrolled by four: three copies that leave on the terminator
  ; followed by a fourth copy that closes the loop
  REPEAT 3
    add r11, 1
    movzx eax, BYTE PTR [r11]               ; eax = next source byte, 0 to 255
    cmp eax, 127
    jae la_skip                             ; unsigned test, character codes are never negative
    mov BYTE PTR [r10], al                  ; keep the byte (the terminator is copied too)
    add r10, 1
    test eax, eax                           ; was that the terminator ?
    jz la_done
  ENDM

    add r11, 1
    movzx eax, BYTE PTR [r11]               ; eax = next source byte, 0 to 255
    cmp eax, 127
    jae la_skip                             ; unsigned test, character codes are never negative
    mov BYTE PTR [r10], al                  ; keep the byte (the terminator is copied too)
    add r10, 1
    test eax, eax                           ; was that the terminator ?
    jnz la_next                             ; no, go round again

  la_done:
    mov rax, rcx                            ; return the original string address
    ret

 low_ascii endp

STACKFRAME

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

NOSTACKFRAME                                ; stack frame not required for pure mnemonic procedure

 str_strip proc
  ; ------------------------------------------------------------------
  ; str_strip - filter a zero terminated string through a 256 entry
  ;             lookup table, rewriting the string in place.
  ;
  ; A byte whose table entry is non-zero is kept. A byte whose entry
  ; is zero is either dropped or replaced by a space. With the table
  ; below the kept set is tab (9), LF (10), CR (13) and 32 to 126.
  ; Entry 0 has to stay non-zero: the terminator is copied through the
  ; same path as any kept byte and is what ends the loop.
  ;
  ;   rcx = address of the string (must be writable)
  ;   rdx = 0     : remove rejected bytes from the string
  ;         other : replace each rejected byte with a space (32)
  ;
  ;   returns rax = the address that was passed in rcx
  ;   uses r9, r10, r11 and the flags
  ; ------------------------------------------------------------------
    lea r10, chtbl                          ; r10 = base of the accept / reject table
    mov r9,  rcx                            ; r9  = write cursor
    lea r11, [rcx-1]                        ; r11 = read cursor, pre-decremented because
                                            ;       the loop increments before reading
    jmp ss_next                             ; go straight into the main loop

  ss_skip:
  ; reached for every byte the table rejects
    test rdx, rdx                           ; option 0 = drop the byte
    jz ss_next
    mov BYTE PTR [r9], 32                   ; any other option = space in its place
    add r9, 1

  ss_next:
    add r11, 1
    movzx eax, BYTE PTR [r11]               ; eax = next source byte, used as the table index
    cmp BYTE PTR [r10+rax], 0               ; zero entry means the byte is not wanted
    je ss_skip
    mov BYTE PTR [r9], al                   ; wanted, copy it through
    add r9, 1
    test eax, eax                           ; the terminator has just been written ?
    jnz ss_next                             ; not yet, keep going

    mov rax, rcx                            ; return the original string address
    ret

  ; one byte per character code, non-zero = keep, zero = reject
  align 16
  chtbl:
    db 1                                    ;   0        terminator, must stay 1
    db 8 dup (0)                            ;   1 to 8
    db 1,1                                  ;   9, 10    tab, line feed
    db 0,0                                  ;  11, 12
    db 1                                    ;  13        carriage return
    db 18 dup (0)                           ;  14 to 31
    db 95 dup (1)                           ;  32 to 126
    db 129 dup (0)                          ; 127 to 255 no DEL, no high ascii

 str_strip endp

STACKFRAME                                  ; restore default stack frame

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    end
hutch at movsd dot com
http://www.masm32.com    :biggrin:  :biggrin: