Hey all,
I've made extensive use of the masm32 lib, primarily the string functions, in a number of my apps. I'm porting to 64-bit at the moment and need to try to duplicate the masm32 lib's functionality.
I was wondering if anyone had started looking at this in terms of string functions or had any suggestions as a way to start this.
I'm trying to avoid relying on msvcrt just as it adds a dependency, and the static lib equivalent adds much bloat.
I was tinkering with the notion that perhaps a proper dedicated string library would be useful with functions for substr,substring,len,trim,left,right,monospace,upper,lower,replace,pad,concat,multicat and something similar to printf's format ability.
John
I think you must write that one yourself, if you won't use the CRT.
Here are some routines I've used a while back (requires jWasm and WinInc (or at least declaration of TCHAR)):
; Turn off the assembler-generated prologue/epilogue for the procedures that
; follow: they read their arguments directly from rcx/rdx/r8/r9 and from the
; caller's stack via rsp.
option prologue:none
option epilogue:none
; szSize - length of a zero-terminated TCHAR string.
; In:       rcx = psz
; Out:      rax = number of characters before the terminating zero
; Modifies: rax (rcx is left untouched)
szSize proc psz:PTCHAR
        mov     rax,-1                  ; the first inc below brings the index to 0
scan:
        inc     rax
        cmp     TCHAR ptr [rcx+rax*SIZEOF TCHAR],0
        jne     scan                    ; stop on the terminator; rax is its index
        ret
szSize endp
; szcMultiCat - appends the strings pszArgs[i] to the string already in pszBuffer.
; In:       rcx = pszBuffer (zero-terminated string to extend)
;           rdx = ccBuffer  (buffer capacity in characters, terminator included)
;           r8  = nArgs     (number of strings to append)
;           r9  = first string; further strings are read from the caller's stack
; MODIFIES: rax,r8,r9,r10,r11
;           rcx and rdx will remain unaffected
; RETURNS:  rax = size of new string in characters (without term. zero).
;           If the buffer is too small, zero is returned. However,
;           as long as ccBuffer is not zero a terminating zero is
;           written to the buffer.
szcMultiCat proc pszBuffer:PTCHAR,ccBuffer:QWORD,nArgs:QWORD,pszArgs:VARARG
        test    rdx,rdx
        jz      @E                      ; zero-sized buffer: nothing may be written
        mov     r10,5                   ; qword index of the 2nd string relative to rsp:
                                        ; [rsp] = return address, [rsp+8..32] = home space
                                        ; of the four register arguments, [rsp+40] = 5th argument
        xor     rax,rax
        align 8
        ; Find the end of the existing string. The index is checked against the
        ; capacity BEFORE the character is read, so nothing at or beyond
        ; pszBuffer[ccBuffer] is ever touched.
@@:     cmp     rax,rdx
        jae     @O                      ; no terminator inside the buffer
        test    TCHAR ptr [rcx+rax*SIZEOF TCHAR],-1
        jz      @F
        lea     rax,[rax+1]
        jmp     @B
@@:
        test    r8,r8
        jz      @1                      ; nothing to append
        ; Copy the current source string (r9) character by character.
@L:     movzx   r11,TCHAR ptr [r9]
        test    r11,r11
        jz      @F                      ; end of this source string
        mov     [rcx+rax*SIZEOF TCHAR],@CatStr(<r11>,@SubStr(<bw>,SIZEOF TCHAR,1))
        lea     rax,[rax+1]
        cmp     rax,rdx
        jae     @O                      ; no room left for the terminator
        lea     r9,[r9+SIZEOF TCHAR]
        jmp     @L
@@:
        dec     r8
        jz      @1                      ; all strings appended
        mov     r9,[rsp+r10*8]          ; fetch the next string pointer from the stack
        lea     r10,[r10+1]
        jmp     @L
        align 16
@1:     mov     TCHAR ptr [rcx+rax*SIZEOF TCHAR],0
        ret
        align 16
        ; Overflow: terminate at the last character slot in use and report failure.
@O:     mov     TCHAR ptr [rcx+rax*SIZEOF TCHAR-SIZEOF TCHAR],0
@E:     xor     rax,rax
        ret
szcMultiCat endp
; Switch back to the assembler's default prologue/epilogue generation.
option prologue:PrologueDef
option epilogue:EpilogueDef
;########
; NOTE(review): JWasm-specific options. Presumably frame:auto lets the assembler
; emit the unwind data for PROC FRAME itself, and win64:0 turns off automatic
; saving of the register arguments (dq2wsz stores rcx by hand) - confirm against
; the JWasm manual.
option frame:auto
option win64:0
; Bit flags for the dqFlags parameter of dq2wsz.
DQ2WSZ_LEADING_ZEROS EQU 1
DQ2WSZ_LOWER_CASE EQU 2
DQ2WSZ_SIGNED EQU 4
; Operand-size flags; dq2wsz rejects calls that set more than one of these three.
DQ2WSZ_DWORD EQU 8
DQ2WSZ_WORD EQU 16
DQ2WSZ_BYTE EQU 32
DQ2WSZ_GET_MAX_SIZE EQU 64
DQ2WSZ_PLUS EQU 128
; Mask of all defined flags; dq2wsz returns zero if any other bit is set.
DQ2WSZ_VALID EQU (DQ2WSZ_LEADING_ZEROS or DQ2WSZ_LOWER_CASE or DQ2WSZ_SIGNED or DQ2WSZ_DWORD or DQ2WSZ_WORD or DQ2WSZ_BYTE or DQ2WSZ_GET_MAX_SIZE or DQ2WSZ_PLUS)
; Description: converts a signed or unsigned value, given through rdx,
;              to a numeric string in the specified radix.
; MODIFIES: rax,rcx,rdx and r10.
;           r8 and r9 will remain unaffected
; RETURNS:  rax = pszBuffer on success,
;           rax = 0 if uiRadix is out of range, dqFlags contains unknown bits
;                 or more than one of DQ2WSZ_DWORD/WORD/BYTE is set,
;           rax = character count if DQ2WSZ_GET_MAX_SIZE is set (see below).
; Parameters:
;   pszBuffer: pointer to the buffer that receives the string. The required size
;              depends on the radix. You are always on the safe side with a size
;              of 66 characters (64 binary digits + sign + terminating zero).
;              See also the DQ2WSZ_GET_MAX_SIZE flag.
;   dqNum:     number to convert. Unless changed by the flags, this is a QWORD.
;   uiRadix:   2 - 36
;   dqFlags:   DQ2WSZ_LEADING_ZEROS  print leading zeros
;              DQ2WSZ_LOWER_CASE     use lower-case letters for digits above 9
;              DQ2WSZ_SIGNED         dqNum is a signed value
;              DQ2WSZ_DWORD          dqNum is a DWORD
;              DQ2WSZ_WORD             "    "  " WORD
;              DQ2WSZ_BYTE             "    "  " BYTE
;              DQ2WSZ_GET_MAX_SIZE   return (rax) the maximum number of characters
;                                    (+term. zero) for the given radix and flags.
;                                    Only uiRadix (r8) and dqFlags (r9) are used;
;                                    pszBuffer and dqNum are ignored. The return
;                                    value can be used for allocating a buffer.
;              DQ2WSZ_PLUS           force a plus sign on non-negative values
;
dq2wsz proc FRAME pszBuffer:PTCHAR,dqNum:QWORD,uiRadix:QWORD,dqFlags:QWORD
LOCAL sz[66]:CHAR                       ; digits, least significant first
LOCAL sign:DWORD                        ; non-zero if a '-' has to be printed
.const
align 16
lutU db "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
align 16
lutL db "0123456789abcdefghijklmnopqrstuvwxyz"
align 16
; Maximum digit count per radix (index = radix), one row per operand size.
; The rows are addressed as row*SIZEOF nDigits, so all rows must have the same length.
nDigits db 0,0,64,41,32,28,25,23,22,21,20,19,18,18,17,17,16,16,16,16,15,15,15,15,14,14,14,14,14,14,14,13,13,13,13,13,13 ; QWORDs
db 0,0,32,21,16,14,13,12,11,11,10,10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 ; DWORDs
db 0,0,16,11, 8, 7, 7, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ; WORDs
db 0,0, 8, 6, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 ; BYTEs
.code
    ; Reject an invalid radix and unknown flag bits.
    .if r8 > 36 || r8 < 2 || r9 & (NOT DQ2WSZ_VALID)
        xor rax,rax
        ret
    .endif

    ; Bring the value into rax, extended according to the operand-size flag.
    mov r10,r9
    and r10,DQ2WSZ_DWORD or DQ2WSZ_WORD or DQ2WSZ_BYTE
    .if r10
        ; Highest and lowest set bit must coincide, i.e. exactly one size flag is set.
        mov rax,r10
        bsr rax,rax
        bsf r10,r10
        .if r10 != rax
            xor rax,rax
            ret
        .endif
        .if r9 & DQ2WSZ_DWORD
            .if r9 & DQ2WSZ_SIGNED
                movsxd rax,edx
            .else
                mov eax,edx             ; a 32-bit write zero-extends into rax
            .endif
        .elseif r9 & DQ2WSZ_WORD
            .if r9 & DQ2WSZ_SIGNED
                movsx rax,dx
            .else
                movzx rax,dx            ; the value lives in dx (rax holds no input here)
            .endif
        .else
            .if r9 & DQ2WSZ_SIGNED
                movsx rax,dl
            .else
                movzx rax,dl
            .endif
        .endif
    .else
        mov rax,rdx
    .endif

    ; Size query: digits for radix/size + optional sign + terminating zero.
    .if r9 & DQ2WSZ_GET_MAX_SIZE
        mov rax,OFFSET nDigits
        .if r9 & DQ2WSZ_DWORD
            movzx rax,BYTE ptr [rax][r8][1*SIZEOF nDigits]
        .elseif r9 & DQ2WSZ_WORD
            movzx rax,BYTE ptr [rax][r8][2*SIZEOF nDigits]
        .elseif r9 & DQ2WSZ_BYTE
            movzx rax,BYTE ptr [rax][r8][3*SIZEOF nDigits]
        .else
            movzx rax,BYTE ptr [rax][r8][0*SIZEOF nDigits]
        .endif
        .if r9 & DQ2WSZ_SIGNED || r9 & DQ2WSZ_PLUS
            lea rax,[rax+1]
        .endif
        lea rax,[rax+1]
        ret
    .endif

    mov pszBuffer,rcx                   ; rcx is reused as digit counter below

    ; Pick the digit table once; r10 stays valid until the digits are produced.
    mov r10,OFFSET lutU
    .if r9 & DQ2WSZ_LOWER_CASE
        mov r10,OFFSET lutL
    .endif

    mov rdx,8000000000000000h           ; sign-bit mask
    xor rcx,rcx                         ; rcx = number of digits stored in sz
    .if r9 & DQ2WSZ_SIGNED && rax & rdx
        .if rax == rdx
            ; Most negative value: peel off one digit with a signed division
            ; before the value is negated.
            cqo
            idiv r8
            neg rdx                     ; remainder is <= 0 here
            mov dl,BYTE ptr [r10][rdx]
            mov sz[rcx],dl
            lea rcx,[rcx+1]
        .endif
        neg rax
        mov sign,1
    .else
        mov sign,0
    .endif

    ; Produce the digits, least significant first.
    .if rax
        .while rax
            xor rdx,rdx
            div r8
            mov dl,BYTE ptr [r10][rdx]
            mov sz[rcx],dl
            lea rcx,[rcx+1]
        .endw
    .else
        mov sz[rcx],'0'
        lea rcx,[rcx+1]
    .endif

    ; Write sign, optional leading zeros and the digits in reverse order.
    mov r10,pszBuffer
    xor rdx,rdx                         ; rdx = write index into the output buffer
    .if sign
        mov TCHAR ptr [r10],'-'
        lea rdx,[rdx+1]
    .elseif r9 & DQ2WSZ_PLUS
        mov TCHAR ptr [r10],'+'
        lea rdx,[rdx+1]
    .endif
    .if r9 & DQ2WSZ_LEADING_ZEROS
        mov rax,OFFSET nDigits
        .if r9 & DQ2WSZ_DWORD
            movzx rax,BYTE ptr [rax][r8][1*SIZEOF nDigits]
        .elseif r9 & DQ2WSZ_WORD
            movzx rax,BYTE ptr [rax][r8][2*SIZEOF nDigits]
        .elseif r9 & DQ2WSZ_BYTE
            movzx rax,BYTE ptr [rax][r8][3*SIZEOF nDigits]
        .else
            movzx rax,BYTE ptr [rax][r8][0*SIZEOF nDigits]
        .endif
        sub rax,rcx                     ; zeros needed = maximum digits - digits produced
        .while rax
            mov TCHAR ptr [r10+rdx*SIZEOF TCHAR],'0'
            lea rdx,[rdx+1]
            lea rax,[rax-1]
        .endw
    .endif
    .while rcx
        movzx rax,BYTE ptr sz[rcx-1]
        ; Store al for a BYTE-sized TCHAR, ax for a WORD-sized TCHAR.
        mov TCHAR ptr [r10+rdx*SIZEOF TCHAR],@SubStr(<alax>,2*(SIZEOF TCHAR)-1,2)
        lea rcx,[rcx-1]
        lea rdx,[rdx+1]
    .endw
    mov TCHAR ptr [r10+rdx*SIZEOF TCHAR],0
    mov rax,r10
    ret
dq2wsz endp
Awesome, thanks for that :)
Well, I've started by doing only ASCII strings for the moment, not Unicode. So to make some of the string functions faster, I guess it might help to have something like this:
; STRING strName, text
; Defines the zero-terminated byte string <text> under the label strName.
; The string starts on a 4-byte boundary and is followed by zero bytes up to
; the next multiple of 4, so it can be read DWORD-wise without running into
; whatever data follows it.
STRING macro strName:REQ,text:VARARG
    LOCAL szText
    align 4
strName LABEL BYTE
szText db text,0
    ; Emit 1-3 zero bytes only when the length is not already a multiple of 4.
    IF ($-szText) MOD 4
        db (4-(($-szText) MOD 4)) dup (0)
    ENDIF
endm
then:
.data
STRING HTTPDateString,"Date: {0}, {1} {2} {3} {4}:{5}:{6} GMT",13,10
This at least allows you to process strings as DWORDs.
Attached is a set of macros I'm currently using in combination with WinInc. Some of them use CRT functions. The string macros:
tchr$(), TCHR,... and the function calling macros fn/fnc/rv/rvc/rv32/rvc32 are independent of the CRT.
Note that TCHAR is declared either as BYTE (CHAR) or as WORD (WCHAR), according to the project's settings (UNICODE equate).
example:
.data
TCHR szLbl,"1234",13,10,0
.code
fn myFnc, &sz, "567", tchr$("89",13,10,"10")
What about Donkey's strings.lib (http://www.quickersoft.com/donkey/files/Strings.zip)?
It should be usable by MASM, no?
Quote from: anta40 on December 05, 2012, 01:04:39 AM
What about Donkey's strings.lib (http://www.quickersoft.com/donkey/files/Strings.zip)?
It should be usable by MASM, no?
Not 64bit though