The MASM Forum

64 bit assembler => 64 bit assembler. Conceptual Issues => Topic started by: MarKac on July 19, 2013, 12:58:25 AM

Title: unicode strings
Post by: MarKac on July 19, 2013, 12:58:25 AM
Hi,

Does anyone know if there is a way to represent Unicode strings in MASM64? In NASM it's simple:

; u(x) / w(x): shorthand for NASM's __utf16__ / __utf32__ string functions
%define u(x) __utf16__(x)
%define w(x) __utf32__(x)

dw u('C:\WINDOWS'), 0       ; Pathname in UTF-16
dd w('A + B'), 0   ; String in UTF-32

But how do I do it in MASM64? Is there an elegant solution? :(
Title: Re: unicode strings
Post by: jj2007 on July 19, 2013, 01:23:32 AM
\Masm32\macros\macros.asm has the chr$ and WSTR macros. Can't tell you, though, whether they work with Masm64 - afaik it's crippled in many aspects. JWasm should do the job, though - try copying the WSTR macro to your source.

For "true" Unicode (Chinese, Arabic etc), the best option is resource strings. You need a Unicode editor, though.

I use wChr$ (http://www.webalice.it/jj2006/MasmBasicQuickReference.htm#Mb1141)("whatever") and wRes$ (http://www.webalice.it/jj2006/MasmBasicQuickReference.htm#Mb1105)(index), but that's 32-bit.
Title: Re: unicode strings
Post by: qWord on July 19, 2013, 01:41:51 AM
As already said, you can copy the string-definition macros from macros.asm. But you may prefer this modification:
; ---------------------------------------------------------------------------
; utf_x _type, [lbl], arg [, arg ...]
;
; Emits one _type-sized element per character of every quoted-string
; argument, and one element per argument that OPATTR reports as an immediate
; constant. The data is placed at label lbl; when lbl is blank a label of the
; form anonym_WSTR_<n> is generated from the counter uccstr_lbl_cntr.
;
; The argument list is walked twice:
;   pass 1 counts the elements (uccstr_size) and validates every argument;
;   pass 2 emits the elements.
; Between the passes the label is declared as "_type uccstr_size dup (?)" and
; ORG moves the location counter back by that many bytes, so the elements
; emitted by pass 2 overlay the reservation.
; NOTE(review): presumably done so that lbl is typed as the whole array
; (SIZEOF/LENGTHOF) - confirm.
;
; Each character is emitted as <_type 'c'>; the macro itself contains no
; transcoding step.
;
; Errors (.err), each followed by the 1-based index of the offending argument:
;   invalid escape sequence  - unknown letter after '\' or '\' at end of string
;   invalid string specifier - argument is neither a quoted string nor an
;                              immediate constant (also raised for no args)
; ---------------------------------------------------------------------------
; escape sequences:
; \\ = '\'
; \0 = term. zero
; \a = '('
; \b = ')'
; \l = '<'
; \r = '>'
; \x = '!'
; \q = '"'
; \t = tab
; \p = '%'
; \A = '&'
; \n = new line = CR,LF
utf_x MACRO _type:req,lbl,args:VARARG

    ;; create the counter for anonymous labels the first time the macro runs
    IFNDEF uccstr_lbl_cntr
        uccstr_lbl_cntr = 0
    ENDIF
    ;; choose the label: a generated name if lbl is blank, else lbl itself
    IFB <&lbl>
        uccstr_lbl TEXTEQU <anonym_WSTR_>,%uccstr_lbl_cntr
        uccstr_lbl_cntr = uccstr_lbl_cntr + 1
    ELSE
        uccstr_lbl TEXTEQU <&lbl>
    ENDIF

    ;; ---- pass 1: count elements and validate the arguments ----
    ;; uccstr_size : number of elements that pass 2 will emit
    ;; uccstr_flg  : state of the current argument
    ;;               0 = nothing recognised yet, or invalid
    ;;               1 = inside a double-quoted string
    ;;               2 = inside a single-quoted string
    ;;               3 = immediate constant
    ;; uccstr_iarg : 1-based index of the current argument, for error text
    ;; uccstr_esc  : 1 while a backslash is waiting for its escape letter
    uccstr_size = 0
    uccstr_flg = 0
    uccstr_iarg = 1
    uccstr_esc = 0
    FOR _arg,<args>
        uccstr_flg = 0
        FORC char,<&_arg>
            IF uccstr_flg NE 0
                ;; inside a string: classify the character
                ;; position 1 = double quote, 2 = single quote, 3 = backslash, 0 = other
                uccstr_pos INSTR 1,<"'\>,<&char>
                IF uccstr_pos EQ uccstr_flg
                    ;; same quote as the opening one: end of string, stop scanning
                    EXITM
                ELSEIF uccstr_pos EQ 3 AND uccstr_esc EQ 0
                ;; backslash that is not itself escaped: start an escape sequence
                uccstr_esc = 1
                ELSE
                    IF uccstr_esc
                        ;; look up the escape letter; 0 means it is not in the table
                        uccstr_pos INSTR 1,<\0ablrxqtpAn>,<&char>
                       
                        IFE uccstr_pos
                            ;; unknown escape: clear flg but keep esc set so the
                            ;; check after the loop reports an invalid escape
                            uccstr_flg=0
                            EXITM
                        ENDIF
                        ;; pos/12 is 1 only for the 12th entry (n), which needs
                        ;; one extra element because it expands to CR,LF
                        uccstr_size = uccstr_size + uccstr_pos/12
                        uccstr_esc = 0
                    ENDIF
                    ;; one element for the plain or escaped character
                    uccstr_size = uccstr_size + 1
                ENDIF
            ELSE
                ;; first character of the argument: 1 or 2 if it opens a string
                uccstr_flg INSTR 1,<"'>,<&char>
                IFE uccstr_flg
                    ;; not a quote: accept the argument only if OPATTR bit 2
                    ;; (binary 100) marks it as an immediate constant
                    IF (OPATTR _arg) AND 100y
                        uccstr_flg = 3
                    ENDIF
                    EXITM
                ENDIF
            ENDIF
        ENDM
        ;; stop at the first bad argument so uccstr_iarg still points at it
        IF uccstr_flg EQ 0 OR uccstr_esc NE 0
            EXITM
        ELSEIF uccstr_flg EQ 3
            ;; an immediate constant contributes exactly one element
            uccstr_size = uccstr_size + 1
        ENDIF       
        uccstr_iarg = uccstr_iarg + 1
    ENDM
    ;; report a validation failure and leave the macro without emitting data
    ;; (the leading percent sign makes the assembler expand @CatStr in the text)
    IF uccstr_flg EQ 0 OR uccstr_esc NE 0
        IF uccstr_esc
        %   .err <invalid escape sequence : argument : @CatStr(%uccstr_iarg)>
        ELSE
        %   .err <invalid string specifier : argument : @CatStr(%uccstr_iarg)>
        ENDIF
        EXITM
    ENDIF
   
    ;; declare the label as an array of uccstr_size elements, then move the
    ;; location counter back so pass 2 writes over the reserved space
    align _type
    uccstr_lbl _type uccstr_size dup (?)
    org $-uccstr_size*_type
   
    ;; ---- pass 2: emit the elements (same scanning logic as pass 1) ----
    uccstr_esc = 0
    FOR _arg,<&args>
        uccstr_flg = 0
        FORC char,<&_arg>
            IF uccstr_flg NE 0
                ;; position 1 = double quote, 2 = single quote, 3 = backslash, 0 = other
                uccstr_pos INSTR 1,<"'\>,<&char>
                IF uccstr_pos EQ uccstr_flg
                    ;; closing quote: this argument is done
                    EXITM
                ELSEIF uccstr_pos EQ 3 AND uccstr_esc EQ 0
                    uccstr_esc = 1
                ELSE
                    IFE uccstr_esc
                        ;; plain character: build and emit a one-element definition,
                        ;; quoting the character with the string's own delimiter
                        uccstr_char CATSTR <_type >,uccstr_quote,<&char>,uccstr_quote
                        uccstr_char
                    ELSE
                        uccstr_pos INSTR 1,<\0ablrxqtpAn>,<&char>
                        ;; NOTE(review): pass 1 already rejects unknown escapes, so
                        ;; this branch looks unreachable here - confirm
                        IFE uccstr_pos
                            uccstr_flg=0
                            EXITM
                        ENDIF
                        ;; table of 3-character hex literals in escape-table order;
                        ;; two leading pad blanks make entry k start at column k*3.
                        ;; The 12th entry is 7 characters long (CR and LF).
                        uccstr_char SUBSTR <  5ch00h28h29h3ch3eh21h22h09h25h26h0dh,0ah>,uccstr_pos*3,3+4*(uccstr_pos/12)
                        uccstr_esc = 0
                        _type uccstr_char
                    ENDIF
                ENDIF
            ELSE
                uccstr_flg INSTR 1,<"'>,<&char>
                IFE uccstr_flg
                    IF (OPATTR _arg) AND 100y
                        uccstr_flg = 3
                    ENDIF
                    EXITM
                ENDIF
                ;; remember the opening quote; it is reused to quote each character
                uccstr_quote TEXTEQU <&char>
            ENDIF
        ENDM
        ;; immediate constant: emit it as a single element
        IF uccstr_flg EQ 3
            _type _arg
        ENDIF       
    ENDM

endm

; utf_8 [lbl], arg [, arg ...]
; Forwards to utf_x with BYTE as the element type.
;   lbl  - data label; passed through unchanged (utf_x accepts a blank label)
;   args - quoted strings and/or numeric constants, passed through unchanged
utf_8 MACRO lbl,args:VARARG
    utf_x BYTE,lbl,args
ENDM

; utf_16 [lbl], arg [, arg ...]
; Forwards to utf_x with WORD as the element type.
;   lbl  - data label; passed through unchanged (utf_x accepts a blank label)
;   args - quoted strings and/or numeric constants, passed through unchanged
utf_16 MACRO lbl,args:VARARG
    utf_x WORD,lbl,args
ENDM

; utf_32 [lbl], arg [, arg ...]
; Forwards to utf_x with DWORD as the element type.
;   lbl  - data label; passed through unchanged (utf_x accepts a blank label)
;   args - quoted strings and/or numeric constants, passed through unchanged
utf_32 MACRO lbl,args:VARARG
    utf_x DWORD,lbl,args
ENDM

usage:
.const
    ; sz1: BYTE elements - the characters of "1234", then 13, 10,
    ;      the characters of "foo text", then 0
    utf_8 sz1,"1234",13,10,"foo text",0
    ; sz2: WORD elements - the characters of "1234", then 0Dh,0Ah (from \n),
    ;      then 00h (from \0)
    utf_16 sz2,"1234\n\0"
    ; sz3: DWORD elements - the characters of "line 1", then 0Dh,0Ah;
    ;      this invocation emits no terminating zero
    utf_32 sz3,"line 1\n"
    ; blank label: utf_x generates an anonym_WSTR_<n> label;
    ; emits the characters of "line 2", then 0
    utf_32 ,"line 2",0
Title: Re: unicode strings
Post by: MarKac on July 19, 2013, 01:53:08 AM
thanks it works :icon_cool: