News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests
NB: Posting URL's See here: Posted URL Change

Main Menu

Parsing Text file in Assembly Language

Started by NoCforMe, July 07, 2023, 12:27:01 PM

Previous topic - Next topic

jj2007

Quote from: HSE on July 11, 2023, 07:24:01 AMPerhaps you haven't seen masm64 SDK invoke macro.

Perhaps you haven't seen the jinvoke macro - it's a factor 12 bigger but works with MASM, UAsm and AsmC, checks argument counts and types of arguments, just like the original 32-bit MASM invoke macro :biggrin:

; ---------------------------------------------------------------------------
; jinvoke apiarg, args
;   Emits the argument set-up and the call for apiarg, for a 64-bit build
;   (when @64 is non-zero) or a 32-bit build (otherwise).
;
;   apiarg  name of the API or procedure; when it contains crt_ the first
;           four characters are dropped before the lookup
;   args    comma separated arguments. Three single-token forms are special:
;             @         emit only the indirect call through iaApi
;             @def@     emit nothing after the API/DLL bookkeeping
;             @address  load the iaApi table entry into rax
;
;   Flow, as visible in the body:
;     1. look up the text behind j@ + name; without a slash in it the macro
;        simply expands to "invoke apiarg, args"
;     2. split that text into apinum / dllnum : info$ (or take info$ from
;        DefPrc$) and create the jdll@N / j@N / txDllN / txApiN symbols once
;     3. pass 1 over args: count them, build the reversed list rev$ and raise
;        .err for too many / not enough arguments
;     4. pass 2 over rev$: last argument first, move each one into its
;        register, stack slot or push
;     5. emit the call and, where required, the stack correction
;
;   Relies on symbols that are not defined in this macro: @64, @64X, SIZE_P,
;   iaApi, DefPrc$, jaCtDll, jaCtApi, jbPrologRun, jbVerbose, jbCompStyle,
;   jbArgsUsed, jbStrings, usedeb, atImmediate, atRegister, atGlobal, and the
;   macros jTypeChk, mPush, nops and Chr$.
;
;   NOTE(review): info$ looks like a signature string - first character the
;   calling convention (c is tested for the 32-bit stack correction), then
;   one type code per argument, padded with x. Confirm against the generator
;   of the j@ equates.
;   NOTE(review): pushRegD, csDest, noMem and tmp2$ are assigned below but
;   are not in the Local list, so they are visible outside the macro.
; ---------------------------------------------------------------------------
jinvoke MACRO apiarg, args:VARARG
Local tmp$, tmpA$, api$, apx$, apinum, dllnum, info$, inf1$, c1$, is, isCrt, isXmm, oa
Local isR, curSlot, curApi, rev$, isO, isOl, ctArgs, rspExtra, cVarArg, pushReg
; tmp$ CATSTR <jinv &apiarg&, line >, %@Line,  < with _jbInit=>, %jbInit, <, _jbPBI=>, %jbPBI, <, _jbPrologRun=>, %jbPrologRun
; % echo tmp$
;; optional marker instructions, only when needsnop is defined
ifdef needsnop
mov rbp, rbp
nops 4
endif
  ife jbPrologRun
if @64
; nop
endif
  endif
;; api$ is the name to look up; a crt_ hit drops the first four characters
;; NOTE(review): the test fires for crt_ anywhere in the name, the strip assumes it is a prefix
  api$ CATSTR <apiarg>
  isCrt INSTR api$, <crt_>
  if isCrt
api$ SUBSTR api$, 5
  endif
  apx$ CATSTR <j@>, api$
  ; % echo ____ api$ -> apx$
;; no slash in the text behind apx$: hand the call over to the standard invoke
  is INSTR 1, apx$, </>
  ife is
tmp$ CATSTR <LABEL >, <apiarg>
% echo ____ LABEL apiarg uses invoke apx$ ____
invoke apiarg, args
  else
;   % echo -------- Hello.... DefPrc$ in [api$] or [apiarg]
;; name found at position 3 of DefPrc$: fixed ids, info$ is what follows the next # sign
  isR INSTR DefPrc$, api$ ; j#myalgo#s1441144
  if isR eq 3
    % echo -- Hello DP: [DefPrc$] and info: [info$]
    ; .err ; info$
apinum equ <-1>
isR INSTR isR+1, DefPrc$, <#>
dllnum equ <15000>
info$ SUBSTR DefPrc$, isR+1
info$ CATSTR info$, <xxxxxxxxxxxxxxxxxxxx>
  else
;; otherwise split apx$: apinum before the slash, dllnum up to the colon, info$ after it
apinum SUBSTR apx$, 1, is-1
isR INSTR is+1, apx$, <:>
dllnum SUBSTR apx$, is+1, isR-is-1
info$ SUBSTR apx$, isR+1
; % echo info: [info$] ; s1441...
;; the x padding is what the argument count checks below look for
info$ CATSTR info$, <xxxxxxxxxxxxxxxxxxxx>
  endif
; tmp$ CATSTR <api >, <apiarg>, < has ID >, %apinum, < and DLL >, %dllnum, <, info=>, info$
; % echo tmp$
; define a new variable jdll@123
;; curSlot becomes the slot number of this DLL; a new slot also gets a txDll text line
tmp$ CATSTR <jdll@>, dllnum
is = opattr tmp$
if is eq 36 ; immediate
curSlot=tmp$ ; already defined
else
curSlot=jaCtDll
if dllnum ge 0 ; <0 is own proc
tmp$ CATSTR tmp$, <=>, %jaCtDll
tmp$
tmp$ CATSTR <jd@>, %dllnum ; jd@3 equ advapi32
; % echo ## DLL: tmp$
tmp$ CATSTR <txDll>, %curSlot, < equ db ">, tmp$, <", 0>
; % echo tmp$
tmp$
jaCtDll=jaCtDll+1
endif
endif
; define a new variable j@123
;; curApi becomes the index into iaApi; a new Windows API also gets a txApi text line
tmp$ CATSTR <j@>, %apinum
is = opattr tmp$
if is eq 36
; echo ####### tmp$ already defined ###### ; immediate
curApi equ tmp$
else
is INSTR tmp$, <j@> ; jTypeChk follows below
if is
tmp$ CATSTR tmp$, <=>, %jaCtApi
% tmp$ ; the % is for ML64 (erratic errors)
if apinum gt 50000
if jbVerbose
    % echo own api$
endif
curApi equ <-1>
else
tmp$ CATSTR <txApi>, %jaCtApi, < equ ap>, %jaCtApi, <$ db curSlot+1, ">, api$, <", 0>
if jbVerbose
    % echo Win api$
endif
% tmp$ ; the % is for ML64 (erratic errors)
curApi=jaCtApi
jaCtApi=jaCtApi+1 ; this is total, not current
endif
else
curApi equ tmp$
endif
endif
;; single-token forms: call only, bookkeeping only, or load the table entry into rax
ifidn <args>, <@>
call iaApi[SIZE_P*curApi]
EXITM
elseifidn <args>, <@def@>
EXITM
elseifidn <args>, <@address>
mov rax, iaApi[SIZE_P*curApi]
EXITM
endif
;; ---- pass 1: count the arguments in isR, build rev$ (last argument first,
;; ---- separated by # signs) and check count and REAL8/QWORD against info$
isR=0
cVarArg INSTR info$, <c6x>
rev$ equ <# >
for arg, <args> ; REVERSE
  isR=isR+1
  tmp$ CATSTR <arg>, <    >
  tmp$ SUBSTR tmp$, 1, 4
  rev$ CATSTR <arg>, <#>, rev$
  inf1$ SUBSTR info$, isR+1, 1
;; hitting the x padding means more arguments were supplied than info$ declares
  ifidn inf1$, <x>
  ife cVarArg
  ; % echo info$/inf1$
tmp$ CATSTR <## line >, %@Line, <: too many arguments for &apiarg& ##>
% echo tmp$
.err
  endif
  endif
;; the type check is skipped for arguments starting with addr, an ampersand or an asterisk
  ifdifi tmp$, <addr>
  if @InStr(1, <arg>, <&>) ne 1
  if @InStr(1, <arg>, <*>) ne 1
  if type arg eq REAL8
  ifdif inf1$, <3> ; :s131
  ;.err <## REAL8 not expected ##>
  endif
  elseif type arg eq QWORD
  ifidn inf1$, <3>
  ; % echo info$/inf1$
  .err <## REAL8 expected ##>
  endif
  endif
  endif
  endif
  endif
endm
;; first x later than position isR+2: info$ declares more arguments than were supplied
is INSTR info$, <x>
if is gt isR+2
  ife cVarArg
  ; tmp$ CATSTR <i=>, %is, <, r=>, %isR
; % echo tmp$
  ; % echo info$/inf1$
  tmp$ CATSTR <## line >, %@Line, <: not enough arguments for &apiarg& ##>
  % echo tmp$
  .err
endif
endif
ctArgs=isR
rspExtra=0
;; 64-bit only: without jbCompStyle dummy pushes of rbx are emitted before the arguments
if @64
if ctArgs GT 4
is=ctArgs mod 4
rspExtra=ctArgs/4
ife jbCompStyle
REPEAT 4-is
push rbx ; r8 is a 2-byter, so we take rbx
ENDM
endif
elseif ctArgs LT 4 ; can be merged
ife jbCompStyle
repeat 4-ctArgs ; 1...3 dummy pushes, rest pushed below
    push rbx
endm
endif
endif
  if isR GT jbArgsUsed+20 ; ?????
  jbArgsUsed=isR+20
  .err <isr gt argsused>
  endif
endif
; tmp$ CATSTR <rev=>, rev$
; % echo tmp$
; if usedeb
; mov rsi, rsi ; for debugging
; int 3
; endif
; % echo API: api$, INFO: info$
  ; mov rsp, rsp ; ----- start moving args into stack ---------
;; ---- pass 2: take the arguments off the head of rev$, i.e. last argument first;
;; ---- isR is decremented first and is the zero-based slot of the current argument
While isR ; push in right order: rcx rdx r8 r9 pushed5 pushed6 etc
  isR=isR-1
  is INSTR rev$, <#>
  tmp$ SUBSTR rev$, 1, is-1
  c1$ SUBSTR rev$, 1, 1 ; only for & and *
  tmpA$ CATSTR tmp$, <    >
  tmpA$ SUBSTR tmpA$, 1, 4
;; isOl is the number of leading characters to strip before the operand itself
isOl=0 ; 0=no addr, offset, * or &
  ifidni tmpA$, <offs>
isOl=7 ; substr must compensate offset characters
  elseifidni tmpA$, <addr>
  isOl=5 ; substr must compensate addr characters
  elseifidn c1$, <&>
  isOl=1
  elseifidn c1$, <*>
  isOl=1
  endif
  if @64
;; slots 0..3 use rcx, rdx, r8, r9; every higher slot is staged through r10
  pushReg equ <r10>
  pushRegD equ <r10d>
  if isR eq 0
  pushReg equ <rcx>
  pushRegD equ <ecx>
  elseif isR eq 1
  pushReg equ <rdx>
  pushRegD equ <edx>
  elseif isR eq 2
  pushReg equ <r8>
  pushRegD equ <r8d>
  elseif isR eq 3
  pushReg equ <r9>
  pushRegD equ <r9d>
  endif
  csDest equ [rsp+8*isR] ; [rbp+x] is same size in X64
  if jbCompStyle ; always, it's the default now
;; from here on c1$ is the info$ character for this slot, handed to jTypeChk
  c1$ SUBSTR info$, isR+2, 1
; if apinum gt 50000 and usedeb
; oa INSTR info$, <x>
; if oa
; tmpx$ SUBSTR info$, 1, oa
; else
; tmpx$ CATSTR info$
; endif
; oa = (opattr tmp$) AND 127
;   tmpx$ CATSTR <Count=>, %isR, <: arg=[>, tmp$, <], c=>, c1$, < in >, tmpx$, <, o=>, %oa
; % echo tmpx$
; endif
;; slots at or above noMem are written to their stack slot csDest
noMem=4 ; and (useCB eq 0)
  if isOl
  tmp$ SUBSTR tmp$, 1+isOl
  ; if jbVerbose
  ; % echo off tmp$ ; not very useful
  ; endif
  lea pushReg, tmp$ ; addr or offset
  if isR ge noMem
  mov csDest, pushReg
  endif
  elseif type(tmp$) eq REAL8 ; REAL8 to xmm
  jTypeChk cVarArg, isR, c1$, <4REAL8>, api$
  if isR ge noMem
  movlps xmm0, tmp$ ; real8 to xmm0 (no conversion)
  movlps qword ptr csDest, xmm0
  else ; first 4 in xmm? and rcx rdx r8 r9
  ; % echo DEST: csDest
  tmp2$ CATSTR <movlps xmm>, %isR, <,>, tmp$
  ; % echo **** Passing a REAL8: tmp2$
  % tmp2$
  if cVarArg ; Parameter passing: Floating-point values are only placed in the integer registers RCX, RDX, R8, and R9 when there are varargs arguments
  tmp2$ CATSTR <movd >, pushReg, <, xmm>, %isR
  ; % echo **** Passing a REAL8 both to xmm? and reg64: tmp2$
  % tmp2$
  endif
  endif
  elseif type(tmp$) eq REAL4 ; REAL4 to xmm
  jTypeChk cVarArg, isR, c1$, <3REAL4>, api$
  if isR ge noMem
  movd xmm0, tmp$ ; real4 to xmm0 (no conversion)
  movd dword ptr csDest, xmm0
  else ; first 4 in xmm? and rcx rdx r8 r9
  ; % echo DEST: csDest
  tmp2$ CATSTR <movd xmm>, %isR, <,>, tmp$
  ; % echo **** Passing a REAL4: tmp2$
  % tmp2$
  if cVarArg ; Parameter passing: Floating-point values are only placed in the integer registers RCX, RDX, R8, and R9 when there are varargs arguments
  tmp2$ CATSTR <movd >, pushReg, <, xmm>, %isR
  ; % echo **** Passing a REAL4 both to xmm? and reg64: tmp2$
  % tmp2$
  endif
  endif
  elseif type(tmp$) LT SIZE_P ; zero-extend
  ; % echo xx  tmp$  xx less than size_p
jTypeChk cVarArg, isR, c1$, <1DWORD>, api$ ; let's check if the callee wants something else
  oa = opattr tmp$
  if oa eq atImmediate
;; immediates: 0 and -1 get short encodings, negative values use the full register
  if isR lt noMem ; use registers
  ife tmp$
  xor pushRegD, pushRegD ; shortest option?
  else
  if tmp$ eq -1
  xor pushRegD, pushRegD
  dec pushReg
  elseif tmp$ LT 0
  mov pushReg, tmp$
  else
  mov pushRegD, tmp$
  endif
  endif
  ; no mov csDest, pushReg
  else ; move immediate into stack
  ife tmp$
  and qword ptr csDest, 0 ; shortest option; dword is ok for regs but not mem
  else
  if tmp$ eq -1
  or qword ptr csDest, -1
  else
  mov qword ptr csDest, tmp$
  endif
  endif  
  endif
  elseif oa eq atRegister
  mov csDest, tmp$
  else
  if type tmp$ LT DWORD
  movsx pushRegD, tmp$
  else
  mov pushRegD, tmp$
  endif
  if isR ge noMem
  mov csDest, pushReg
  endif
  endif
  else ; SIZE_P (s-code 1)
;; pointer sized operand: an operand starting with xmm is stored with movlps, anything else goes through pushReg
  isXmm INSTR tmp$, <xmm>
  if isXmm eq 1
  jTypeChk cVarArg, isR, c1$, <4REAL8>, api$ ; TypeCheck: 4, 3REAL4, c1=[3]
  ; % echo A: movlps csDest, tmp$
  movlps QWORD ptr csDest, tmp$
  if isR lt noMem
  if cVarArg
  ; % echo B: mov pushReg, tmp$
  movd pushReg, tmp$
  endif
  endif
  else
  jTypeChk cVarArg, isR, c1$, <1DWORD>, api$ ; TypeCheck: 4, 3REAL4, c1=[3]
;; no move is emitted when the operand already is the target register
  ifdifi pushReg, tmp$
  if isR ge noMem
  oa = (opattr tmp$) AND 127
if oa eq atRegister
  mov csDest, tmp$
else
  mov pushReg, tmp$
  mov csDest, pushReg
endif
  else
  mov pushReg, tmp$
  endif
  else
  if isR ge noMem ; otherwise fastcall
  mov csDest, tmp$ ; same as pushReg
  endif
  endif
  endif
  endif
;; NOTE(review): this else pairs with if jbCompStyle inside the @64 branch, so despite
;; its label it looks like the push-based 64-bit path - confirm
  else ; vvv 32-bit code vvv
  if isOl
  tmp$ SUBSTR tmp$, 1+isOl
  lea pushReg, tmp$ ; addr or offset
  mPush pushReg ; 32-bit code
  elseif type tmp$ eq REAL4
  mov pushRegD, tmp$ ; use 32-bit instruction
  mPush pushReg
  tmp$ CATSTR <** Warning, line >, %@Line, <: passing a REAL4 may not work **>
% echo tmp$
  elseif type tmp$ LT SIZE_P ; zero-extend
  oa = opattr tmp$
  if oa eq atImmediate
  ife tmp$
  xor pushRegD, pushRegD
  push pushReg
  else
  push tmp$
  if isR LT 4
  mov pushReg, [rsp]
  endif
  endif
  else
  if type tmp$ LT DWORD
  movsx pushRegD, tmp$
  else
  mov pushRegD, tmp$
  endif
  mPush pushReg
  endif
  else
  ifdifi pushReg, tmp$
  mov pushReg, tmp$
  endif
  mPush pushReg
  endif
  endif
;; this else pairs with if @64: everything below is pushed with mPush or push
  else ; v v 32-bit code v v
  if isOl ; <addr>
  if isOl eq 7
  mPush tmp$
else
  tmp$ SUBSTR tmp$, 1+isOl
  oa = (opattr tmp$) AND 127
  if oa eq atGlobal
  push offset tmp$
  else
  lea edx, tmp$
  mPush edx
endif
  endif
;; 8-byte operands are pushed as two dwords, high half first; rspExtra grows by 4 for each
  elseif (type tmp$ eq REAL8) or (type tmp$ eq QWORD) ; see add rsp
  mPush dword ptr tmp$[4]
  mPush dword ptr tmp$
  rspExtra=rspExtra+4
  else
  isXmm INSTR tmp$, <xmm>
  ife isXmm
  mPush tmp$
else
push eax
endif
  endif
  endif
  ; % echo ---- rev$ -------
;; drop the argument just handled from the head of rev$
  rev$ SUBSTR rev$, is+1
ENDM
  ; mov rbp, rbp ; ----- end moving args ---------
if @64
; tmp$ CATSTR <apiarg>, < has >, %ctArgs, < paras>
; % echo tmp$
;; disabled reference sketch, never assembled because of the if 0
if 0
mPush arg6 ; sixth parameter ; mazegen
mPush arg5 ; fifth parameter
sub rsp, 4*8 ; allocate space for 'Register Parameter Stack Area'
mov r9, arg4
mov r8, arg3
mov rdx, arg2
mov rcx, arg1
call function ; inactive
add rsp, 4*8 + 2*8 ; release all parameters from stack
endif
endif
;; record in j@ExDone that ExitProcess has been invoked
ifidn api$, <ExitProcess>
j@ExDone=1
endif
if jbStrings
tmp$ CATSTR <CALL >, api$, < as >, %(curApi), </>, %jaCtApi
% echo tmp$
endif
;; debug aid for non-64-bit builds: edx is loaded with the API name before the call
if usedeb and apinum lt 50000 ; --- solved for x64 with syms and VS14, see bax ---
ife @64
tmp2$ CATSTR <mov edx, Chr$(">, api$, <")>
; % echo api$: tmp2$
% tmp2$
endif
endif
if @64X
; sub rsp, 4*8
rspExtra=rspExtra+1
endif
;; user procedures are called by name, everything else through the iaApi table
if apinum gt 50000
call api$ ;; user proc
else
call iaApi[SIZE_P*curApi]
endif
;; info$ starting with c in a non-64-bit build: the caller removes the arguments
is INSTR info$, <c>
if (is eq 1) and (@64 eq 0)
add rsp, 4*ctArgs+rspExtra ; 32-bit C stack correction; rspex for QWORD
endif
if rspExtra and jbCompStyle eq 0
add rsp, rspExtra*32
endif
;; closes the ife is / else opened after the slash test near the top
  endif
ENDM

HSE

Quote from: jj2007 on July 11, 2023, 07:51:14 AM
Perhaps you haven't seen the jinvoke macro

Yes. That is an ugly spaghetti  :biggrin: :biggrin:
Equations in Assembly: SmplMath

jj2007

Quote from: HSE on July 11, 2023, 07:54:35 AM
Quote from: jj2007 on July 11, 2023, 07:51:14 AM
Perhaps you haven't seen the jinvoke macro

Yes. That is an ugly spaghetti  :biggrin: :biggrin:

I knew you would like it :greensml:

Look how it translates the CreateWindowEx into real, efficient code:
int 3
jinvoke CreateWindowEx, WS_EX_CLIENTEDGE, Chr$("RichEdit20A"), NULL, reStyle, 0, 0, 1, 1, hWnd, ID_EDIT, wcx.hInstance, NULL
nop


int3                            |
and [rsp+58],0                  | NULL
mov r10,[140003054]             | wcx
mov [rsp+50],r10                | wcx
mov [rsp+48],6F                 | ID_EDIT
mov r10d,[rbp+10]               | hWnd
mov [rsp+40],r10                | hWnd
mov [rsp+38],1                  | 1
mov [rsp+30],1                  | 1
and [rsp+28],0                  | 0
and [rsp+20],0                  | 0
mov r9d,503001C4                | reStyle
xor r8d,r8d                     | NULL
lea rdx,[1400030B7]             | Chr$("RichEdit20A")
mov ecx,200                     | WS_EX
call [<&CreateWindowExA>]       |
nop                             |



P.S.: I fixed the mov rax, eax bug (version 4 attached):

-------- Sample text: --------
invoke CreateWindowEx, WS_EX_CLIENTEDGE, Chr$("RichEdit20A"), NULL, reStyle, 0, 0, 1, 1, hWnd, ID_EDIT, wcx.hInstance, NULL

mov rcx, WS_EX_CLIENTEDGE
mov rdx, Chr$("RichEdit20A")
mov r8, NULL
mov r9, reStyle
push 0
push 0
push 1
push 1
push hWnd
push ID_EDIT
push wcx.hInstance
push NULL
call CreateWindowEx

-------- Sample text: --------
INVOKE  WinMain, EAX

mov ecx, EAX
call WinMain

-------- Sample text: --------
INVOKE  ExitProcess, EAX

mov ecx, EAX
call ExitProcess


Of course, this is still a push orgy, so it's not real code as shown above.

HSE

#63
Quote from: NoCforMe on July 11, 2023, 07:31:45 AM
And hey, it's not that complicated!.

:biggrin: So far (I'm on the second example), for these simple things, spaghetti is easier.

But better than modifying spaghetti is to begin from zero. Then we have the chance to understand how these table-driven FSMs can be built (for more complex cases).  :thumbsup:
Equations in Assembly: SmplMath

NoCforMe

Here's the latest version. Takes from 1 to 8 arguments, places the first 4 in registers, pushes any others on the stack.
Try it out.

I think this is as far as I go with this demo; it has met (and exceeded) the challenge by mineiro.

It'd be nice to get some feedback on this. I'm thinking of making an evaluation form, with questions like this:

A. What is your overall opinion of this demo?

  • I think it's great and can't wait to implement it.
  • It's interesting, but maybe some other day.
  • Not sure about this.
  • You'd have to pay me to even think about using this!
  • I'd never use this even if you paid me!
Ha ha, just kidding. But seriously, give me some feedback here. Like I said, this isn't for everyone, but I think it demonstrates an important and very useful technique in text analysis.

It may seem complex, but believe me, after doing two or three of these, it's very easy to start a new parser from scratch. It's like riding a bicycle; the first few times are hard, but it becomes second nature after that. A lot of stuff can be block-copied to save coding time. And you can build very extensive parsers with this method. Just to show you, here's a command file for a graph-making program I did a long time ago that uses my parsing methods:


;===============================================
;   Sample MAKEGRAF control file (test.gcf)
;===============================================

;text (location=(400, 60) text="Index #16" color=16)
;text (location=(20, 180) text="Index #1" color=1)
;text (location=(20, 140) text="Index #2" color=2)
;text (location=(20, 100) text="Index #3" color=3)
;text (location=(20, 60) text="Index #4" color=4)
;text (location=(100, 180) text="Index #5" color=5)
;text (location=(100, 140) text="Index #6" color=6)
;text (location=(100, 100) text="Index #7" color=7)
;text (location=(100, 60) text="Index #8" color=8)
;text (location=(200, 180) text="Index #9" color=9)
;text (location=(200, 140) text="Index #10" color=10)
;text (location=(200, 100) text="Index #11" color=11)
;text (location=(200, 60) text="Index #12" color=12)
;text (location=(400, 180) text="Index #13" color=13)
;text (location=(400, 140) text="Index #14" color=14)
;text (location=(400, 100) text="Index #15" color=15)
;text (location=(500, 60) text="!" color=16)

palette (load "test.gpf"
13(255,0,0) ;define red as RED!
)

graph (
size=(640,400)
filename="test.bmp"
bgcolor=11
)

grid (llcorner=(80, 60)
gridcolor=16
axisthickness=2
width=500
height=300
bgcolor=7)

line(start=(100,75)end=(250,120))
line(start=(250,120)end=(310,270))
line(start=(310,270)end=(450,120))
line(start=(450,120)end=(460,300))

font="5x9.sff"
font="7x11.sff"

; This statement shows a bug: horizontal rotated text doesn't render properly:
;text(location=(400,200) text="Weird; rotated horizontal text" rotation=TRUE)

text (location=(200, 390) text = "Civilians Killed in Iraq (M)" color=1)
text (location=(40, 380) text="3!#$&%*/(0123456789)" color=6 font="5x9.sff")
text (location=(40, 50)
text="!#$&%*/(0123456789):;@<=>ABCDEFGHIJKLMNOPQRSTUVWXYZ?[\\]^`ab"
color=2 font="7x11.sff")
text (location=(40, 30)
text=".,'\"cdefghijklmnopqrstuvwxyz{|}~"
color=2 font="7x11.sff")
text(location=(50,350)text="0123456789" color=13 font="7x11.sff" direction=vert)
text(location=(50,100)text="ROTATED TEXT" color=14 font="7x11.sff" direction=vert rotation=true)

dot (location=(100,76) color=13)
dot (location=(250,121) color=13)
dot (location=(310,270) color=13 shape=square)
Assembly language programming should be fun. That's why I do it.

jj2007

Quote from: NoCforMe on July 11, 2023, 03:14:15 PM
It'd be nice to get some feedback on this. I'm thinking of making an evaluation form, with questions like this:

A. What is your overall opinion of this demo?

  • I think it's great and can't wait to implement it.
  • It's interesting, but maybe some other day.
  • Not sure about this.
  • You'd have to pay me to even think about using this!
  • I'd never use this even if you paid me!

6. You are almost there :thumbsup:

Using invoke lines from your latest source:
INVOKE--> code parser demo, version 4
Allows dec/hex/binary #, registers, var or ADDR var for
up to 8 arguments (requires at least 1).

Enter statement to test: >INVOKE  WinMain, EAX

        MOV     RCX, EAX    ; <<<<<<<<<<<<<< error
        CALL    WinMain

Enter statement to test: >INVOKE  ExitProcess, EAX

        MOV     RCX, EAX
        CALL    ExitProcess

Enter statement to test: >INVOKE  StdOut, OFFSET ProgramHeading

Tokenization error.

Enter statement to test: >INVOKE  StdIn, OFFSET InputBuffer, SIZEOF InputBuffer

Tokenization error.

Enter statement to test: >INVOKE  StdOut, OFFSET CRLFstr

Tokenization error.

Enter statement to test: >INVOKE  wsprintf, ADDR buffer, OFFSET CALLfmt

Tokenization error.

Enter statement to test: >INVOKE  strcmpi, OFFSET TextBuffer, [EBX].$T_entry.T_IDptr


For testing, it might be easier to use a text file with examples, like the attached one, instead of typing all the time.

HSE

Quote from: jj2007 on July 11, 2023, 06:11:32 PM
Enter statement to test: >INVOKE  WinMain, EAX

        MOV     RCX, EAX    ; <<<<<<<<<<<<<< error
        CALL    WinMain

No error, that result is correct! For this FSM, EAX is a variable name.  :biggrin:

Equations in Assembly: SmplMath

NoCforMe

Thanks, Héctor. People keep throwing stuff at my poor li'l parser thinking it knows the entire universe of MASM symbols. It doesn't, just a limited subset of them. Think of how complex it would have to be in order to handle expressions like

[RDX].Table + 12
[RAX + RBX + Table]
[RAX+RBX+Table]

Ironically, my parser can handle that last one, since there are no embedded spaces; it's just another "unknown identifier" to it:

Enter statement to test: >invoke function, [RDX+RAX+Table]

        MOV     RCX, [RDX+RAX+Table]
        CALL    function

But it has no idea what all those particles within it mean.

So have I met the original challenge (mineiro's)?
Assembly language programming should be fun. That's why I do it.

mineiro

I am now downloading your program.
I intend to play with your toy during this week; if I make some changes, I will post them in this topic with your permission.
Thank you, sir NoCforMe.
I'd rather be this ambulant metamorphosis than to have that old opinion about everything

jj2007

Quote from: NoCforMe on July 12, 2023, 04:41:47 AM
Thanks, Héctor. People keep throwing stuff at my poor li'l parser

Invoke someproc, [RDX].Table + 12, [RAX + RBX + Table], [RAX+RBX+Table]

mov rcx, [RDX].Table + 12
mov rdx, [RAX + RBX + Table]
mov r8, [RAX+RBX+Table]
call someproc

NoCforMe

That'll make it through my demo if you remove the spaces:


Enter statement to test: >Invoke someproc, [RDX].Table + 12, [RAX + RBX + Table], [RAX+RBX+Table]

Tokenization error.

Enter statement to test: >Invoke someproc, [RDX].Table+12, [RAX+RBX+Table], [RAX+RBX+Table]

        MOV     RCX, [RDX].Table+12
        MOV     RDX, [RAX+RBX+Table]
        MOV     R8, [RAX+RBX+Table]
        CALL    someproc
Assembly language programming should be fun. That's why I do it.

jj2007

No way to fix that spaces problem? After all, the token delimiter is clearly the comma...

NoCforMe

#72
Quote from: jj2007 on July 12, 2023, 07:37:20 AM
No way to fix that spaces problem? After all, the token delimiter is clearly the comma...

JJ, you really don't seem to understand what's going on here. Yes, I could "fix the spaces problem" by only recognizing the comma as the delimiter. But first the more trivial problem: that would mean that spaces would be included in any identifier, like, say, "RAX " or "varName " where the user put a space between the ID and the comma (which of course is allowed in MASM syntax). Which would mess up the formatting. (Would probably still produce valid assemble-able code, but still.)

But the more important problem is that the parser still wouldn't understand at all what the component parts of the expression are, and which sequences of them are legal and which are not. Which you can see is a non-trivial problem, one which is waaaaay beyond the scope of what was spozed to be a somewhat simple demo.

Later: I tried what you suggested, which was to allow a space to be part of an identifier--super-easy change, just change one of the jump targets in the tokenization table--but that broke the whole thing. Scratched my head for a bit, why did that happen? Wellll, because a space is a delimiter, between "invoke" and the function name. So that won't work.

The only proper way to do it would be to handle the universe of address expressions, which is enormous. Not gonna happen for this demo.
Assembly language programming should be fun. That's why I do it.

zedd151

Quote from: NoCforMe on July 12, 2023, 07:53:48 AM
JJ, you really don't seem to understand what's going on here. Yes, I could "fix the spaces problem" by only recognizing the comma as the delimiter. But first the more trivial problem: that would mean that spaces would be included in any identifier, like, say, "RAX " or "varName " where the user put a space between the ID and the comma
Shouldn't be too hard to do a little 'preprocessing'. I have a qe plugin (fixpunc) that removes any space(s) before a comma and places a single space after the comma & removes any extraneous spaces after the comma (if more than 1). Not that you need a qe plugin, but the algo is very simple...  :icon_idea:


src is the source buffer, dst is the destination buffer...
fixpunc    proto :dword, :dword


    .code
    fixpunc proc src:dword, dst:dword
        mov ecx, src
        mov edx, dst
        top:
        mov al, [ecx]
        cmp al, 0
        jz done
        cmp al, ","
        jz comma1
        mov [edx], al
        inc ecx
        inc edx
        jmp top
        comma1:
        cmp byte ptr [edx-1], 20h
        jnz @f
        dec edx
        jmp comma1
        @@:
        mov [edx], al
        inc edx
        @@:
        inc ecx
        cmp byte ptr [ecx], 20h
        jnz movcomm
        mov al, [ecx]
        mov [edx], al
        inc edx
        @@:
        inc ecx
        cmp byte ptr [ecx], 20h
        jz @B
        jmp top
        movcomm:
        mov byte ptr [edx], 20h
        inc edx
        jmp top
        done:
        ret
    fixpunc endp

HSE

Quote from: NoCforMe on July 12, 2023, 07:53:48 AMNot gonna happen for this demo.

:thumbsup:

As a further step, it can't be a big deal. It just requires another state. You can read space and comma in at least 2 different states... but I'm still on example 2  :biggrin:
Equations in Assembly: SmplMath