News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests
NB: Posting URL's See here: Posted URL Change

Main Menu

Static RSP built in JWasm

Started by habran, June 14, 2013, 03:39:17 PM

Previous topic - Next topic

habran

Cod-Father

habran

I have a little correction  in the proc.c line: 2162   ::)

this code did not do what I expected :shock:
it was supposed to check whether the number of registers remaining to be pushed is odd
and if not, to make it happen

  else if (grcount>4){
       i=grcount-r;
       if (grcount & 1 == 0){
           for (i=0;i<4;i++){
           if (info->home_used[i]==0) break;
          }
           info->home_used[i]=1;
        }
     }


replace with this:
  else if (grcount > 4){
       r = grcount-r;
       if (!(r & 1 )){
           for (i=0;i<4;i++){
           if (info->home_used[i]==0) break;
          }
           info->home_used[i]=1;
        }
     }


I have replaced binaries at the top with corrected build

Cod-Father

habran

I uploaded at the top new version with some more bug fixes
the folder also contains the proc.c with changes 8)
Cod-Father

habran

Some more fixes :biggrin:
in the folder at the top of the thread are new binaries and three source files
replace these sources in former source folder with these new ones
here are some examples what it does: 8)


; Demo 1: no LOCALs, no USES list and no calls. Per the listing that
; follows, no prologue or epilogue code is emitted for this procedure.
testproc4 PROC FRAME
                 
                mov eax,20
                mov edx,eax
                ret 
testproc4 endp

;produces this:
0000000000A011FA  mov         eax,14h 
0000000000A011FF  mov         edx,eax 
0000000000A01201  ret 



; Demo 2: one DWORD local, no calls. Per the listing that follows, 8 bytes
; are reserved with SUB RSP and z is addressed as [rsp]; no RBP-based frame
; is set up.
testproc4 PROC FRAME
                LOCAL z:DWORD
                 
                mov eax,20
                mov z,eax
                 ret 
testproc4 endp

;produces this:
00000000010D11FA  sub         rsp,8 
00000000010D11FE  mov         eax,14h 
00000000010D1203  mov         dword ptr [rsp],eax 
00000000010D1206  add         rsp,8 
00000000010D120A  ret


; Demo 3: USES rbx rdi plus one DWORD local. Per the listing that follows,
; rbx is stored at [rsp+8] on entry instead of being pushed, only rdi is
; pushed, and rbx is reloaded from [rsp+18h] before RSP is restored.
testproc4 PROC FRAME USES rbx rdi
                LOCAL z:DWORD
                 
                mov eax,20
                mov z,eax
                ret 
testproc4 endp

;produces this:
00000000013B11FA  mov         qword ptr [rsp+8],rbx 
00000000013B11FF  push        rdi 
00000000013B1200  sub         rsp,8 
00000000013B1204  mov         eax,14h 
00000000013B1209  mov         dword ptr [rsp],eax 
00000000013B120C  mov         rbx,qword ptr [rsp+18h] 
00000000013B1211  add         rsp,8 
00000000013B1215  pop         rdi 
00000000013B1216  ret



; Demo 4: same as demo 3 plus a call via INVOKE. Per the listing that
; follows, 30h bytes are reserved, z is addressed as [rsp+20h], rbx is
; copied to rdx as the second printf argument (no instruction is emitted
; for the first argument, which is already in rcx), and rbx is reloaded
; from [rsp+40h] at exit.
testproc4 PROC FRAME USES rbx rdi
                LOCAL z:DWORD
                 
                mov eax,20
                mov z,eax
                invoke printf,rcx,rbx
                ret 
testproc4 endp
;produces this:
0000000000B911FA  mov         qword ptr [rsp+8],rbx 
0000000000B911FF  push        rdi 
0000000000B91200  sub         rsp,30h 
0000000000B91204  mov         eax,14h 
0000000000B91209  mov         dword ptr [rsp+20h],eax 
0000000000B9120D  mov         rdx,rbx 
0000000000B91210  call        printf (0B91358h) 
0000000000B91215  mov         rbx,qword ptr [rsp+40h] 
0000000000B9121A  add         rsp,30h 
0000000000B9121E  pop         rdi 
0000000000B9121F  ret

Cod-Father

habran

as you can see this assembler has got brains ;)
there is no need to use other options to improve produced code
Cod-Father

japheth

Quote from: habran on August 28, 2013, 04:56:36 PM

;produces this:
[b]00000000013B11FA  mov         qword ptr [rsp+8],rbx  [/b]
00000000013B11FF  push        rdi 
00000000013B1200  sub         rsp,8 
00000000013B1204  mov         eax,14h 
00000000013B1209  mov         dword ptr [rsp],eax 
[b]00000000013B120C  mov         rbx,qword ptr [rsp+18h]  [/b]
00000000013B1211  add         rsp,8 
00000000013B1215  pop         rdi 
00000000013B1216  ret


I'm not really happy with this code generation. Why is rbx not pushed like rdi?

This is not just an aesthetic issue, as you probably might assume - it makes the code incompatible with Win64 SEH.

See this - admittedly "advanced" - sample:


;--- Win64 SEH sample, requires jwasm.
;--- it demonstrates:
;--- a) how to install exception handlers in 64-bit
;--- b) how a handler may "refuse" to handle the exception
;--- c) how to "unwind" via RtlUnwind() or RtlUnwindEx()
;--- d) that an exception handler may be called twice,
;---    see "A Crash Course on the Depths of Win32 Structured Exception Handling"
;---    by Matt Pietrek, MSDN 01/1997.

option casemap:none
option win64:3
option frame:auto

.nolist
.nocref
WIN32_LEAN_AND_MEAN equ 1
include windows.inc
include ntdll.inc
include excpt.inc
include stdio.inc
.cref
.list

;UNWFUNC textequ <RtlUnwind>
UNWFUNC textequ <RtlUnwindEx>

ExceptionExecuteHandler equ 4

includelib <kernel32.lib>
includelib <msvcrt.lib>

;--- CStr( text ): inline string-literal helper.
;--- Emits the given text items as a zero-terminated byte string in the
;--- .const section under a macro-local label, switches back to .code and
;--- expands to "offset <label>", so it can be used directly as an operand
;--- or INVOKE argument.
CStr macro text:vararg
    local strlbl                ;; unique label for every expansion
    .const
strlbl db text, 0
    .code
    exitm <offset strlbl>
endm

.code

;--- func1_eh: exception handler attached to func1 (see "frame:func1_eh" on
;--- the func1 PROC line).
;--- It prints the fields of the exception record and the context flags and
;--- then returns ExceptionContinueSearch, i.e. it does not handle the
;--- exception itself (item b in the sample header).
;--- in:  pRecord  - exception record, its fields are printed
;---      pFrame   - printed only
;---      pContext - context record, only ContextFlags is printed
;--- out: eax = ExceptionContinueSearch
func1_eh proc frame pRecord:ptr EXCEPTION_RECORD, pFrame:ptr, pContext:ptr CONTEXT

;--- rcx serves as base register for the EXCEPTION_RECORD operands below
mov rcx, pRecord
invoke printf, CStr("func1_eh( pRecord=%p [code=%X flags=%X prevRec=%p addr=%p], pFrame=%p, pContext=%p )",10), rcx,
[rcx].EXCEPTION_RECORD.ExceptionCode,
[rcx].EXCEPTION_RECORD.ExceptionFlags,
[rcx].EXCEPTION_RECORD.ExceptionRecord,
[rcx].EXCEPTION_RECORD.ExceptionAddress,
pFrame, pContext

;--- rcx is reloaded: now the base register for the CONTEXT operand
mov rcx, pContext
invoke printf, CStr("func1_eh: context.flags=%X",10), [rcx].CONTEXT.ContextFlags

mov eax, ExceptionContinueSearch

ret
align 8

func1_eh endp


;--- func1: the procedure that raises the exception.
;--- Its handler is func1_eh; rbx, rsi and rdi are listed in USES.
;--- It sets lcl1 and the three registers to known values, prints them,
;--- calls RaiseException with code 0E2003456h and afterwards prints the
;--- registers and lcl1 again.
;--- NOTE(review): main_eh unwinds and continues at cont_addr in main, so
;--- the printf after RaiseException is presumably never reached in this
;--- sample - confirm by running it.
func1 proc frame:func1_eh uses rbx rsi rdi

local lcl1:dword

mov lcl1, 12345678h
mov rbx, -1
mov rsi, -1
mov rdi, -1
invoke printf, CStr("func1: rbp=%p rbx=%p rsi=%p rdi=%p",10), rbp, rbx, rsi, rdi

invoke RaiseException, 0E2003456h, 0, 0, 0

invoke printf, CStr("func1: exit, rbp=%p rbx=%p rsi=%p rdi=%p lcl1=%X",10), rbp, rbx, rsi, rdi, lcl1
ret
align 8

func1  endp

;--- main_eh: exception handler attached to main ("frame:main_eh").
;--- It prints the exception record and the context flags. If the bit with
;--- value 2 in ExceptionFlags is clear, it calls the unwind function
;--- selected by UNWFUNC with pFrame as target frame and returnaddr as
;--- target address, prints a message once control is back at returnaddr
;--- and jumps to cont_addr in main. If the bit is set, it returns
;--- ExceptionContinueSearch.
;--- NOTE(review): flag value 2 is presumably EXCEPTION_UNWINDING, i.e. the
;--- second invocation of the handler during the unwind (item d in the
;--- sample header) - confirm against the Windows headers.
main_eh proc frame pRecord:ptr EXCEPTION_RECORD, pFrame:ptr, pContext:ptr CONTEXT

;--- rcx serves as base register for the EXCEPTION_RECORD operands below
mov rcx, pRecord
invoke printf, CStr("main_eh( pRecord=%p [code=%X flags=%X prevRec=%p addr=%p], pFrame=%p, pContext=%p )",10), rcx,
[rcx].EXCEPTION_RECORD.ExceptionCode,
[rcx].EXCEPTION_RECORD.ExceptionFlags,
[rcx].EXCEPTION_RECORD.ExceptionRecord,
[rcx].EXCEPTION_RECORD.ExceptionAddress,
pFrame, pContext

mov rcx, pContext
invoke printf, CStr("main_eh: context.flags=%X",10), [rcx].CONTEXT.ContextFlags

;--- rcx was used for printf arguments above, so reload the record pointer
mov rcx, pRecord
.if !( [rcx].EXCEPTION_RECORD.ExceptionFlags & 2 )
invoke printf, CStr("main_eh: calling ", @CatStr(!", %UNWFUNC, !"), "(), rsp=%p, rbp=%p",10), rsp, rbp
;--- UNWFUNC is a text equate; it decides at assembly time which of the
;--- two unwind calls is assembled
ifidn UNWFUNC, <RtlUnwindEx>
invoke RtlUnwindEx, pFrame, offset returnaddr, pRecord, NULL, pContext, NULL
else
invoke RtlUnwind, pFrame, offset returnaddr, pRecord, NULL
endif
returnaddr:
invoke printf, CStr("main_eh: back from ", @CatStr(!", %UNWFUNC, !"), "(), rsp=%p, rbp=%p",10), rsp, rbp
;--- the 64-bit unwind has restored all registers, including RSP!
;--- hence one cannot execute a RET.
jmp cont_addr
; mov eax, ExceptionContinueExecution
.else
mov eax, ExceptionContinueSearch
.endif
ret
align 8

main_eh endp

;--- main: the frame protected by main_eh.
;--- It sets lcl1, loads recognizable patterns into rbx, rsi and rdi,
;--- prints them, and calls func1, which raises the exception. At cont_addr
;--- it prints the registers and lcl1 again so the values before and after
;--- the unwind can be compared.
main proc frame:main_eh

local lcl1:dword

mov lcl1, 12345678h
;--- initialize non-volatile registers to see if the contents remain unchanged
mov rbx, 055667788deadbeefh
mov rsi, 05555aaaa5555aaaah
mov rdi, 08765432112345678h
invoke printf, CStr("main: rsp=%p rbp=%p rbx=%p rsi=%p rdi=%p",10), rsp, rbp, rbx, rsi, rdi

call func1
;--- declared with a double colon so it is visible outside this procedure:
;--- main_eh jumps here after the unwind
cont_addr::
invoke printf, CStr("main: exit, rbp=%p rbx=%p rsi=%p rdi=%p lcl1=%X",10), rbp, rbx, rsi, rdi, lcl1
ret
align 8

main endp

;--- mainCRTStartup: program entry point (named on the END directive).
;--- Calls main and then terminates the process via ExitProcess with exit
;--- code 0. No RET follows the ExitProcess call.
mainCRTStartup proc frame
call main
invoke ExitProcess, 0
mainCRTStartup endp

end mainCRTStartup



I'm sorry that it's a complicated sample, unfortunately, but I don't know how to make it simpler without losing vital information.

The point is: the sample won't run as expected if it is generated with your version of jwasm - while the standard jwasm v2.10 and also the v2.11 prerelease have no problems generating a "running" sample.

The reason for the incompatibility is somewhat hidden in how Win64 handles the "unwind" thingy - with your version of jwasm, it simply cannot know how to restore all registers.




habran

Hi Japheth :biggrin:
I am glad that you paid attention to this version
I have tested and looked through the code  above
I agree that it doesn't work; however, I don't consider it good programming
look:by Matt Pietrek, MSDN 01/1997.
it was not meant to handle 64 bit code when written first time
I would use jmp exit instead of ret and it would work fine

here is nicely explained about usage of the shadow space

in my example I am using last register to align to 16 byte
and I use unused shadow space to store registers to reduce usage of the stack

I saw it in disassembly of MSVC C functions, they do it exactly the same way as I did
Cod-Father

habran

sorry Japheth :P
I actually just glanced at your example because it was late at night
QuoteI would use jmp exit instead of ret and it would work fine
that was bullshit
I will play around to see why is it not working
Cod-Father

habran

It is probably possible to "teach" SEH how to handle this case :icon_confused:
Cod-Father

habran

here is an example of how MSVC12 compiles C code:

UINT_PTR XIndexOffset(ASMEDIT *asme, const XCHARINDEX *charin, XCHARINDEX *charout, INT_PTR offset, int newline)
{
000000013F710900  mov         qword ptr [rsp+18h],rbx 
000000013F710905  push        rbp 
000000013F710906  sub         rsp,40h 
  XCHARINDEX charcnt=*charin;
000000013F71090A  mov         rax,qword ptr [rdx] 
000000013F71090D  mov         r10,qword ptr [rdx+8] 
000000013F710911  mov         qword ptr [charin],rdi 
  INT_PTR offsetcnt=offset;
  int nSub;
  BYTE nLineBreak;

  if (newline)
000000013F710916  mov         edi,dword ptr [newline] 
000000013F71091A  mov         qword ptr [ciCount],rax 
000000013F71091F  mov         rax,qword ptr [rdx+10h] 
000000013F710923  mov         qword ptr [charout],r14 
000000013F710928  mov         qword ptr [rsp+30h],rax 
000000013F71092D  mov         rbp,r9 
000000013F710930  mov         r14,r8 
...
...
000000013F710B08  mov         rbx,qword ptr [offset] 
000000013F710B0D  add         rsp,40h 
000000013F710B11  pop         rbp 
000000013F710B12  ret 

Cod-Father

japheth


The problem with your jwasm version is that the listing of the prologue code is messed up if option -Sg has been set. This makes it virtually impossible to see what SEH-primitives your program has created inside the prologue.

But, as you may have noticed, my example is just a slightly modified version of a sample included in Wininc ( it's in the Sampl64\SEHSmpl folder ). And in this folder there is also a Masm64-compatible version, which has to emit the SEH-primitives manually. I suggest to use this version for experiments, adding the prologue code that your jwasm version is creating  by hand.

If you're lucky, you just have to emit a .SAVEREG directive for the register value saved in the shadow space.

habran

thanks Japheth :t
that's good idea, I'll try it tonight :biggrin:

Cod-Father

habran

#57
I have done it but it breaks :(

here is the code where I implemented it:


/* added for W64F_STATICRSP
 *
 * Stores general-purpose registers from info->regslist in free home
 * (shadow space) slots instead of pushing them.
 *
 * Steps, as implemented below:
 *  1. count the non-XMM registers in info->regslist (grcount);
 *  2. count the free entries among info->home_used[0..3] (r);
 *  3. depending on grcount and r, mark some free slots as used so they are
 *     not taken for register storage (author's stated intent: keep the
 *     number of registers that still have to be pushed odd);
 *  4. for each non-XMM register, while the slot index is below 4, emit
 *     "mov [rsp+offset], reg" into the first slot still marked free, plus a
 *     .SAVEREG line if the register is in win64_nvgpr, and count it in
 *     info->stored_reg.
 *
 * info->regslist is read as: element 0 = number of entries, followed by the
 * register tokens. Nothing is emitted if info->regslist is NULL.
 *
 * NOTE(review): the offset given to .SAVEREG is the same value used in the
 * MOV, i.e. relative to RSP at procedure entry. Win64 unwind data presumably
 * expects the offset relative to RSP after the prologue has finished (pushes
 * and stack allocation included) - confirm against the x64 unwind
 * documentation; this may be why SEH unwinding fails with this code.
 */
static void win64_StoreRegHome( struct proc_info *info )
/*******************************************************/
{
  int          i = 0;
  int           cnt;
  int           grcount=0;     /* number of non-XMM registers in regslist */
  int           sizestd =0;    /* offset from RSP used for the next store */
  int           r;             /* free home slots; reused for grcount - r when grcount > 4 */
  uint_16    *regist;
info->stored_reg = 0;
      if ( info->regslist ) {
         /* step 1: count the general-purpose (non-XMM) registers */
         for( regist = info->regslist,cnt = *regist++; cnt; cnt--, regist++,i++ ) {
      if ( GetValueSp( *regist ) & OP_XMM ) continue;
      else ++grcount;
         }
         /* step 2: count the home slots that are still free */
         for (i=0,r=0;i<4;i++){
            if (info->home_used[i]==0) ++r;
            }
         /* step 3: only done if at least one slot is free */
         if (r){
            /* one register: mark all four slots used, so nothing gets stored */
            if (grcount==1) memset(info->home_used, 1, 4);
            /* two registers: keep the first free slot, mark all slots after it used */
            else if (grcount==2 && r >= 2){
               for (i=0;i<4;i++){
                     if (info->home_used[i]==0) break;
                    }
                     for (++i;i<4;i++)
                        info->home_used[i]=1;
               }
            else if (grcount==3){
               /* a single free slot: mark all four slots used */
               if ( r == 1) memset(info->home_used, 1, 4);
               /* three or more free: keep the first two free slots, mark the rest used */
               if ( r >= 3){
               for (i=0;i<4;i++){
                     if (info->home_used[i]==0) break;
                    }
               for (++i;i<4;i++){
                     if (info->home_used[i]==0) break;
                    }
                     for (++i;i<4;i++)
                        info->home_used[i]=1;
                  }
            }
            else if (grcount==4 && r == 4){
                    /* NOTE(review): index 4 is a fifth element, while every other
                     * access in this function uses indices 0..3 and the memset
                     * length is 4 - confirm the size of home_used and whether
                     * index 3 was intended. */
                    info->home_used[4]=1;
               }
            else if (grcount > 4){
                 /* r now holds grcount minus the number of free slots */
                 r = grcount-r;
                 /* if that difference is even, give up the first free slot */
                 if (!(r & 1 )){
                     for (i=0;i<4;i++){
                     if (info->home_used[i]==0) break;
                    }
                     info->home_used[i]=1;
                  }
               }
            }
            /* step 4: emit the stores; i is the home slot index being tried */
            for( i=0,regist = info->regslist,cnt = *regist++; cnt; cnt--, regist++,i++ ) {
        if ( GetValueSp( *regist ) & OP_XMM ) continue;
        else {
    /* advances on every non-XMM pass, including retries of the same register */
    sizestd += 8;
    if (i < 4) {
if (info->home_used[i]==0){
    AddLineQueueX( "mov [%r+%u], %r",T_RSP, NUMQUAL sizestd, *regist );
                            if ( ( 1 << GetRegNo( *regist ) ) & win64_nvgpr )
            AddLineQueueX( "%r %r, %u", T_DOT_SAVEREG, *regist, NUMQUAL sizestd);                           
                             info->stored_reg++;
}
else {
/* slot taken: cancel the loop header's cnt--/regist++ so the same
 * register is tried again with the next slot index */
cnt++;regist--;
}
  }
             /* once i reaches 4, the remaining registers are not stored here */
             }
              }/* end for */
           }
return;
}

Cod-Father

japheth

Quote from: habran on August 29, 2013, 08:57:10 PM
I have done it but it breaks :(

Alles muss man selber machen!  :icon_mrgreen:

I did a few experiments with the ML64-compatible version. Result: it works to save a register content into the shadow space - the OS will restore the contents if the proper .SAVEREG directive has been used.

However: the offset to use with .SAVEREG must not be calculated from the current value of RSP, but from the value RSP will have after the prologue! Here's the prologue that worked for me:


func1 proc frame:func1_eh

@localsize = 5*8

; push rbx
; .pushreg rbx
mov [rsp+8], rbx                              ;save register rbx in shadow space
.savereg rbx, 8+16+@localsize ;!!! offset must be the "offset" to the "final" RSP

push rsi
.pushreg rsi
push rdi
.pushreg rdi
sub rsp,@localsize
.allocstack @localsize
.endprolog


and the epilogue looks like this:

add rsp, @localsize
pop rdi
pop rsi
; pop rbx
mov rbx, [rsp+8]
ret


habran

you meant: "If you want the job done properly do it yourself" :biggrin:
I have tried it with:

if (info->home_used[i]==0){
AddLineQueueX( "mov [%r+%u], %r",T_RSP, NUMQUAL sizestd, *regist );
                 if ( ( 1 << GetRegNo( *regist ) ) & win64_nvgpr )
               AddLineQueueX( "%r %r, %u", T_DOT_SAVEREG, *regist, NUMQUAL sizestd + 16 + info->localsize);                           
                             info->stored_reg++;
}
else {
cnt++;regist--;
}
    }

It still breaks :(
Cod-Father