News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests

Main Menu

Static RSP in JWasm

Started by habran, May 23, 2013, 04:41:27 PM

Previous topic - Next topic

habran

Hi all

I've been working on a new feature for JWasm 64 bit

that is, a static RSP plus saving nonvolatile registers in the callee's home space for the first 4 params

to compile to this you need to use a new option for JWasm:
option win64:7
option frame:auto



here are some examples:

example PROC FRAME uses rbx rsi xmm8 rdi r12 r13 r14 r15 xmm7 baraba:QWORD,lParam:QWORD
local   mara :QWORD
local FileName [MAX_PATH]:BYTE
    mov rax,sizeof(FileName)
    lea r8,szFileName
    mov [r8],rcx
    mov rax,mara
    ret
example endp   


it is compiled to this:


example:
0000000140034C07  push        rbp 
0000000140034C08  mov         qword ptr [rsp+8],rbx
0000000140034C0D  mov         qword ptr [rsp+10h],rsi
0000000140034C12  mov         qword ptr [rsp+18h],rdi
0000000140034C17  mov         qword ptr [rsp+20h],r12
0000000140034C1C  push        r14 
0000000140034C1E  push        r15 
0000000140034C20  sub         rsp,20h
0000000140034C24  movdqa      xmmword ptr [rsp],xmm8
0000000140034C2A  movdqa      xmmword ptr [rsp+10h],xmm7
0000000140034C30  sub         rsp,130h
0000000140034C37  mov         rax,104h
0000000140034C3E  lea         r8,[rsp+28h]
0000000140034C43  mov         qword ptr [r8],rcx
0000000140034C46  mov         rax,qword ptr [rsp+20h]
0000000140034C4B  mov         rbx,qword ptr [rsp+8]
0000000140034C50  mov         rsi,qword ptr [rsp+10h]
0000000140034C55  movdqa      xmm8,xmmword ptr [rsp+110h]
0000000140034C5F  mov         rdi,qword ptr [rsp+18h]
0000000140034C64  mov         r12,qword ptr [rsp+20h]
0000000140034C69  movdqa      xmm7,xmmword ptr [rsp+120h]
0000000140034C72  add         rsp,158h
0000000140034C79  pop         r15 
0000000140034C7B  pop         r14 
0000000140034C7D  pop         rbp 
0000000140034C7E  ret             



another:
example1 PROC FRAME uses rbx  baraba:QWORD,lParam:QWORD
local   mara :QWORD
local szFileName [MAX_PATH]:BYTE
    mov rax,sizeof(szFileName)
    lea r8,szFileName
    mov [r8],rcx
    mov rax,baraba
    ret
example1 endp   

compiled to:
example1:
0000000140034C7F  mov         qword ptr [rsp+8],rcx
0000000140034C84  push        rbp 
0000000140034C85  push        rbx 
0000000140034C86  sub         rsp,148h
0000000140034C8D  mov         rax,104h
0000000140034C94  lea         r8,[rsp+30h]
0000000140034C99  mov         qword ptr [r8],rcx
0000000140034C9C  mov         rax,qword ptr [rsp+168h]
0000000140034CA4  add         rsp,148h
0000000140034CAB  pop         rbx 
0000000140034CAC  pop         rbp 
0000000140034CAD  ret             
testfan1:
0000000140034CAE  mov         qword ptr [rsp+8],rcx
0000000140034CB3  mov         qword ptr [rsp+10h],rdx
0000000140034CB8  mov         qword ptr [rsp+18h],r8
0000000140034CBD  mov         qword ptr [rsp+20h],r9
0000000140034CC2  push        rbp 
0000000140034CC3  sub         rsp,40h
0000000140034CC7  mov         rcx,qword ptr [rsp+50h]
0000000140034CCC  mov         rdx,qword ptr [rsp+58h]
0000000140034CD1  mov         r8,qword ptr [rsp+60h]
0000000140034CD6  mov         r9,qword ptr [rsp+68h]
0000000140034CDB  mov         rax,qword ptr [rsp+70h]
0000000140034CE0  mov         ecx,0FFFFFFF5h
0000000140034CE5  call        qword ptr [Kernel32_NULL_THUNK_DATA (1400B5B28h)]
0000000140034CEB  add         rsp,40h
0000000140034CEF  pop         rbp 
0000000140034CF0  ret             


one more:

testfan1 PROC FRAME one:QWORD,two:QWORD,three:QWORD,four:QWORD,five:QWORD
local grenma :QWORD
local grenpa :QWORD
local baba :QWORD
local deda :QWORD
    mov rcx,one
    mov rdx,two
    mov r8,three
    mov r9,four
    mov rax,five
    invoke GetStdHandle,STD_OUTPUT_HANDLE
ret
testfan1 endp

compiled to:
testfan1:
0000000140034CAE  mov         qword ptr [rsp+8],rcx
0000000140034CB3  mov         qword ptr [rsp+10h],rdx
0000000140034CB8  mov         qword ptr [rsp+18h],r8
0000000140034CBD  mov         qword ptr [rsp+20h],r9
0000000140034CC2  push        rbp 
0000000140034CC3  sub         rsp,40h
0000000140034CC7  mov         rcx,qword ptr [rsp+50h]
0000000140034CCC  mov         rdx,qword ptr [rsp+58h]
0000000140034CD1  mov         r8,qword ptr [rsp+60h]
0000000140034CD6  mov         r9,qword ptr [rsp+68h]
0000000140034CDB  mov         rax,qword ptr [rsp+70h]
0000000140034CE0  mov         ecx,0FFFFFFF5h
0000000140034CE5  call        qword ptr [Kernel32_NULL_THUNK_DATA (1400B5B28h)]
0000000140034CEB  add         rsp,40h
0000000140034CEF  pop         rbp 
0000000140034CF0  ret             


one more:

testfan PROC FRAME uses rbx rdi rsi r12 one:QWORD,two:QWORD,three:QWORD,four:QWORD,five:QWORD
local granny :QWORD
local grandpa :QWORD
local baba :QWORD
local deda :QWORD
local mrect :RECT
    mov rcx,one
    mov rdx,two
    mov r8,three
    mov r9,four
    mov mrect.left,ecx
    mov mrect.top,edx
    mov mrect.right,r8d
    mov mrect.bottom,r9d
    mov rax,five
    inc rax
    mov granny,rax
    inc rax
    mov grandpa,rax
    inc rax
    mov baba,rax
    inc rax
    mov deda,rax
    invoke GetStdHandle,STD_OUTPUT_HANDLE
    ret
testfan endp


compiled to:

testfan:
0000000140034CF1  mov         qword ptr [rsp+8],rcx
0000000140034CF6  mov         qword ptr [rsp+10h],rdx
0000000140034CFB  mov         qword ptr [rsp+18h],r8
0000000140034D00  mov         qword ptr [rsp+20h],r9
0000000140034D05  push        rbp 
0000000140034D06  push        rbx 
0000000140034D07  push        rdi 
0000000140034D08  push        rsi 
0000000140034D09  push        r12 
0000000140034D0B  sub         rsp,50h
0000000140034D0F  mov         rcx,qword ptr [rsp+80h]
0000000140034D17  mov         rdx,qword ptr [rsp+88h]
0000000140034D1F  mov         r8,qword ptr [rsp+90h]
0000000140034D27  mov         r9,qword ptr [rsp+98h]
0000000140034D2F  mov         dword ptr [rsp+40h],ecx
0000000140034D33  mov         dword ptr [rsp+44h],edx
0000000140034D37  mov         dword ptr [rsp+48h],r8d
0000000140034D3C  mov         dword ptr [rsp+4Ch],r9d
0000000140034D41  mov         rax,qword ptr [rsp+0A0h]
0000000140034D49  inc         rax 
0000000140034D4C  mov         qword ptr [rsp+20h],rax
0000000140034D51  inc         rax 
0000000140034D54  mov         qword ptr [rsp+28h],rax
0000000140034D59  inc         rax 
0000000140034D5C  mov         qword ptr [rsp+30h],rax
0000000140034D61  inc         rax 
0000000140034D64  mov         qword ptr [rsp+38h],rax
0000000140034D69  mov         ecx,0FFFFFFF5h
0000000140034D6E  call        qword ptr [Kernel32_NULL_THUNK_DATA (1400B5B28h)]
0000000140034D74  add         rsp,50h
0000000140034D78  pop         r12 
0000000140034D7A  pop         rsi 
0000000140034D7B  pop         rdi 
0000000140034D7C  pop         rbx 
0000000140034D7D  pop         rbp 
0000000140034D7E  ret             


I am still experimenting so I will post source code later
for now you can test binaries
only for 64 bit systems
here they are:
Cod-Father

qWord

seems like an ill-formed approach to suppress the frame pointer: why is RBP pushed?
However, a nice feature  :t
MREAL macros - when you need floating point arithmetic while assembling!

habran

Quote: seems like an ill-formed approach to suppress the frame pointer
that is not an ill-formed approach, that is what the MSVC compiler does

Quote: why is RBP pushed?
I thought that, because the stack has to be aligned to 16 bytes, it is better to do "PUSH RBP"
than to do "SUB RSP,8", so we save RBP for free use

QuoteHowever, a nice feature  :t
thanks :biggrin:

I am still working on it to make sure it works as it is supposed to
when ready I will post here a source code 8)

I would like more people to try to play with it so that we see if there are some bugs :redface:
Cod-Father

sinsi

0000000140034C07  push        rbp 
0000000140034C08  mov         qword ptr [rsp+8],rbx

On entry, RSP points to the return address.
After PUSH RBP, RSP points to the pushed RBP and RSP+8 points to the return address.
Then RSP+8 is overwritten by RBX?

Or have I had too many ambers?

habran

Quote: Or have I had too many ambers?
looks like I had a few too many :P

if we have RBP at -8
then it goes from -8 to 0
so RBX goes from 0 to 7 :biggrin:
it is supposed to go to RSP + 16

thanks, you are better when you have some ambers than me without them :icon_redface:
Cod-Father

qWord

Quote from: habran on May 23, 2013, 07:51:47 PM
Quoteseems like an ill-formed approach to suppress the frame pointer
that is not ill-formed approach, that is what MSVC compiler does
At least for my programs the compiler often uses MOV instead of PUSH/POPs to save the registers. PUSH/POPs are then only used to adjust the stack alignment (as you do it with PUSH RBP  :icon14:).

EDIT: I've taken a deeper look at it and recognize that the compiler does as you say in many cases. However, sometimes the compiler saves the non-volatile registers to the shadow space and then moves the register arguments into these.
(Regardless, I would still prefer a variant that adjusts the stack pointer once and then uses MOVs for saving.)
MREAL macros - when you need floating point arithmetic while assembling!

sinsi

Quote from: habran on May 23, 2013, 08:27:49 PM
thanks, you are better when you have some ambers than me without them :icon_redface:
I use ml64, so I am used to calculating where RSP is  :badgrin:

habran

sinsi, you pointed out the bug I was looking for and couldn't see,
even though it was so obvious  :redface:

I was like a mother with an ugly child :biggrin:

now I can easily fix it :bgrin:

thanks

Cod-Father

habran

qWord, I am still working on fixing everything
now that sinsi has kindly pointed out my main problem I can fix it easily
if you don't like this option you can stick with other ones
here you can read more about it
Cod-Father

sinsi

Similar here: http://ntcore.com/files/vista_x64.htm

qWord

sorry people, I was talking about what VC does when the option "suppress frame pointer" is enabled. For some reason I thought the compiler would omit PUSH/POPs in that case  :redface:
MREAL macros - when you need floating point arithmetic while assembling!

habran

hey qWord :P
don't apologize, have a look at my blunder  :icon_mrgreen:
it was such an obvious mistake and I was not able to spot it :dazzled:

now I have work to do to fix it

I'll be back 8)


Cod-Father