Hi all
I've been working on a new feature for 64-bit JWasm:
a static RSP, with nonvolatile registers saved in the callee's home space for the first 4 params.
To compile this way you need to use a new option for JWasm:
option win64:7
option frame:auto
here are some examples:
; example - demo procedure for the new prologue/epilogue generation.
; Declares a long USES list (GPRs and XMM registers), two QWORD parameters
; (baraba, lParam) and two locals: a QWORD (mara) and a MAX_PATH-byte buffer
; (FileName).
; The buffer is referenced by its declared name, FileName, in both the sizeof
; and the lea; the listing that follows shows it addressed as a stack local
; (lea r8,[rsp+28h]).
; NOTE(review): mara is read without ever being written in this procedure, so
; the value left in rax at ret is whatever the stack slot held - presumably
; acceptable for a code-generation demo; confirm.
example PROC FRAME uses rbx rsi xmm8 rdi r12 r13 r14 r15 xmm7 baraba:QWORD,lParam:QWORD
local mara :QWORD
local FileName [MAX_PATH]:BYTE
mov rax,sizeof(FileName)        ; rax = size of the buffer in bytes (104h in the listing)
lea r8,FileName                 ; r8 -> start of the local buffer
mov [r8],rcx                    ; store rcx into the first 8 bytes of the buffer
mov rax,mara                    ; overwrite rax with the (uninitialized) local mara
ret
example endp
it is compiled to that:
example:
0000000140034C07 push rbp
0000000140034C08 mov qword ptr [rsp+8],rbx
0000000140034C0D mov qword ptr [rsp+10h],rsi
0000000140034C12 mov qword ptr [rsp+18h],rdi
0000000140034C17 mov qword ptr [rsp+20h],r12
0000000140034C1C push r14
0000000140034C1E push r15
0000000140034C20 sub rsp,20h
0000000140034C24 movdqa xmmword ptr [rsp],xmm8
0000000140034C2A movdqa xmmword ptr [rsp+10h],xmm7
0000000140034C30 sub rsp,130h
0000000140034C37 mov rax,104h
0000000140034C3E lea r8,[rsp+28h]
0000000140034C43 mov qword ptr [r8],rcx
0000000140034C46 mov rax,qword ptr [rsp+20h]
0000000140034C4B mov rbx,qword ptr [rsp+8]
0000000140034C50 mov rsi,qword ptr [rsp+10h]
0000000140034C55 movdqa xmm8,xmmword ptr [rsp+110h]
0000000140034C5F mov rdi,qword ptr [rsp+18h]
0000000140034C64 mov r12,qword ptr [rsp+20h]
0000000140034C69 movdqa xmm7,xmmword ptr [rsp+120h]
0000000140034C72 add rsp,158h
0000000140034C79 pop r15
0000000140034C7B pop r14
0000000140034C7D pop rbp
0000000140034C7E ret
another:
; example1 - smaller demo: one USES register (rbx), two QWORD parameters
; (baraba, lParam) and two locals: a QWORD (mara) and a MAX_PATH-byte buffer
; (szFileName). mara and lParam are declared but never referenced.
; The value left in rax at ret is the parameter baraba.
example1 PROC FRAME uses rbx baraba:QWORD,lParam:QWORD
local mara :QWORD
local szFileName [MAX_PATH]:BYTE
mov rax,sizeof(szFileName)      ; rax = size of the buffer in bytes (104h in the listing)
lea r8,szFileName               ; r8 -> start of the local buffer
mov [r8],rcx                    ; store rcx into the first 8 bytes of the buffer
                                ; (presumably rcx still holds baraba here - confirm)
mov rax,baraba                  ; reload the first parameter from its stack slot; replaces the sizeof value
ret
example1 endp
compiled to:
example1:
0000000140034C7F mov qword ptr [rsp+8],rcx
0000000140034C84 push rbp
0000000140034C85 push rbx
0000000140034C86 sub rsp,148h
0000000140034C8D mov rax,104h
0000000140034C94 lea r8,[rsp+30h]
0000000140034C99 mov qword ptr [r8],rcx
0000000140034C9C mov rax,qword ptr [rsp+168h]
0000000140034CA4 add rsp,148h
0000000140034CAB pop rbx
0000000140034CAC pop rbp
0000000140034CAD ret
testfan1:
0000000140034CAE mov qword ptr [rsp+8],rcx
0000000140034CB3 mov qword ptr [rsp+10h],rdx
0000000140034CB8 mov qword ptr [rsp+18h],r8
0000000140034CBD mov qword ptr [rsp+20h],r9
0000000140034CC2 push rbp
0000000140034CC3 sub rsp,40h
0000000140034CC7 mov rcx,qword ptr [rsp+50h]
0000000140034CCC mov rdx,qword ptr [rsp+58h]
0000000140034CD1 mov r8,qword ptr [rsp+60h]
0000000140034CD6 mov r9,qword ptr [rsp+68h]
0000000140034CDB mov rax,qword ptr [rsp+70h]
0000000140034CE0 mov ecx,0FFFFFFF5h
0000000140034CE5 call qword ptr [Kernel32_NULL_THUNK_DATA (1400B5B28h)]
0000000140034CEB add rsp,40h
0000000140034CEF pop rbp
0000000140034CF0 ret
one more:
; testfan1 - demo with five QWORD parameters and no USES list.
; Each parameter is read back by name (rsp-relative in the listing that
; follows), then GetStdHandle is invoked.
; The four locals are declared but never referenced; they only contribute to
; the size of the stack frame.
testfan1 PROC FRAME one:QWORD,two:QWORD,three:QWORD,four:QWORD,five:QWORD
local grenma :QWORD
local grenpa :QWORD
local baba :QWORD
local deda :QWORD
mov rcx,one                     ; reload parameters 1-4 from their stack slots
mov rdx,two
mov r8,three
mov r9,four
mov rax,five                    ; fifth parameter is read from the stack
; The invoke loads its own argument into ecx (0FFFFFFF5h in the listing),
; replacing the value of `one` loaded above.
invoke GetStdHandle,STD_OUTPUT_HANDLE
; NOTE(review): rax at ret is whatever GetStdHandle leaves there
; (presumably the handle, per the Win64 return convention).
ret
testfan1 endp
compiled to:
testfan1:
0000000140034CAE mov qword ptr [rsp+8],rcx
0000000140034CB3 mov qword ptr [rsp+10h],rdx
0000000140034CB8 mov qword ptr [rsp+18h],r8
0000000140034CBD mov qword ptr [rsp+20h],r9
0000000140034CC2 push rbp
0000000140034CC3 sub rsp,40h
0000000140034CC7 mov rcx,qword ptr [rsp+50h]
0000000140034CCC mov rdx,qword ptr [rsp+58h]
0000000140034CD1 mov r8,qword ptr [rsp+60h]
0000000140034CD6 mov r9,qword ptr [rsp+68h]
0000000140034CDB mov rax,qword ptr [rsp+70h]
0000000140034CE0 mov ecx,0FFFFFFF5h
0000000140034CE5 call qword ptr [Kernel32_NULL_THUNK_DATA (1400B5B28h)]
0000000140034CEB add rsp,40h
0000000140034CEF pop rbp
0000000140034CF0 ret
one more:
; testfan - demo combining a USES list (rbx rdi rsi r12), five QWORD
; parameters, four QWORD locals and a RECT local.
; It reads every parameter, fills the RECT from the low 32 bits of the first
; four, stores five+1 .. five+4 into the QWORD locals, then invokes
; GetStdHandle.
testfan PROC FRAME uses rbx rdi rsi r12 one:QWORD,two:QWORD,three:QWORD,four:QWORD,five:QWORD
local granny :QWORD
local grandpa :QWORD
local baba :QWORD
local deda :QWORD
local mrect :RECT
mov rcx,one                     ; reload parameters 1-4 from their stack slots
mov rdx,two
mov r8,three
mov r9,four
mov mrect.left,ecx              ; RECT fields receive the low dwords of one..four
mov mrect.top,edx
mov mrect.right,r8d
mov mrect.bottom,r9d
mov rax,five                    ; fifth parameter is read from the stack
inc rax
mov granny,rax                  ; granny  = five + 1
inc rax
mov grandpa,rax                 ; grandpa = five + 2
inc rax
mov baba,rax                    ; baba    = five + 3
inc rax
mov deda,rax                    ; deda    = five + 4
; The invoke loads its own argument into ecx (0FFFFFFF5h in the listing),
; replacing the value of `one` loaded above.
invoke GetStdHandle,STD_OUTPUT_HANDLE
; NOTE(review): rax at ret is whatever GetStdHandle leaves there
; (presumably the handle, per the Win64 return convention).
ret
testfan endp
compiled to:
testfan:
0000000140034CF1 mov qword ptr [rsp+8],rcx
0000000140034CF6 mov qword ptr [rsp+10h],rdx
0000000140034CFB mov qword ptr [rsp+18h],r8
0000000140034D00 mov qword ptr [rsp+20h],r9
0000000140034D05 push rbp
0000000140034D06 push rbx
0000000140034D07 push rdi
0000000140034D08 push rsi
0000000140034D09 push r12
0000000140034D0B sub rsp,50h
0000000140034D0F mov rcx,qword ptr [rsp+80h]
0000000140034D17 mov rdx,qword ptr [rsp+88h]
0000000140034D1F mov r8,qword ptr [rsp+90h]
0000000140034D27 mov r9,qword ptr [rsp+98h]
0000000140034D2F mov dword ptr [rsp+40h],ecx
0000000140034D33 mov dword ptr [rsp+44h],edx
0000000140034D37 mov dword ptr [rsp+48h],r8d
0000000140034D3C mov dword ptr [rsp+4Ch],r9d
0000000140034D41 mov rax,qword ptr [rsp+0A0h]
0000000140034D49 inc rax
0000000140034D4C mov qword ptr [rsp+20h],rax
0000000140034D51 inc rax
0000000140034D54 mov qword ptr [rsp+28h],rax
0000000140034D59 inc rax
0000000140034D5C mov qword ptr [rsp+30h],rax
0000000140034D61 inc rax
0000000140034D64 mov qword ptr [rsp+38h],rax
0000000140034D69 mov ecx,0FFFFFFF5h
0000000140034D6E call qword ptr [Kernel32_NULL_THUNK_DATA (1400B5B28h)]
0000000140034D74 add rsp,50h
0000000140034D78 pop r12
0000000140034D7A pop rsi
0000000140034D7B pop rdi
0000000140034D7C pop rbx
0000000140034D7D pop rbp
0000000140034D7E ret
I am still experimenting so I will post source code later
for now you can test binaries
only for 64 bit systems
here they are:
seems like an ill-formed approach to suppress the frame pointer: why is RBP pushed?
However, a nice feature :t
Quote: seems like an ill-formed approach to suppress the frame pointer
That is not an ill-formed approach; that is what the MSVC compiler does.
Quote: why is RBP pushed?
I thought that, because the stack has to be aligned to 16 bytes, it is better to do "PUSH RBP"
than to do "SUB RSP,8", so we save RBP for free use.
Quote: However, a nice feature :t
thanks :biggrin:
I am still working on it to make sure it works as it is supposed to
when ready I will post here a source code 8)
I would like more people to try to play with it so that we see if there are some bugs :redface:
0000000140034C07 push rbp
0000000140034C08 mov qword ptr [rsp+8],rbx
On entry, RSP points to the return address.
After PUSH RBP, RSP points to the pushed RBP and RSP+8 points to the return address.
Then RSP+8 is overwritten by RBX?
Or have I had too many ambers?
Quote: Or have I had too many ambers?
looks like I had a few too many :P
if we have RBP at -8
then it goes from -8 to 0
so RBX goes from 0 to 7 :biggrin:
it is supposed to go to RSP + 16
thanks, you are better when you have had some ambers than I am without them :icon_redface:
Quote from: habran on May 23, 2013, 07:51:47 PM
Quoteseems like an ill-formed approach to suppress the frame pointer
that is not ill-formed approach, that is what MSVC compiler does
At least for my programs, the compiler often uses MOV instead of PUSH/POP to save the registers. PUSH/POP is then only used to adjust the stack alignment (as you do with PUSH RBP :icon14:).
EDIT: I've taken a deeper look at it and recognize that the compiler does as you say in many cases. However, sometimes the compiler saves the non-volatile registers to the shadow space and then moves the register arguments into those registers.
(Regardless of that, I would still prefer a variant that adjusts the stack pointer once and then uses MOVs for saving.)
Quote from: habran on May 23, 2013, 08:27:49 PM
thanks, you are better when you have some ambers than me wiyhout them :icon_redface:
I use ml64, so I am used to calculating where RSP is :badgrin:
sinsi, you pointed out the bug I was looking for and couldn't see,
even though it was so obvious :redface:
I was like a mother with an ugly child :biggrin:
now I can easily fix it :bgrin:
thanks
qWord, I am still working on fixing everything
now that sinsi has kindly pointed out my main problem I can fix it easily
if you don't like this option you can stick with the other ones
here (http://www.codemachine.com/article_x64deepdive.html) you can read more about it
Similar here: http://ntcore.com/files/vista_x64.htm
sorry people, I was talking about what VC does when the option "suppress frame pointer" is enabled. For some reason I thought the compiler would omit the PUSH/POPs in that case :redface:
hey qWord :P
don't apologize, have a look at my blunder :icon_mrgreen:
it was such an obvious mistake and I was not able to spot it :dazzled:
now I have work to do, to fix it
I'll be back 8)