Basically, the only thing that would make an RSP-based prologue bigger than the RBP-based one is a large list in USES.
Totally wrong. Please LOOK at this case:
; Configuration for the RSP-based variant of the comparison.
option casemap:none
option frame:auto
; Parameters and locals are addressed relative to RSP rather than RBP
; (every memory operand in the generated code for this variant is [rsp+N]).
OPTION STACKBASE:RSP
; With this setting the generated prologue itself stores rcx, r8 and r9 to
; the stack, so the procedure body below contains no explicit spill code.
option win64:11
; getSum: myVar1 = 1 + val1, myVar2 = myVar1 + val2, then calls
; sub1(dest, rdx, myVar1, myVar2).
getSum proc public FRAME dest:ptr, src:ptr, val1 : qword, val2:qword
LOCAL myVar1 : qword
LOCAL myVar2 : qword
; rax = 1 + val1, stored in myVar1.
mov rax, 1
add rax, val1
mov myVar1, rax
; rax = myVar1 + val2, stored in myVar2.
add rax, val2
mov myVar2, rax
; The second argument is given as the register rdx, not as src. Nothing
; above writes rdx, so it is passed on unchanged (presumably still the
; caller's src under the Win64 register convention -- confirm).
INVOKE sub1, dest, rdx, myVar1, myVar2
ret
getSum endp
disassembles to:
getSum:
000000013F5B181B mov qword ptr [rsp+8],rcx
000000013F5B1820 mov qword ptr [rsp+18h],r8
000000013F5B1825 mov qword ptr [rsp+20h],r9
000000013F5B182A sub rsp,38h
000000013F5B182E mov rax,1
000000013F5B1835 add rax,qword ptr [rsp+50h]
000000013F5B183A mov qword ptr [rsp+20h],rax
000000013F5B183F add rax,qword ptr [rsp+58h]
000000013F5B1844 mov qword ptr [rsp+28h],rax
000000013F5B1849 mov rcx,qword ptr [rsp+40h]
000000013F5B184E mov r8,qword ptr [rsp+20h]
000000013F5B1853 mov r9,qword ptr [rsp+28h]
000000013F5B1858 call 000000013F5B1800
000000013F5B185D add rsp,38h
000000013F5B1861 ret
TOTAL: 70 bytes (71 bytes when the final 1-byte ret is counted).
Now with:
; Configuration for the RBP-based variant of the comparison: no
; OPTION STACKBASE:RSP here, and the generated code for this variant
; addresses every parameter and local through RBP.
option casemap:none
option frame:auto
; With this setting the generated prologue does not store the register
; arguments, which is why the body spills them by hand.
option win64:2
; getSum: myVar1 = 1 + val1, myVar2 = myVar1 + val2, then calls
; sub1(dest, rdx, myVar1, myVar2).
getSum proc public FRAME dest:ptr, src:ptr, val1 : qword, val2:qword
LOCAL myVar1 : qword
LOCAL myVar2 : qword
; Manually save the register arguments into their parameter slots so that
; dest, val1 and val2 can be read back from memory below. src (rdx) is not
; saved; it is only ever used as a register.
mov dest, rcx
mov val1, r8
mov val2, r9
; rax = 1 + val1, stored in myVar1.
mov rax, 1
add rax, val1
mov myVar1, rax
; rax = myVar1 + val2, stored in myVar2.
add rax, val2
mov myVar2, rax
; The second argument is given as the register rdx, not as src. Nothing
; above writes rdx, so it is passed on unchanged (presumably still the
; caller's src under the Win64 register convention -- confirm).
INVOKE sub1, dest, rdx, myVar1, myVar2
ret
getSum endp
disassembles to:
getSum:
000000013F5C1814 push rbp
000000013F5C1815 mov rbp,rsp
000000013F5C1818 sub rsp,30h
000000013F5C181C mov qword ptr [rbp+10h],rcx
000000013F5C1820 mov qword ptr [rbp+20h],r8
000000013F5C1824 mov qword ptr [rbp+28h],r9
000000013F5C1828 mov rax,1
000000013F5C182F add rax,qword ptr [rbp+20h]
000000013F5C1833 mov qword ptr [rbp-8],rax
000000013F5C1837 add rax,qword ptr [rbp+28h]
000000013F5C183B mov qword ptr [rbp-10h],rax
000000013F5C183F mov rcx,qword ptr [rbp+10h]
000000013F5C1843 mov r8,qword ptr [rbp-8]
000000013F5C1847 mov r9,qword ptr [rbp-10h]
000000013F5C184B call 000000013F5C1800
000000013F5C1850 add rsp,30h
000000013F5C1854 pop rbp
000000013F5C1855 ret
TOTAL: 65 bytes (66 bytes when the final 1-byte ret is counted).
The difference is "only" 5 more bytes for the OPTION STACKBASE:RSP alternative.
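Moves from memory to register and vice versa use longer and slower instructions when they go through RSP: in the listings above, each RSP-relative memory access takes 5 bytes, against 4 bytes for the equivalent RBP-relative access.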
There is another problem: you cannot dynamically allocate memory on the stack while OPTION STACKBASE:RSP is in use, because every parameter and local is addressed at a fixed offset from RSP.