; Please refer to the three cases described above; each procedure below is tagged with the case it illustrates.
includelib \masm32\lib64\kernel32.lib
ExitProcess PROTO :dword
.code
;CASE 1
; p1 - leaf procedure: no parameters, no LOCALs, no USES list, and it calls
; nothing in the code shown here. The only stack adjustment is the explicit
; sub/add pair below.
; Under the Microsoft x64 convention rsp is 16-byte aligned at each call, so
; on entry (return address just pushed) rsp sits 8 bytes past a 16-byte
; boundary; subtracting 8 brings it back to a 16-byte boundary.
p1 proc ; leaf
sub rsp, 8 ; align stack
; ... do our things
add rsp, 8 ; undo the adjustment so ret finds the return address at [rsp]
ret
p1 endp
;Case 2
; p2 - procedure with one declared parameter (parm1, dword).
; main loads rcx with 1 immediately before calling this procedure.
; Per the original author's note, declaring a parameter makes the assembler
; emit a prologue containing "push rbp", and that push already restores
; 16-byte alignment, so no explicit alignment instruction is required.
; NOTE(review): confirm the generated prologue in the assembler listing.
p2 proc parm1:dword
;and rsp, -16 ;no need here, but will not hurt if used, because push rbp will align
; ... do our things
ret
p2 endp
;Case 2
; p3 - procedure with one LOCAL variable (myvar, dword) and no parameters.
; After the local has been reserved, rsp is rounded down to a 16-byte
; boundary with an explicit AND.
; NOTE(review): because the AND may move rsp by an amount not known at
; assembly time, this relies on the generated epilogue restoring rsp from
; rbp (e.g. via "leave") before returning - confirm in the assembler listing.
p3 proc
LOCAL myvar:dword
and rsp, -16 ; align: clear the low 4 bits of rsp (round down to a multiple of 16)
; do our things
ret
p3 endp
;Case 2
; p4_1 - procedure with a USES list (rbx, rdi, rsi) and five qword parameters.
;
; ABI:   Microsoft x64. The caller in this file (p4) passes par1..par4 in
;        rcx, rdx, r8, r9 and par5 in the stack slot just above its 20h bytes
;        of shadow space.
; Saves: rbx, rdi, rsi are pushed by the generated prologue and popped by the
;        generated epilogue.
;
; Fix:   the epilogue pops the USES registers relative to rsp. The explicit
;        "and rsp, -16" below moves rsp after those pushes (three pushes on
;        top of "push rbp" leave rsp 8 bytes off a 16-byte boundary), so
;        without a correction the pops would read the wrong stack slots and
;        return with rbx, rdi and rsi holding wrong values. rsp is therefore
;        pointed back at the register-save area before ret.
;        Assumes the default frame layout: push rbp / mov rbp, rsp /
;        push rbx / push rdi / push rsi, with no LOCALs in between -
;        confirm in the assembler listing if the prologue options change.
P4_1_USES_BYTES equ 3 * 8 ; bytes pushed for rbx, rdi, rsi below the saved rbp
p4_1 proc uses rbx rdi rsi par1:qword, par2:qword, par3:qword, par4:qword, par5:qword
and rsp, -16 ; align: round rsp down to a 16-byte boundary
; do our things
lea rsp, [rbp - P4_1_USES_BYTES] ; undo the alignment: rsp -> last USES push (lea leaves flags intact)
ret
p4_1 endp
; Case 3
; p4 - non-leaf procedure: calls p4_1 with the integer arguments 1, 2, 3, 4, 5.
;
; ABI:   Microsoft x64 (first four arguments in rcx, rdx, r8, r9; the fifth
;        in the stack slot directly above the 20h bytes of shadow space).
; Stack: reserves 28h bytes = 20h shadow space + 8h for the 5th argument.
;        Per the original author's note, rsp is already 16-byte aligned after
;        this adjustment, so an extra "and rsp, -16" is not needed (it would
;        be harmless).
p4 proc
        sub     rsp, 28h                ; shadow space + space for 5th parameter

        mov     rax, 5
        mov     [rsp+20h], rax          ; 5th argument: first slot above the shadow space

        mov     ecx, 1                  ; 1st argument (32-bit write zero-extends into rcx)
        mov     edx, 2                  ; 2nd argument
        mov     r8d, 3                  ; 3rd argument
        mov     r9d, 4                  ; 4th argument
        call    p4_1

        add     rsp, 28h                ; release shadow space + argument slot
        ret
p4 endp
; Case 3 (entry point variant)
; main - calls p1, p2 (with rcx = 1), p3 and p4 in turn, then ends the
; process through ExitProcess with exit code 0.
;
; Stack: reserves 28h bytes (shadow space + alignment) once, up front.
;        No matching "add rsp, 28h" / "ret" is written: per the original
;        author's note, no epilogue is needed because the procedure finishes
;        by calling ExitProcess.
main proc
        sub     rsp, 28h                ; shadow space + align

        call    p1

        mov     ecx, 1                  ; argument for p2 (zero-extends into rcx)
        call    p2

        call    p3
        call    p4

        mov     ecx, 0                  ; exit code
        call    ExitProcess
main endp
end