alloca/_alloca is a C/C++ function that allocates space in the stack of the caller.
alloca/_alloca is extremely fast when compared with the heap allocation functions.
Of course, alloca/_alloca is not suitable for large allocations.
The space allocated by alloca/_alloca does not need to be freed; it is released automatically when the calling function exits.
What I present here is a demo that includes an _alloca 32-bit ASM function with alignment.
As you can see, this _alloca depends on the calling convention because it is not inlined.
The advantage of not being inlined is that it can be used from high-level languages that do not have an alloca function (Delphi, for example).
The demo is short but fairly advanced (in requirements :lol:) and the only assembler up to the task I could find is HASM.
The reason is that no other assembler has reliable support for alignment above 16 bytes, good support for AVX instructions, and good support for calling conventions.
Here is the demo:
; ---------------------------------------------------------------------------
; Module setup: CPU level, calling-convention selection and imported prototypes.
; Syntax: MASM-style (Intel operand order), 32-bit flat model.
; ---------------------------------------------------------------------------
.686
.XMM
; Calling-convention selector. Leave exactly ONE of the symbols below defined.
; The symbol chooses the default language type for .MODEL here and, further
; down, which _alloca implementation gets assembled. The two IFDEF chains test
; the symbols in different orders, so defining several at once can select a
; .MODEL and an _alloca variant that do not match.
;_BORL=1
;_CDEC=1
;_STD=1
;_FAST=1
_PASC=1
IFDEF _BORL
.MODEL FLAT, BORLAND
ELSEIFDEF _CDEC
.MODEL FLAT, C
ELSEIFDEF _STD
.MODEL FLAT, STDCALL
ELSEIFDEF _FAST
.MODEL FLAT, FASTCALL
ELSEIFDEF _PASC
.MODEL FLAT, PASCAL
ENDIF
; Keep the case of user-defined symbol names.
OPTION CASEMAP:NONE
; C runtime functions; these prototypes state the C convention explicitly, so
; they do not follow the .MODEL default chosen above.
option dllimport:<msvcrt.dll>
printf PROTO C arg1:Ptr Byte, printlist: VARARG
getchar PROTO C
; Win32 API function, explicitly STDCALL.
option dllimport:<kernel32.dll>
ExitProcess PROTO STDCALL :dword
; ---------------------------------------------------------------------------
; Reals8ToYmm(par1, par2, par3, par4)
; Macro function. Emits the four REAL8 values as one 32-byte-aligned item in
; the .data section and expands to the (macro-local) label of that item, so
; the invocation can be used in place of a memory operand.
; It unconditionally switches back to .code before returning, so it is meant
; to be invoked from within the code section.
; ---------------------------------------------------------------------------
Reals8ToYmm MACRO par1, par2, par3, par4
    LOCAL packedReals
    .DATA
    ALIGN 32
    packedReals REAL8 par1, par2, par3, par4
    .CODE
    EXITM <packedReals>
ENDM
; printf format strings. Each one consumes four doubles and ends with CR, LF
; and a terminating NUL.
.data
value1 db "Value 1 is: double1 %.3lf double2 %.3lf double3 %.3lf double4 %.3lf",13,10,0
value2 db "Value 2 is: double1 %.3lf double2 %.3lf double3 %.3lf double4 %.3lf",13,10,0
result db "Result is double1 %lf double2 %lf double3 %lf double4 %lf",13,10,0
.code
; _alloca rewrites ESP and the return address by hand, so automatic
; prologue/epilogue generation is switched off for its definitions.
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
IFDEF _BORL
; ---------------------------------------------------------------------------
; _alloca, BORLAND variant.
; Reserves thesize bytes on the caller's stack, aligned down to alignm.
; In:   thesize = number of bytes to reserve
;       alignm  = alignment; must be a nonzero power of two (0 would turn the
;                 mask into 0 and zero ESP). It is negated in place.
;       NOTE(review): no frame is built here, so thesize/alignm are presumably
;       resolved by the assembler to this convention's argument registers
;       (EAX/EDX) - confirm against the assembler documentation.
; Out:  EAX = ESP after the call = start of the reserved area.
; Uses: ECX (holds the return address), flags.
; The caller's ESP is lower after the call; the caller needs a way to restore
; it (e.g. an EBP-based frame). No stack probing is done, keep sizes small.
; ---------------------------------------------------------------------------
_alloca proc public thesize:dword, alignm:dword
pop ecx ; take the return address off the stack
sub esp, thesize ; reserve the requested number of bytes
neg alignm ; -alignm = mask with the low (alignment) bits cleared
and esp, alignm ; round ESP down to a multiple of the alignment
mov eax, esp ; return value: address of the reserved area
push ecx ; put the return address back on top of the stack
ret
_alloca endp
ELSEIFDEF _FAST
; ---------------------------------------------------------------------------
; _alloca, FASTCALL variant.
; Reserves thesize bytes on the caller's stack, aligned down to alignm.
; In:   thesize = number of bytes to reserve
;       alignm  = alignment; must be a nonzero power of two (0 would turn the
;                 mask into 0 and zero ESP). It is negated in place.
;       NOTE(review): no frame is built here, so thesize/alignm are presumably
;       resolved by the assembler to this convention's argument registers
;       (ECX/EDX) - confirm against the assembler documentation.
; Out:  EAX = ESP after the call = start of the reserved area.
; Uses: ECX (ends up holding the return address), flags.
; The caller's ESP is lower after the call; the caller needs a way to restore
; it (e.g. an EBP-based frame). No stack probing is done, keep sizes small.
; ---------------------------------------------------------------------------
_alloca proc public thesize:dword, alignm:dword
pop eax ; take the return address off the stack (parked in EAX in this variant)
sub esp, thesize ; reserve the requested number of bytes
neg alignm ; -alignm = mask with the low (alignment) bits cleared
and esp, alignm ; round ESP down to a multiple of the alignment
mov ecx, eax ; move the return address out of EAX ...
mov eax, esp ; ... so EAX can carry the address of the reserved area
push ecx ; put the return address back on top of the stack
ret
_alloca endp
ELSEIFDEF _CDEC
; ---------------------------------------------------------------------------
; _alloca, C (cdecl) variant.
; Reserves thesize bytes on the caller's stack, aligned down to alignm.
; In:   [stack] thesize = number of bytes to reserve
;       [stack] alignm  = alignment; must be a nonzero power of two (0 would
;                         turn the mask into 0 and zero ESP)
; Out:  EAX = start of the reserved area. On return ESP is 8 bytes below it;
;       once the caller has removed its two argument slots ESP equals EAX.
; Uses: ECX (return address), EDX (alignment mask), flags.
; The caller's ESP is lower after the call; the caller needs a way to restore
; it (e.g. an EBP-based frame). No stack probing is done, keep sizes small.
; ---------------------------------------------------------------------------
_alloca proc public thesize:dword, alignm:dword
pop ecx ; take the return address off the stack
pop eax ; thesize
pop edx ; alignm
sub esp, eax ; reserve the requested number of bytes
neg edx ; -alignm = mask with the low (alignment) bits cleared
and esp, edx ; round ESP down to a multiple of the alignment
mov eax, esp ; return value: address of the reserved area
push edx ; placeholder dword standing in for an argument slot: the caller still
push edx ; removes two dwords after the call, the pushed values are never read
push ecx ; put the return address back on top of the stack
ret
_alloca endp
ELSEIFDEF _STD
; ---------------------------------------------------------------------------
; _alloca, STDCALL variant.
; Reserves thesize bytes on the caller's stack, aligned down to alignm.
; In:   [stack] thesize = number of bytes to reserve
;       [stack] alignm  = alignment; must be a nonzero power of two (0 would
;                         turn the mask into 0 and zero ESP)
; Out:  EAX = ESP after the call = start of the reserved area.
; Uses: ECX (return address), EDX (alignment mask), flags.
; Both arguments are popped here, so a plain RET is enough and nothing is
; left on the stack for the caller to remove.
; The caller's ESP is lower after the call; the caller needs a way to restore
; it (e.g. an EBP-based frame). No stack probing is done, keep sizes small.
; ---------------------------------------------------------------------------
_alloca proc public thesize:dword, alignm:dword
pop ecx ; take the return address off the stack
pop eax ; thesize
pop edx ; alignm
sub esp, eax ; reserve the requested number of bytes
neg edx ; -alignm = mask with the low (alignment) bits cleared
and esp, edx ; round ESP down to a multiple of the alignment
mov eax, esp ; return value: address of the reserved area
push ecx ; put the return address back on top of the stack
ret
_alloca endp
ELSEIFDEF _PASC
; ---------------------------------------------------------------------------
; _alloca, PASCAL variant.
; Reserves thesize bytes on the caller's stack, aligned down to alignm.
; In:   [stack] thesize = number of bytes to reserve
;       [stack] alignm  = alignment; must be a nonzero power of two (0 would
;                         turn the mask into 0 and zero ESP)
;       The arguments are popped in the opposite order to the C/STDCALL
;       variants: alignm comes off the stack first here.
; Out:  EAX = ESP after the call = start of the reserved area.
; Uses: ECX (return address), EDX (size), flags.
; Both arguments are popped here, so a plain RET is enough and nothing is
; left on the stack for the caller to remove.
; The caller's ESP is lower after the call; the caller needs a way to restore
; it (e.g. an EBP-based frame). No stack probing is done, keep sizes small.
; ---------------------------------------------------------------------------
_alloca proc public thesize:dword, alignm:dword
pop ecx ; take the return address off the stack
pop eax ; alignm
pop edx ; thesize
sub esp, edx ; reserve the requested number of bytes
neg eax ; -alignm = mask with the low (alignment) bits cleared
and esp, eax ; round ESP down to a multiple of the alignment
mov eax, esp ; return value: address of the reserved area
push ecx ; put the return address back on top of the stack
ret
_alloca endp
ENDIF
; Switch automatic prologue/epilogue generation back on for the procedures
; that follow.
OPTION PROLOGUE:PrologueDef
OPTION EPILOGUE:EpilogueDef
; ---------------------------------------------------------------------------
; AVXArithFloat: res[i] = sqrt(val1[i] + val2[i]) for i = 0..3 (packed doubles).
; In:   val1, val2 = pointers to the two 4 x REAL8 input vectors
;       res        = pointer to the 4 x REAL8 output vector
;       All three are accessed with VMOVAPD, which requires 32-byte alignment.
; Out:  result stored through res; no value is set up in EAX for the caller
;       (EAX is left holding res).
; Uses: EAX, YMM0-YMM3.
; NOTE(review): every pointer is fetched through EAX, one at a time and in
; declaration order. With a register-based calling convention selected the
; parameters presumably live in registers, so changing the scratch register
; or the load order could overwrite an argument that has not been read yet.
; ---------------------------------------------------------------------------
AVXArithFloat proc public val1:ptr, val2: ptr, res:ptr
mov eax, val1
vmovapd ymm0,ymmword ptr [eax] ; ymm0 = *val1
mov eax, val2
vmovapd ymm1,ymmword ptr [eax] ; ymm1 = *val2
vaddpd ymm2,ymm0,ymm1 ; ymm2 = element-wise sum
vsqrtpd ymm3, ymm2 ; ymm3 = element-wise square root of the sum
mov eax, res
vmovapd ymmword ptr [eax],ymm3 ; *res = result
vzeroupper ; clear the upper YMM halves before returning to non-AVX code
ret
AVXArithFloat endp
; ---------------------------------------------------------------------------
; start: program entry point of the demo.
; Obtains three 32-byte, 32-byte-aligned buffers from _alloca, fills the first
; two with constant vectors, lets AVXArithFloat compute sqrt(v1 + v2) into the
; third, prints all three with printf, waits for getchar and exits the process.
; Every _alloca call lowers ESP, so the statement order below matters.
; ---------------------------------------------------------------------------
start proc
LOCAL _ymm0 : PTR YMMWORD ; -> first input vector
LOCAL _ymm1 : PTR YMMWORD ; -> second input vector
LOCAL _ymm2 : PTR YMMWORD ; -> result vector
;int 3
; First input vector: load the constants, then copy them to the stack buffer.
vmovapd ymm6, YMMWORD ptr Reals8ToYmm(771.3101, -101.544, 9221.34, -341.7773)
INVOKE _alloca, 32, 32 ; allocate 32 bytes on the stack with 32 byte alignment
vmovapd ymmword ptr [eax], ymm6 ; copy to 32-byte aligned memory
mov _ymm0, eax
; Second input vector, same procedure.
vmovapd ymm7, YMMWORD ptr Reals8ToYmm(333.3101, 701.544, -348.84, 421.6599)
INVOKE _alloca, 32, 32 ; allocate 32 bytes on the stack with 32 byte alignment
vmovapd ymmword ptr [eax], ymm7 ; copy to 32-byte aligned memory
mov _ymm1, eax
; Result buffer; it is left uninitialised until AVXArithFloat writes to it.
INVOKE _alloca, 32, 32 ; allocate 32 bytes on the stack with 32 byte alignment
mov _ymm2, eax
INVOKE AVXArithFloat, _ymm0, _ymm1, _ymm2
; Print the two inputs and the result, four doubles each.
mov eax, _ymm0
INVOKE printf, addr value1, real8 ptr [eax], real8 ptr [eax+8], real8 ptr [eax+16], real8 ptr [eax+24]
mov eax, _ymm1
INVOKE printf, addr value2, real8 ptr [eax], real8 ptr [eax+8], real8 ptr [eax+16], real8 ptr [eax+24]
mov eax, _ymm2
INVOKE printf, addr result, real8 ptr [eax], real8 ptr [eax+8], real8 ptr [eax+16], real8 ptr [eax+24]
INVOKE getchar ; wait for input before terminating
INVOKE ExitProcess, 0 ; exit code 0; the stack space from _alloca is never released explicitly
start endp
; End of module; `start` is the program entry point.
end start