I'm just trying to convert this program in c++ to asm. It works normally if notepad.exe is loaded, it will return 1 in c++. In asm, I'm having issues with 'lea rbx, aProcesses' which should contain each pid in [rbx] but it doesn't.
Here is the code in c++ in windows
#include <Windows.h>
#include <stdio.h>
#include <psapi.h>
bool isRunning(char * pName){
unsigned long aProcesses[1024], cbNeeded, cProcesses;
if(!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
return false;
printf("sizeof(aProcesses): %zd\n", sizeof(aProcesses));
printf("cbNeeded: %ld\n", cbNeeded);
printf("sizeof(unsigned long): %zd\n", sizeof(unsigned long));
cProcesses = cbNeeded / sizeof(unsigned long);
printf("cProcesses: %ld\n", cProcesses);
for(unsigned int i = 0; i < cProcesses; i++)
{
printf("i: %d: ", i);
printf("aProcesses[i]: %d\n", aProcesses[i]);
if(aProcesses[i] == 0)
continue;
HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, aProcesses[i]);
printf("hProcess: %p\n", hProcess);
char buffer[50];
GetModuleBaseName(hProcess, 0, buffer, 50);
CloseHandle(hProcess);
if(strcmp(pName, buffer) == 0)
return true;
}
return false;
}
int main(void){
bool ret = isRunning("notepad.exe");
printf("%d\n", ret);
return 0;
}
---------------------------------
And here is my code in asm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
includelib ucrt.lib
includelib msvcrt.lib
includelib legacy_stdio_definitions.lib
includelib kernel32.lib
includelib psapi.lib
.data
aProcesses DD 1024 DUP(0) ; unsigned long aProcesses[1024]
cbNeeded DQ ?
cProcesses DQ ?
hProcess DQ ?
pName db "notepad.exe",0
found db "pid found!",0
not_found db "pid not found!",0
.code
externdef printf:proc
externdef EnumProcesses:proc
externdef OpenProcess:proc
externdef GetModuleBaseNameA:proc
externdef CloseHandle:proc
externdef ExitProcess:proc
main proc
sub rsp, 28h ;reserve stack space for called functions
and rsp, 0fffffffffffffff0h ;make sure stack 16-byte aligned
begin:
; if(!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
; return false;
lea r8, cbNeeded ; &cbNeeded; use lea whenever var is [out]
mov rdx, 1000h ; sizeof(aProcesses); 4096
lea rcx, aProcesses ; long aProcesses[1024] array to hold 1024 pids; use lea whenever var is [out]; pointers are passed by reference; like &
sub rsp, 20h
call EnumProcesses
xor rax, rax
xor rbx, rbx
mov ax, WORD PTR [cbNeeded] ; dereferenced; do not use lea
sar eax, 2 ; does the same thing as the division below. i've debugged this
;mov bl, 4h ; size of long
;div bl
mov cProcesses, rax ; ax contains quotient; dx contains remainder
; for(unsigned int i = 0; i < cProcesses; i++)
; {
; if(aProcesses[i] == 0)
; continue;
xor r14, r14 ; r14 is the counter
mov r14, cProcesses ; cProcesses contains the number of total processes
xor rbx, rbx
lea rbx, aProcesses ; all processes; the entire array
find_pid:
xor rax, rax
mov eax, DWORD PTR [rbx] ; this should be the PID but having trouble getting this to work
add rbx, 4h ; incrementing to the next element; long is 4 bytes each
cmp eax, 0 ; check if null
je continue
jmp open_process
continue:
dec r14 ; (while --ecx) in c; r14 is the counter
cmp r14, 0
je no__match
jmp find_pid
open_process:
; HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, aProcesses[i]);
xor r8, r8
mov r8d, eax ; eax = aProcesses[i] ; each element is 4 bytes, not 1
xor rdx, rdx ; arg2 = NULL
xor rcx, rcx
mov rcx, 410h ; PROCESS_QUERY_INFORMATION: 400h; PROCESS_VM_READ 10h; add them both = 410h
sub rsp, 20h
call OpenProcess
mov hProcess, rax
add rsp, 20h
; char buf[256];
; we add 256 bytes on the stack since we want a clean buffer generated for each loop
xor rax, rax
xor rcx, rcx
mov al, 20h ; 32 bytes x 8 (push rcx) = 0x100 (256) bytes is needed for 'char buf[256]'
init_buf:
push rcx
dec al
cmp al, cl
jne init_buf
lea r15, [rsp] ; must use a register because we will need to load it into rdx for GetModuleBaseName
; GetModuleBaseName(hProcess, 0, buffer, 50);
xor r8, r8
xor r9, r9
mov r9, 100h ; 256 bytes for our buffer to write information into: [out] buffer
mov r8, r15 ; r15 has the address of our buffer on the stack
xor rdx, rdx ; 2nd arg = NULL
mov rcx, hProcess
sub rsp, 20h
call GetModuleBaseNameA
add rsp, 20h
; CloseHandle(hProcess);
xor rcx, rcx
mov rcx, hProcess
sub rsp, 20h
call CloseHandle
add rsp, 20h
; if(strcmp(pName, buffer) == 0)
; return true;
lea rsi, pName
lea rdi, QWORD PTR [r15] ; r15 has the address of our buffer on the stack but [r15] is the dereferenced buffer
call str_cmp
add rsp, 100h ; add buffer stack space back to avoid stack overflow
cmp rax, 0
je match
; if current pid does not match, resume loop to next pid
jmp find_pid ; resumes loop for next pid (aProcesses[i])
no__match: ; none of the pids matched the string pName
lea rcx, not_found
sub rsp, 20h
call printf
add rsp, 20h
jmp exit
match:
lea rcx, found
sub rsp, 20h
call printf
add rsp, 20h
exit:
sub rsp, 20h
call ExitProcess
main endp
end
Quote from: cyrus on January 14, 2024, 12:20:40 PMdiv bl
This will fail; use div ebx instead. Besides,
sar eax, 2 will work better.
I've actually checked the return of this in ax and it matches cProcesses in c++. For my system it returns around 139-142.
> isrunning.exe
sizeof(aProcesses): 4096
cbNeeded: 560
sizeof(unsigned long): 4
cProcesses: 140
i: 0: aProcesses[i]: 0
i: 1: aProcesses[i]: 4
hProcess: 0000000000000000
i: 2: aProcesses[i]: 92
hProcess: 0000000000000000
But I did modify that to sar which has the same effect anyway, but no change.
Quote from: cyrus on January 14, 2024, 12:50:13 PMBut I did modify that to sar which has the same effect anyway, but no change.
I don't have time to check the logic of your project, but
div bl will definitely fail with an exception.
Well I have debugged that and it does not fail so I'm not sure how you reach that conclusion, but 'sar eax, 2' has the same effect as 'div bl'. The end result in ax is the number of processes which match cProcesses in the c++ code.
The code you posted gives me an array of PIDs, not just 2.
One problem may be the "sub rsp,20h", the function EnumProcesses seems to skip over the misalignment of the stack.
As far as the "div bl" goes, you are fluking it, since it translates to "divide AX by BL", so any count >255 would overflow.
Quote from: sinsi on January 14, 2024, 01:41:12 PMThe code you posted gives me an array of PIDs, not just 2.
One problem may be the "sub rsp,20h", the function EnumProcesses seems to skip over the misalignment of the stack.
As far as the "div bl" goes, you are fluking it, since it translates to "divide AX by BL", so any count >255 would overflow.
I've posted the entire code this time. I edited my original post. As mentioned, from OpenProcess and below, all that works because I tested it by manually placing the actual PID of notepad.exe from my system into eax which returned true. But in this code, I have a loop and not able to dereference aProcesses. As far as the add rsp, well I removed that because I realized I was not in the loop there. If that isn't used in a loop, a stack overflow will be the result. But that isn't the issue.
(https://i.postimg.cc/1fLFqhxL/bl.png) (https://postimg.cc/1fLFqhxL)
This proves the division by bl does indeed work. The end result in RAX is 8D which in decimal, is 141. Which, if you run the c++ code, you will see it is cProcesses and it is more or less the same. This will vary each time, for me, its been between 139 - 144 depending on when I run it. Also, re-running the c++ code may change that number
Couple small things that don't affect execution:
xor r14, r14 ; r14 is the counter
mov r14, cProcesses ; cProcesses contains the number of total processes
xor rbx, rbx
lea rbx, aProcesses ; all processes; the entire array
Completely unnecessary to clear those registers before loading them.
I was able to successfully get this to work but I'm not sure why it actually mattered. Initially, I was using rsi instead of rbx to place aProcesses into. I had other functions to print integers with printf and it used rsi so I ended up using rbx. Well when I used rsi, I noticed that each byte was the actual string from pName. Why is that? I never loaded any string into rsi yet somehow it is initialized to contain that string. Does it do this in startup? I ended up just changing the register from rbx to r12, something that isn't volatile and it worked perfectly. Took me an entire day to realize that. Any ideas why I couldn't use rsi for that? I use rsi this way in other code. If I load something in it, I expect it to have my array, not some string I defined in my code which bears no resemblance to rsi.
BTW, my code includes my original division by bl.
Quote from: NoCforMe on January 14, 2024, 02:53:23 PMCouple small things that don't affect execution:
xor r14, r14 ; r14 is the counter
mov r14, cProcesses ; cProcesses contains the number of total processes
xor rbx, rbx
lea rbx, aProcesses ; all processes; the entire array
Completely unnecessary to clear those registers before loading them.
https://masm32.com/board/Smileys/default/badgrin.gif
I realized that for lea but out of habit I keep doing that just in case because sometimes I have over a thousand lines of code and if I forget to xor something, I get in trouble.
Love the screen name. I should have chose noPyforMe since I hate python so much. :angelic:
Are you sure you understand the 64-bit ABI correctly?
I don't do any 64-bit programming myself, so I'm not sure of the particulars, but I believe that RSI, as well as RBX and RDI, are "sacred" registers that must be preserved, just as their 32-bit counterparts are. So if you're going to use them in your code you need to preserve them and restore them before exiting. Not sure if that was the issue you experienced.
I'm curious to know if RSI comes pre-loaded (with the command tail?) at the program entry point.
Quote from: cyrus on January 14, 2024, 03:04:42 PMI realized that for lea but out of habit I keep doing that just in case because sometimes I have over a thousand lines of code and if I forget to xor something, I get in trouble.
Well, no harm, no foul.
BTW, concerning the
div bl, since you're doing a power-of-2 divide, use
SHR (no need for
SAR) instead. It's much more elegant. Not to mention tons faster, not that it matters here ...
Quote from: NoCforMe on January 14, 2024, 03:08:40 PMAre you sure you understand the 64-bit ABI correctly?
I don't do any 64-bit programming myself, so I'm not sure of the particulars, but I believe that RSI, as well as RBX and RDI, are "sacred" registers that must be preserved, just as their 32-bit counterparts are. So if you're going to use them in your code you need to preserve them and restore them before exiting. Not sure if that was the issue you experienced.
I'm curious to know if RSI comes pre-loaded (with the command tail?) at the program entry point.
Quote from: cyrus on January 14, 2024, 03:04:42 PMI realized that for lea but out of habit I keep doing that just in case because sometimes I have over a thousand lines of code and if I forget to xor something, I get in trouble.
Well, no harm, no foul.
BTW, concerning the div bl, since you're doing a power-of-2 divide, use SHR (no need for SAR) instead. It's much more elegant. Not to mention tons faster, not that it matters here ...
Yes this register is not one that can be used throughout a loop like that. Now that I think of it, this is the first time I attempted to use that register through a loop and I should have been more cautious about anything that isn't r12-15.
As for RSI itself and string, I do in fact think that it does come pre-loaded with some string that is defined. For me that happened to be the first string pName. I've seen this happen before but wasn't sure.
Good to know about the division. It is definitely faster as I've used SHR before but not SAR. In large cases, I will definitely use them. Thanks all for the comments (even though they didn't solve the issue, they are helpful for future endeavors in asm)
You need to read up about spill/shadow space and passing parameters for 64-bit.
sub rsp, 28h+256 ;reserve stack space for called functions
lea r15, [rsp+28] ; delete the later line before the call to GetModuleBaseName
This change seems to *not crash*
You normally allocate 4 qwords for the spill. If a Windows function you call has more than 4 parameters then you would allocate that many. Note that you MUST allocate a minimum of 4.
Once you have set up your stack, don't touch it - no more "sub rsp,20h/add rsp,20h" pairs, the initial adjustment will take care of it.
My recommendation, take it or leave it: Forget 64-bit programming. Completely overkill and a pain in the ass besides. Win32 forever!
Quote from: NoCforMe on January 14, 2024, 04:07:32 PMMy recommendation, take it or leave it: Forget 64-bit programming. Completely overkill and a pain in the ass besides. Win32 forever!
It is nice to allocate 8GB to work with an SQL table and have the WHOLE F'N THING in memory :cool:
I should have said "forget 64-bit programming except in certain circumstances where you need humongous amounts of memory" ...
Quote from: sinsi on January 14, 2024, 03:34:58 PMYou need to read up about spill/shadow space and passing parameters for 64-bit.
sub rsp, 28h+256 ;reserve stack space for called functions
lea r15, [rsp+28] ; delete the later line before the call to GetModuleBaseName
This change seems to *not crash*
You normally allocate 4 qwords for the spill. If a Windows function you call has more than 4 parameters then you would allocate that many. Note that you MUST allocate a minimum of 4.
Once you have set up your stack, don't touch it - no more "sub rsp,20h/add rsp,20h" pairs, the initial adjustment will take care of it.
I have noticed that the style of setting aside stack space this way you stated: 'sub rsp, 256' and then using that for my buffer doesn't end up working in some cases and I'll tell you why. When you reserve stack space that way, it's going to have random data, not null bytes. When you try to use that for a buffer, you never know what you'll get and often your buffer will contain other data and not work. I do that style of subtracting stack space when I am going to use that amount of space to dedicate to a structure like the PROCESS INFORMATION in CreateProcessA because that is going to get populated. or WSAData, or when I am in a read loop from a network socket. That buffer is going to fill up entirely with the data I am reading in and then gets null-terminated.
However, I believe my weakness with asm in general is the stack space. I have 1 program where I have to make 2 calls to printf with an empty string because it won't work otherwise and I've written quite a bit of programs with perfect stack alignment, so I don't know what that issue is.
I believe the sub rsp, 20h is required for every function call isn't it? I read about this before in 64-bit programming. The add rsp, 20h is only necessary when I am in a loop. If I leave it out, stack overflow.
Quote from: NoCforMe on January 14, 2024, 04:40:37 PMI should have said "forget 64-bit programming except in certain circumstances where you need humongous amounts of memory" ...
I understand 32-bits is more fun to program but I have to actually program this for current systems which are 64-bit lol.
Quote from: cyrus on January 14, 2024, 06:15:17 PMI have noticed that the style of setting aside stack space this way you stated: 'sub rsp, 256' and then using that for my buffer doesn't end up working in some cases ...
Two reasons to fail, 256 is not enough, or misalignes the stack.
Quote from: cyrus on January 14, 2024, 06:15:17 PMWhen you reserve stack space that way, it's going to have random data, not null bytes.
As for any LOCAL variable, you set it up for the call, if the call returns no error the buffer has to be correc.
Quote from: cyrus on January 14, 2024, 06:15:17 PMI believe the sub rsp, 20h is required for every function call isn't it? I read about this before in 64-bit programming. The add rsp, 20h is only necessary when I am in a loop. If I leave it out, stack overflow.
A Windows function uses at least 4 spill slots, that's what the "sub rsp,20h" is, assuming the stack is aligned (which it isn't on entry).
You are way off here, study the Win64 ABI.
Quote from: cyrus on January 14, 2024, 06:15:17 PMI have noticed that the style of setting aside stack space this way you stated: 'sub rsp, 256' and then using that for my buffer doesn't end up working in some cases and I'll tell you why. When you reserve stack space that way, it's going to have random data, not null bytes. When you try to use that for a buffer, you never know what you'll get [...]
Yes. It's the same with any variables allocated on the stack as LOCALs. The rule is, when using any such stack-allocated space, ASSUME it contains garbage.
You can clear stack space just like any other space by using REP STOSB or in a loop by setting it to the desired value. For instance (32-bit example here):
PUSH EDI
LEA EDI, <variable you want to clear>
MOV ECX, <size of variable in bytes>
MOV AL, <value to fill variable with>
REP STOSB
POP EDI
--or--
LEA EDX, <variable you want to clear>
MOV ECX, <size of variable in bytes>
MOV AL, <value to fill variable with>
@@: MOV [EDX], AL
INC EDX
LOOP @B
You can clear the space using words, dwords or qwords as well.
Also, if the stack space is going to receive the results of a function call like your
EnumProcesses(), it doesn't matter what's in the buffer: the function will just overwrite it, so no need to initialize it.
Quote from: cyrus on January 14, 2024, 01:18:33 PMI have debugged that and it does not fail
So did I, and as Sinsi wrote, it will brutally fail for values over 1023
*).
Test it (the code is Masm64 SDK compatible (https://masm32.com/board/index.php?topic=10880.0), unlike yours):
include \masm64\include64\masm64rt.inc
.code
entry_point proc
xor rax, rax
xor rbx, rbx
INT 3
mov ax, 1234h ; simulated WORD PTR [cbNeeded]
mov bl, 4h ; size of long
div bl ; before: eax=1234h, ebx=4h
conout str$(eax)
invoke ExitProcess, 0
entry_point endp
end
*) Actually, it is
much more complicated, see attachment.
With poasm:
ifdef __UASM__
.x64
.Model flat
endif
ExitProcess PROTO STDCALL :DWORD
.code
_mainCRTStartup proc
xor rax, rax
xor rbx, rbx
INT 3
mov ax, 1234h ; simulated WORD PTR [cbNeeded]
mov bl, 4h ; size of long
div bl ; before: eax=1234h, ebx=4h
;conout str$(eax)
;invoke ExitProcess, 0
mov eax, 0
call ExitProcess ; just for ml64
_mainCRTStartup endp
end
Quote from: sinsi on January 14, 2024, 06:33:13 PMQuote from: cyrus on January 14, 2024, 06:15:17 PMI have noticed that the style of setting aside stack space this way you stated: 'sub rsp, 256' and then using that for my buffer doesn't end up working in some cases ...
Two reasons to fail, 256 is not enough, or misalignes the stack.
That is a good point and I've missed that it may corrupt the stack alignment there.
Quote from: cyrus on January 14, 2024, 06:15:17 PMWhen you reserve stack space that way, it's going to have random data, not null bytes.
As for any LOCAL variable, you set it up for the call, if the call returns no error the buffer has to be correc.
I already know that local variables are set up for that call. In this case, I am setting up buffer for each call. Could I do what NoCForMe mentioned, declare my buf as 256 in the .data section initialized to 0, and then use REP STOSB in each call to clear it out before I use it? Yes but I'm not sure if that is more efficient than simply pushing 256 null bytes on the stack. Is it? If so, I may use that for the increase in performance but I doubt it would matter in that regard. Maybe if that was megabytes.
Quote from: cyrus on January 14, 2024, 06:15:17 PMI believe the sub rsp, 20h is required for every function call isn't it? I read about this before in 64-bit programming. The add rsp, 20h is only necessary when I am in a loop. If I leave it out, stack overflow.
A Windows function uses at least 4 spill slots, that's what the "sub rsp,20h" is, assuming the stack is aligned (which it isn't on entry).
You are way off here, study the Win64 ABI.
What am I way off on exactly? I did mention a windows function uses 32 bytes so why are you telling me that?
Quote from: jj2007 on January 14, 2024, 08:07:06 PMQuote from: cyrus on January 14, 2024, 01:18:33 PMI have debugged that and it does not fail
So did I, and as Sinsi wrote, it will brutally fail for values over 1023*).
Test it (the code is Masm64 SDK compatible (https://masm32.com/board/index.php?topic=10880.0), unlike yours):
include \masm64\include64\masm64rt.inc
.code
entry_point proc
xor rax, rax
xor rbx, rbx
INT 3
mov ax, 1234h ; simulated WORD PTR [cbNeeded]
mov bl, 4h ; size of long
div bl ; before: eax=1234h, ebx=4h
conout str$(eax)
invoke ExitProcess, 0
entry_point endp
end
*) Actually, it is much more complicated, see attachment.
Good point. I overlooked anything over 1023, so that makes sense.
Quote from: NoCforMe on January 14, 2024, 07:00:54 PMQuote from: cyrus on January 14, 2024, 06:15:17 PMI have noticed that the style of setting aside stack space this way you stated: 'sub rsp, 256' and then using that for my buffer doesn't end up working in some cases and I'll tell you why. When you reserve stack space that way, it's going to have random data, not null bytes. When you try to use that for a buffer, you never know what you'll get [...]
Yes. It's the same with any variables allocated on the stack as LOCALs. The rule is, when using any such stack-allocated space, ASSUME it contains garbage.
You can clear stack space just like any other space by using REP STOSB or in a loop by setting it to the desired value. For instance (32-bit example here):
PUSH EDI
LEA EDI, <variable you want to clear>
MOV ECX, <size of variable in bytes>
MOV AL, <value to fill variable with>
REP STOSB
POP EDI
--or--
LEA EDX, <variable you want to clear>
MOV ECX, <size of variable in bytes>
MOV AL, <value to fill variable with>
@@: MOV [EDX], AL
INC EDX
LOOP @B
You can clear the space using words, dwords or qwords as well.
Also, if the stack space is going to receive the results of a function call like your EnumProcesses(), it doesn't matter what's in the buffer: the function will just overwrite it, so no need to initialize it.
I did mention when I have a buffer I'm going to fill entirely, using 'sub rsp' method works just fine. It's when in these cases, the data varies and I don't know how large that may be and I'm comparing strings. Although in this particular case, I know 'notepad.exe' is only 11 bytes so if data from other PIDs are read into the 11 byte buffer, I don't care but it may overflow onto something else and I figure 256 bytes isn't much to push onto the stack.
Thanks for the tip on clearing a buffer. 2 things here.
1. Is that more efficient than declaring my buffer in the .data section, initializing it to 0, then simply doing that for each call when I am in the loop? Or is simply pushing 256 bytes on the stack just as efficient?
2. I managed to "clear" my buffer by doing
mov qword ptr [r15], 0 ; clear the buffer, otherwise it will end up in an infinite loop thinking it is always there
Assuming r15 has the beginning of rsp where I pushed 256 bytes onto. I believe it just adds a null terminator to that so it may not clear the entire data but I believe it is sufficient for strcmp.
Quote from: cyrus on January 15, 2024, 06:35:23 AMWhat am I way off on exactly? I did mention a windows function uses 32 bytes so why are you telling me that?
It gets tricky when a function has more than 4 parameters, the extra ones get put onto the stack, usually by a series of "mov [rsp+28h],rax" and so on, so it's easy to lose track of where RSP is.
Even if a function has 0 parameters, it still needs those 32 bytes, that's part of the ABI.
Quote from: sinsi on January 15, 2024, 09:32:51 AMQuote from: cyrus on January 15, 2024, 06:35:23 AMWhat am I way off on exactly? I did mention a windows function uses 32 bytes so why are you telling me that?
It gets tricky when a function has more than 4 parameters, the extra ones get put onto the stack, usually by a series of "mov [rsp+28h],rax" and so on, so it's easy to lose track of where RSP is.
Even if a function has 0 parameters, it still needs those 32 bytes, that's part of the ABI.
Ok I totally know that. Here is an example of how I call WSASocketA. In 32-bits, I used push. In 64-bit, I do exactly what is required.
; call WSASocketA
sub rsp, 30h
xor r9, r9 ; 4th arg: lpProtocolInfo=NULL (uses itself from above: NULL)
;push r9 ; 6th arg: dwFlags=NULL
;push r9 ; 5th arg: g=NULL
mov QWORD PTR [rsp + 28h], 00h ; 6th arg: dwFlags=NULL
mov QWORD PTR [rsp + 20h], 00h ; 5th arg: g=NULL
xor r8, r8
mov r8b, 6h ; 3rd arg: protocol=6
xor rdx, rdx
mov dl, 1h ; 2nd arg: type=1
xor rcx, rcx
mov cl, 2h ; 1st arg: af=2
call WSASocketA ; call WSASocketA
mov sockfd, rax ; save socket descriptor of WSASocketA to sockfd variable
callWSASocketA PROC
;on entry, the stack is misaligned. We have 6 arguments, so need to add 8 bytes to align it
sub rsp, 38h ;This would be at the top of this proc so every function call can re-use it
;As a bonus it gives us 8 bytes to use at [RSP+30..37] (this time)
;swap some code around to cut down on size
xor r9d,r9d ; 4th arg: lpProtocolInfo=NULL (uses itself from above: NULL)
mov [rsp+28h],r9 ; 6th arg: dwFlags=NULL
mov [rsp+20h],r9 ; 5th arg: g=NULL
;the next 3 args are of type 'int' which is 32-bit? I'm not a C programmer
;The advantage of altering the low 32 bits of a register is that the upper 32 are cleared.
;Of course if you forget that it can make your code crash in mysterious ways :)
mov r8d,6h ; 3rd arg: protocol=6
mov edx,1h ; 2nd arg: type=1
mov ecx,2h ; 1st arg: af=2
call WSASocketA ; call WSASocketA
;this proc acts like a function, and returns rax
;Slightly better than having this code accessing a non-local var
add rsp,38h
ret
callWSASocketA ENDP
Another way
callWSASocketA PROC
mov ecx,2
mov edx,1
mov r8d,6
xor r9d,r9d
push rax ;aligns the stack
push 0
push 0
sub rsp,20h
call WSASocketA
add rsp,7*8
ret
callWSASocketA ENDP
Quote from: cyrus on January 15, 2024, 06:39:53 AMThanks for the tip on clearing a buffer. 2 things here.
1. Is that more efficient than declaring my buffer in the .data section, initializing it to 0, then simply doing that for each call when I am in the loop? Or is simply pushing 256 bytes on the stack just as efficient?
2. I managed to "clear" my buffer by doing
mov qword ptr [r15], 0 ; clear the buffer, otherwise it will end up in an infinite loop thinking it is always there
Assuming r15 has the beginning of rsp where I pushed 256 bytes onto. I believe it just adds a null terminator to that so it may not clear the entire data but I believe it is sufficient for strcmp.
Just to clear up a bit of confusion here: I didn't realize that the data going into your buffer was strings. That actually makes things easier.
1. Again, if you're having a function fill a buffer, you don't need to "clear" the buffer, as the function will simply overwrite whatever's in the buffer to start with.
2. Your 2nd bit of code there is correct. Since strings (the kind we deal with here in assembly language 99.99% of the time) are NULL-terminated, all you need to do to "clear" a buffer is to put a single byte of zero into it.
3. If you're doing string comparisons on a buffer that's been filled by a function, again, you don't need to initialize the buffer first, as the string (assuming there's just one) is guaranteed to have a NULL at the end. There are some weird Windows API functions that return multiple strings where each string is terminated by one NULL and the whole shebang is terminated by an extra NULL, but those are special cases. Even there, you're always going to be able to find the end of the strings and the end of the buffer.
About your question about using a static buffer (one declared in your
.data section) instead of one allocated on the stack: pretty much 6 of one, half a dozen of the other. Not more or less efficient either way. It's true that you can initialize the static buffer when you declare it. But again, if you're using it multiple times with your Enum function, there's no need to "clear" it each time anyhow. A static buffer will take up space in your program; however, you can minimize the space it occupies in the .exe file by declaring it in your
.data? section (uninitialized data), but then you can't initialize it in the declaration; you'll have to use code to initialize it if you need to do that.
Quote from: NoCforMe on January 15, 2024, 01:33:04 PMAbout your question about using a static buffer (one declared in your .data section) instead of one allocated on the stack: pretty much 6 of one, half a dozen of the other. Not more or less efficient either way. It's true that you can initialize the static buffer when you declare it. But again, if you're using it multiple times with your Enum function, there's no need to "clear" it each time anyhow. A static buffer will take up space in your program; however, you can minimize the space it occupies in the .exe file by declaring it in your .data? section (uninitialized data), but then you can't initialize it in the declaration; you'll have to use code to initialize it if you need to do that.
Just to add to that, if your procedure is recursive, or gets called by multiple threads, you
have to use the stack, otherwise each running procedure will clobber the other's buffer (since it is the same buffer).
Quote from: NoCforMe on January 15, 2024, 01:33:04 PMdeclaring it in your .data? section (uninitialized data), but then you can't initialize it in the declaration
The OS loader will do that for you:
.data? is always zeroed at program start. Of course, if you write to that buffer and come back later, you need to zero it again.
With little modifications for Masm64 SDK (and perhaps a couple of issues) apparently work correctly:
include \masm32\include64\masm64rt.inc
include \masm32\include64\psapi.inc
includelib \masm32\lib64\psapi.lib
.data
aProcesses DD 1024 DUP(0) ; unsigned long aProcesses[1024]
cbNeeded DQ ?
cProcesses DQ ?
hProcess DQ ?
; pName db "notepad.exe",0
pName db "qeditor.exe",0
found db "pid found!",0
not_found db "pid not found!",0
.code
entry_point proc
sub rsp, 28h ;reserve stack space for called functions
and rsp, 0fffffffffffffff0h ;make sure stack 16-byte aligned
begin:
; if(!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
; return false;
lea r8, cbNeeded ; &cbNeeded; use lea whenever var is [out]
mov rdx, 1000h ; sizeof(aProcesses); 4096
lea rcx, aProcesses ; long aProcesses[1024] array to hold 1024 pids; use lea whenever var is [out]; pointers are passed by reference; like &
sub rsp, 20h
call EnumProcesses
add rsp, 20h ; >>> this was missing
xor rax, rax
xor rbx, rbx
mov ax, WORD PTR [cbNeeded] ; dereferenced; do not use lea
sar eax, 2 ; does the same thing as the division below. i've debugged this
;mov bl, 4h ; size of long
;div bl
mov cProcesses, rax ; ax contains quotient; dx contains remainder
; for(unsigned int i = 0; i < cProcesses; i++)
; {
; if(aProcesses[i] == 0)
; continue;
mov r14, cProcesses ; cProcesses contains the number of total processes
lea rbx, aProcesses ; all processes; the entire array
find_pid:
mov eax, DWORD PTR [rbx] ; this should be the PID but having trouble getting this to work
add rbx, 4h ; incrementing to the next element; long is 4 bytes each
cmp eax, 0 ; check if null
je continue
jmp open_process
continue:
dec r14 ; (while --ecx) in c; r14 is the counter
cmp r14, 0
je no__match
jmp find_pid
open_process:
; HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, aProcesses[i]);
mov r8d, eax ; eax = aProcesses[i] ; each element is 4 bytes, not 1
xor rdx, rdx ; arg2 = NULL
mov rcx, 410h ; PROCESS_QUERY_INFORMATION: 400h; PROCESS_VM_READ 10h; add them both = 410h
sub rsp, 20h
call OpenProcess
mov hProcess, rax
add rsp, 20h
; char buf[256];
; we add 256 bytes on the stack since we want a clean buffer generated for each loop
xor rax, rax
xor rcx, rcx
mov al, 20h ; 32 bytes x 8 (push rcx) = 0x100 (256) bytes is needed for 'char buf[256]'
init_buf:
push rcx
dec al
cmp al, cl
jne init_buf
lea r15, [rsp] ; must use a register because we will need to load it into rdx for GetModuleBaseName
; GetModuleBaseName(hProcess, 0, buffer, 50);
xor r8, r8
xor r9, r9
mov r9, 100h ; 256 bytes for our buffer to write information into: [out] buffer
mov r8, r15 ; r15 has the address of our buffer on the stack
xor rdx, rdx ; 2nd arg = NULL
mov rcx, hProcess
sub rsp, 20h
call GetModuleBaseNameA
add rsp, 20h
; CloseHandle(hProcess);
xor rcx, rcx
mov rcx, hProcess
sub rsp, 20h
call CloseHandle
add rsp, 20h
; if(strcmp(pName, buffer) == 0)
; return true;
;
; >> Note here where used rsi and rdi ???
;
lea rcx, pName
lea rdx, QWORD PTR [r15] ; r15 has the address of our buffer on the stack but [r15] is the dereferenced buffer
call szCmp
add rsp, 100h ; add buffer stack space back to avoid stack overflow
cmp rax, 0
jne match
; if current pid does not match, resume loop to next pid
jmp find_pid ; resumes loop for next pid (aProcesses[i])
no__match: ; none of the pids matched the string pName
lea rcx, not_found
sub rsp, 20h
call vc_printf
add rsp, 20h
jmp exit
match:
lea rcx, found
sub rsp, 20h
call vc_printf
add rsp, 20h
exit:
sub rsp, 20h
call ExitProcess
entry_point endp
end
It has to stop using MASMx86 and don't waste newbies time with it.
About EnumProcessModules:
"If this function is called from a 32-bit application running on WOW64,
it can only enumerate the modules of a 32-bit process.
If the process is a 64-bit process, this function fails and the last error code is ERROR_PARTIAL_COPY (299)."
https://learn.microsoft.com/en-us/windows/win32/api/psapi/nf-psapi-enumprocessmodules (https://learn.microsoft.com/en-us/windows/win32/api/psapi/nf-psapi-enumprocessmodules)
Thank you HSE for code!
Why so complicated and incomprehensible for such a trifle!? :biggrin:
;***************************************************;
include \masm32\include64\masm64rt.inc
include \masm32\include64\psapi.inc
includelib \masm32\lib64\psapi.lib
include \masm32\include64\ntdll.inc
includelib \masm32\lib64\ntdll.lib
;***************************************************;
.data
SaveRBX dq 0
SaveRSI dq 0
SaveRDI dq 0
lphModule dq 0
aProcesses dd 1024 DUP(0)
uModuleName db 520 Dup(0)
cbNeeded dd 0
hProcess dd 0
lpcbNeeded dd 0
uName dw "n","o","t","e","p","a","d",".","e","x","e",0,0,0,0
szMessTitle dw "n","o","t","e","p","a","d",".","e","x","e"," ","i","s"," ","f","o","u","n","d","!",0,0,0,0
uString dw "%","l","u",0,0,0,0
szMess dw "P","I","D",":"," "
szUBuff db 56 Dup(0)
;****************************************************;
.code
;****************************************************;
main proc
sub rsp, 48
; Get the list of process identifiers.
;BOOL EnumProcesses( [out] DWORD *lpidProcess,[in] DWORD cb, [out] LPDWORD lpcbNeeded
lea r8, cbNeeded ; Result
mov edx, 1000h ; cb -> The size of the pProcessIds array, in bytes.
lea rcx, aProcesses ; A pointer to an array that receives the list of process identifiers
call EnumProcesses
test eax, eax
je Ret_0
; Calculate how many process identifiers were returned.
mov eax, cbNeeded ; cbNeeded bytes / 4 = cProcesses
shr eax,2 ; cProcesses = cbNeeded / sizeof(DWORD);
; Print the name and process identifier for each process.
test eax, eax
je Ret_0
mov SaveRBX, rbx
mov SaveRSI, rsi
mov SaveRDI, rdi
lea rbx, aProcesses ; rbx -> DWORD aProcesses[1024]
mov edi, eax ; edi->cProcesses
;****************************************************;
@Loop:
mov r8d,dword ptr [rbx] ; edi->get current PID
test r8d,r8d
je @Next
;HANDLE OpenProcess( [in] DWORD dwDesiredAccess, [in] BOOL bInheritHandle, [in] DWORD dwProcessId);
xor edx,edx ; BOOL bInheritHandle
mov ecx,410h ; dwDesiredAccess
call OpenProcess
mov rsi,rax ; rsi=rax=open handle to the specified process
test rax,rax ; If rax=0 -> not every process can be opened !!!
je @Skip ; and skip it
;BOOL EnumProcessModules( [in] HANDLE hProcess, [out] HMODULE *lphModule,[in] DWORD cb,[out] LPDWORD lpcbNeeded);
lea r9, lpcbNeeded ; lpcbNeeded -> The number of bytes required to store all module handles in the lphModule array
mov r8d,8 ; cb -> The size of the lphModule array, in bytes
lea rdx,lphModule ; Result -> *lphModule
mov rcx,rax ; rcx = rax = hProcess
call EnumProcessModules
test eax,eax ; If eax = zero->Error
je @Skip ;
;DWORD GetModuleBaseNameW( [in] HANDLE hProcess, [in, optional] HMODULE hModule,[out] LPWSTR lpBaseName, [in] DWORD nSize);
mov rdx, lphModule ; rdx -> *lphModule
lea r8, uModuleName ; Buffer for currebt ModulName
mov r9d, 104h ; nSize of lpBaseName in bytes
mov rcx, rsi ; rcx=rsi -> hProcess
call GetModuleBaseNameW
test eax, eax
je @Skip
;...Compare...
lea rcx, uModuleName
lea rdx, uName
call _wcsicmp
test eax, eax
jne @Skip
;....Found it...
xor r9d, r9d
mov r8d, dword ptr[rbx] ; PID
lea rdx, uString
lea rcx, szUBuff
call wsprintfW
xor r9d, r9d
lea r8, szMessTitle
lea rdx, szMess
xor ecx, ecx
call MessageBoxW
mov eax, dword ptr [rbx]
jmp Ret_1
@Skip:
mov rcx,rsi ; rcx=rsi -> hProcess
call CloseHandle
@Next:
add rbx,4 ; rbx-> next ID from array aProcesses
sub rdi,1 ; one less for rdi->cProcesses
jne @Loop
xor eax,eax
Ret_1:
mov rbx, SaveRBX
mov rsi, SaveRSI
mov rdi, SaveRDI
add rsp,48
ret
Ret_0:
xor eax,eax
add rsp,48
ret
main endp
;***************************************************;
End
Quote from: lingo on January 16, 2024, 07:52:14 AMIt has to stop using MASMx86 and don't waste newbies time with it.
No. Just no.(They're talking about 32-bit X86 programming here.)
I'm committed to Win32/x86, and there are a hell of a lot of others here who are as well. So please don't go around making blanket prohibitions like this.
I would fight you on this but I don't want to pollute this thread.
Now back to the OP's problems. I don't know if you've already seen this stuff, but you might want to look at the Microsoft Learn pages on the 64-bit ABI, here (https://learn.microsoft.com/en-us/cpp/build/x64-software-conventions?view=msvc-170) and here (https://learn.microsoft.com/en-us/cpp/build/stack-usage?view=msvc-170) (the 2nd page covers stack usage).
Hi Lingo!
Quote from: lingo on January 16, 2024, 07:52:14 AMWhy so complicated and incomprehensible for such a trifle!? :biggrin:
It's cyrus's code. He is testing things, and code is working :thumbsup:
HSE
Quote from: lingo on January 16, 2024, 07:52:14 AMuName dw "n","o","t","e","p","a","d",".","e","x","e",0,0,0,0
szMessTitle dw "n","o","t","e","p","a","d",".","e","x","e"," ","i","s"," ","f","o","u","n","d","!",0,0,0,0
I sincerely hope you have a tool that
generates this crap, Lingo :biggrin:
I recommend wChr$() (https://www.jj2007.eu/MasmBasicQuickReference.htm#Mb1139), wData or Ole$() (https://www.jj2007.eu/MasmBasicQuickReference.htm#Mb1139).
I wanted to mention that I am reading these posts from you gentlemen but I have been testing another program that keeps crashing due to a stack overflow despite the fact I am adding 'add rsp, 20h' to every call and adding the correct stack space. This one was a tough one, dealing with iterating structures which I have done successfully. I will post that code shortly. I am pretty frustrated about it.
Quote from: HSE on January 15, 2024, 11:11:02 PMWith little modifications for Masm64 SDK (and perhaps a couple of issues) apparently work correctly:
include \masm32\include64\masm64rt.inc
include \masm32\include64\psapi.inc
includelib \masm32\lib64\psapi.lib
.data
aProcesses DD 1024 DUP(0) ; unsigned long aProcesses[1024]
cbNeeded DQ ?
cProcesses DQ ?
hProcess DQ ?
; pName db "notepad.exe",0
pName db "qeditor.exe",0
found db "pid found!",0
not_found db "pid not found!",0
.code
entry_point proc
sub rsp, 28h ;reserve stack space for called functions
and rsp, 0fffffffffffffff0h ;make sure stack 16-byte aligned
begin:
; if(!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
; return false;
lea r8, cbNeeded ; &cbNeeded; use lea whenever var is [out]
mov rdx, 1000h ; sizeof(aProcesses); 4096
lea rcx, aProcesses ; long aProcesses[1024] array to hold 1024 pids; use lea whenever var is [out]; pointers are passed by reference; like &
sub rsp, 20h
call EnumProcesses
add rsp, 20h ; >>> this was missing
xor rax, rax
xor rbx, rbx
mov ax, WORD PTR [cbNeeded] ; dereferenced; do not use lea
sar eax, 2 ; does the same thing as the division below. i've debugged this
;mov bl, 4h ; size of long
;div bl
mov cProcesses, rax ; ax contains quotient; dx contains remainder
; for(unsigned int i = 0; i < cProcesses; i++)
; {
; if(aProcesses[i] == 0)
; continue;
mov r14, cProcesses ; cProcesses contains the number of total processes
lea rbx, aProcesses ; all processes; the entire array
find_pid:
mov eax, DWORD PTR [rbx] ; this should be the PID but having trouble getting this to work
add rbx, 4h ; incrementing to the next element; long is 4 bytes each
cmp eax, 0 ; check if null
je continue
jmp open_process
continue:
dec r14 ; (while --ecx) in c; r14 is the counter
cmp r14, 0
je no__match
jmp find_pid
open_process:
; HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, aProcesses[i]);
mov r8d, eax ; eax = aProcesses[i] ; each element is 4 bytes, not 1
xor rdx, rdx ; arg2 = NULL
mov rcx, 410h ; PROCESS_QUERY_INFORMATION: 400h; PROCESS_VM_READ 10h; add them both = 410h
sub rsp, 20h
call OpenProcess
mov hProcess, rax
add rsp, 20h
; char buf[256];
; we add 256 bytes on the stack since we want a clean buffer generated for each loop
xor rax, rax
xor rcx, rcx
mov al, 20h ; 32 bytes x 8 (push rcx) = 0x100 (256) bytes is needed for 'char buf[256]'
init_buf:
push rcx
dec al
cmp al, cl
jne init_buf
lea r15, [rsp] ; must use a register because we will need to load it into rdx for GetModuleBaseName
; GetModuleBaseName(hProcess, 0, buffer, 50);
xor r8, r8
xor r9, r9
mov r9, 100h ; 256 bytes for our buffer to write information into: [out] buffer
mov r8, r15 ; r15 has the address of our buffer on the stack
xor rdx, rdx ; 2nd arg = NULL
mov rcx, hProcess
sub rsp, 20h
call GetModuleBaseNameA
add rsp, 20h
; CloseHandle(hProcess);
xor rcx, rcx
mov rcx, hProcess
sub rsp, 20h
call CloseHandle
add rsp, 20h
; if(strcmp(pName, buffer) == 0)
; return true;
;
; >> Note here where used rsi and rdi ???
;
lea rcx, pName
lea rdx, QWORD PTR [r15] ; r15 has the address of our buffer on the stack but [r15] is the dereferenced buffer
call szCmp
add rsp, 100h ; add buffer stack space back to avoid stack overflow
cmp rax, 0
jne match
; if current pid does not match, resume loop to next pid
jmp find_pid ; resumes loop for next pid (aProcesses[i])
no__match: ; none of the pids matched the string pName
lea rcx, not_found
sub rsp, 20h
call vc_printf
add rsp, 20h
jmp exit
match:
lea rcx, found
sub rsp, 20h
call vc_printf
add rsp, 20h
exit:
sub rsp, 20h
call ExitProcess
entry_point endp
end
This works now because I modified my original post. Instead of RBX for holding the address of aProcesses, I used RSI. I even modified it again to use R12 instead of RBX just in case I use RBX somewhere else. Although no functions ever use it, it is still a volatile register and it isn't wise to store the address of a pointer in there through a large loop like I have with multiple function calls within it.
Quote from: cyrus on January 16, 2024, 02:32:30 PMAlthough no functions ever use it, it is still a volatile register
No, rbx/ebx is a non-volatile register. Windows will not modify it, with one exception: callbacks like WndProc.
Quote from: jj2007 on January 16, 2024, 08:55:06 PMNo, rbx/ebx is a non-volatile register.
It's about R12 :thumbsup:
Quote from: HSE on January 16, 2024, 09:50:23 PMQuote from: jj2007 on January 16, 2024, 08:55:06 PMNo, rbx/ebx is a non-volatile register.
It's about R12 :thumbsup:
So what? Non-volatile, too
https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170 (https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170)
Quote from: jj2007 on January 16, 2024, 10:42:47 PMSo what?
:biggrin: Dogs and cats are almost the same... but not the same.
Quote from: cyrus on January 16, 2024, 02:32:30 PMI even modified it again to use R12 instead of RBX just in case I use RBX somewhere else. Although no functions ever use it, it is still a volatile register
1. The phrase could refer to rbx or r12, it's ambiguous
2. Both are non-volatile registers
Any problem with div effecting edx?
Quote from: jj2007 on January 16, 2024, 11:33:22 PMQuote from: cyrus on January 16, 2024, 02:32:30 PMI even modified it again to use R12 instead of RBX just in case I use RBX somewhere else. Although no functions ever use it, it is still a volatile register
1. The phrase could refer to rbx or r12, it's ambiguous
2. Both are non-volatile registers
Thanks. I guess thats why RBX did work but out of habit, I was used to just sticking to R12-15 but good to know.
I do need to read more into the ABI but I do a lot of other things as well, not just programming in asm
Ok I just wanted to say thank you sinsi for opening my eyes about the stack alignment. That plagued me. I had a program where I had call printf just to get it to work (or any function). But after analyzing it all the way through, I realized my stack was indeed unaligned.
My other program I was about to post up but didn't need to because so far it looks to be working pretty solid, was one that calls GetTcpTable
which, requires 2 calls to be made. One must initialize
SizePointer
by making 2 calls to GetTcpTable
. First being [in], second call, [out] receiving the actual size. This is because the tcp table is dynamic depending on what's in it at the current time of calling the function and retreiving the data. The data goes into a struct of type
PMIB_TCPTABLE
. The size from
SizePointer
is what is used to allocate size for this struct. I did that on the stack using that size. The issue was that the size always varied. And I needed to either subtract stack space or push bytes on the stack. This time I did subtract the size on the stack. But here is the kicker. That may not be stack-aligned. So what was my trick? I took the whole size, divided it by 16 using 'div ebx' so ensure that I would have enough room. I noticed most calls for my particular tcp table ranged from about 512 to 772 or so bytes. Each call varied depending on when Microsoft decides to connect to some rogue server whenever it wants :smiley:
After division, I checked if there was a remainder. If there was, I added 1 to the quotient, not the total size. The quotient + 1 x 16 again would give me a grand total that is stack-aligned. I subtract this from the stack to give me the struct I needed. Then I just added that size back and resumed to 'begin'. I tested this by calling printf between so that I know whats' going on. It ends up in an infinite loop which is the intended goal here,
https://learn.microsoft.com/en-us/windows/win32/api/iphlpapi/nf-iphlpapi-gettcptable
Quote from: sinsi on January 15, 2024, 02:11:06 PMQuote from: NoCforMe on January 15, 2024, 01:33:04 PMAbout your question about using a static buffer (one declared in your .data section) instead of one allocated on the stack: pretty much 6 of one, half a dozen of the other. Not more or less efficient either way. It's true that you can initialize the static buffer when you declare it. But again, if you're using it multiple times with your Enum function, there's no need to "clear" it each time anyhow. A static buffer will take up space in your program; however, you can minimize the space it occupies in the .exe file by declaring it in your .data? section (uninitialized data), but then you can't initialize it in the declaration; you'll have to use code to initialize it if you need to do that.
Just to add to that, if your procedure is recursive, or gets called by multiple threads, you have to use the stack, otherwise each running procedure will clobber the other's buffer (since it is the same buffer).
This is why I love using the stack, just to be sure that data is always new.
NoCforMe, yea for this particular one, or any other one where that buffer gets filled from a call to a function such as GetModuleBaseName, I don't need a new buffer. In fact, for that program, I just declared 256 bytes in the .data section, then just 'lea r8, buf' (which is the 3rd argument to that function)
If you need to allocate a variable-sized buffer, the best way (for me) is
- allocate the amount of memory with HeapAlloc (or whatever you use)
- store that address somewhere (non-volatile register or locally on the stack)
- do what you need to do to fill the buffer
From here, you could
- process the buffer
- HeapFree
or just return the address to the caller in RAX and let them process and free the buffer
Quote from: sinsi on January 17, 2024, 11:02:30 AMIf you need to allocate a variable-sized buffer, the best way (for me) is
- allocate the amount of memory with HeapAlloc (or whatever you use)
- store that address somewhere (non-volatile register or locally on the stack)
- do what you need to do to fill the buffer
From here, you could
- process the buffer
- HeapFree
or just return the address to the caller in RAX and let them process and free the buffer
Yes I totally forgot about that. If the buffer I need is larger than 8192, I usually end up calling malloc or HeapAlloc/HeapFree as well but sometimes I forget the simplest things. This relieves headaches of dealing with the stack but I'm glad I went through this in case I don't want to allocate memory from the heap and need to use the stack for simplicity.
Windows PAGESIZE is 4096, so good to remember that with stack if use it without probing.