The MASM Forum

General => The Campus => Topic started by: cyrus on January 14, 2024, 12:20:40 PM

Title: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 14, 2024, 12:20:40 PM
I'm just trying to convert this program in c++ to asm. It works normally if notepad.exe is loaded, it will return 1 in c++. In asm, I'm having issues with 'lea rbx, aProcesses' which should contain each pid in [rbx] but it doesn't.

Here is the code in c++ in windows


#include <Windows.h>
#include <stdio.h>
#include <psapi.h>

bool isRunning(char * pName){
    unsigned long aProcesses[1024], cbNeeded, cProcesses;
    if(!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
        return false;
 
    printf("sizeof(aProcesses): %zd\n", sizeof(aProcesses));
    printf("cbNeeded: %ld\n", cbNeeded);
    printf("sizeof(unsigned long): %zd\n", sizeof(unsigned long));

    cProcesses = cbNeeded / sizeof(unsigned long);
    printf("cProcesses: %ld\n", cProcesses);
    for(unsigned int i = 0; i < cProcesses; i++)
    {
        printf("i: %d: ", i);
        printf("aProcesses[i]: %d\n", aProcesses[i]);
        if(aProcesses[i] == 0)
            continue;
 
        HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, aProcesses[i]);
        printf("hProcess: %p\n", hProcess);
        char buffer[50];
        GetModuleBaseName(hProcess, 0, buffer, 50);
        CloseHandle(hProcess);
        if(strcmp(pName, buffer) == 0)
            return true;
    }
    return false;
}

int main(void){
    bool ret = isRunning("notepad.exe");
    printf("%d\n", ret);
    return 0;
}

---------------------------------
And here is my code in asm



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
includelib ucrt.lib
includelib msvcrt.lib
includelib legacy_stdio_definitions.lib
includelib kernel32.lib
includelib psapi.lib


.data
aProcesses       DD 1024 DUP(0)   ;   unsigned long aProcesses[1024]
cbNeeded         DQ ?
cProcesses       DQ ?
hProcess         DQ ?

pName db "notepad.exe",0
found db "pid found!",0
not_found db "pid not found!",0

.code
externdef printf:proc
externdef EnumProcesses:proc
externdef OpenProcess:proc
externdef GetModuleBaseNameA:proc
externdef CloseHandle:proc
externdef ExitProcess:proc


main proc

    sub rsp, 28h                    ;reserve stack space for called functions
    and rsp, 0fffffffffffffff0h     ;make sure stack 16-byte aligned

    begin:

    ; if(!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
    ;    return false;

    lea r8, cbNeeded       ; &cbNeeded; use lea whenever var is [out]
    mov rdx, 1000h         ; sizeof(aProcesses); 4096
    lea rcx, aProcesses    ; long aProcesses[1024] array to hold 1024 pids; use lea whenever var is [out]; pointers are passed by reference; like &
    sub rsp, 20h
    call EnumProcesses

    xor rax, rax
    xor rbx, rbx
    mov ax, WORD PTR [cbNeeded]  ; dereferenced; do not use lea
    sar eax, 2                    ; does the same thing as the division below. i've debugged this
    ;mov bl, 4h                   ; size of long
    ;div bl
    mov cProcesses, rax          ; ax contains quotient; dx contains remainder


    ; for(unsigned int i = 0; i < cProcesses; i++)
    ; {
    ;     if(aProcesses[i] == 0)
    ;         continue;

    xor r14, r14             ; r14 is the counter
    mov r14, cProcesses      ; cProcesses contains the number of total processes
    xor rbx, rbx
    lea rbx, aProcesses      ; all processes;  the entire array
   
    find_pid:
        xor rax, rax
        mov eax, DWORD PTR [rbx] ; this should be the PID but having trouble getting this to work
        add rbx, 4h              ; incrementing to the next element; long is 4 bytes each
        cmp eax, 0               ; check if null
        je continue

        jmp open_process

        continue:
            dec r14              ; (while --ecx) in c; r14 is the counter
            cmp r14, 0
            je no__match

            jmp find_pid           


    open_process:
    ;    HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, aProcesses[i]);

    xor r8, r8
    mov r8d, eax            ; eax = aProcesses[i] ; each element is 4 bytes, not 1
    xor rdx, rdx            ; arg2 = NULL
    xor rcx, rcx
    mov rcx, 410h           ; PROCESS_QUERY_INFORMATION: 400h;  PROCESS_VM_READ 10h; add them both = 410h
    sub rsp, 20h
    call OpenProcess
    mov hProcess, rax
    add rsp, 20h


    ;    char buf[256];
    ; we add 256 bytes on the stack since we want a clean buffer generated for each loop
    xor rax, rax
    xor rcx, rcx
    mov al, 20h  ; 32 bytes x 8 (push rcx) = 0x100 (256) bytes is needed for 'char buf[256]'
    init_buf:
        push rcx
        dec al
        cmp al, cl
        jne init_buf


    lea r15, [rsp]    ; must use a register because we will need to load it into rdx for GetModuleBaseName

    ;    GetModuleBaseName(hProcess, 0, buffer, 50);

    xor r8, r8
    xor r9, r9
    mov r9, 100h              ; 256 bytes for our buffer to write information into: [out] buffer
    mov r8, r15               ; r15 has the address of our buffer on the stack
    xor rdx, rdx              ; 2nd arg = NULL
    mov rcx, hProcess
    sub rsp, 20h
    call GetModuleBaseNameA
    add rsp, 20h

    ;    CloseHandle(hProcess);
    xor rcx, rcx
    mov rcx, hProcess
    sub rsp, 20h
    call CloseHandle
    add rsp, 20h


    ;    if(strcmp(pName, buffer) == 0)
    ;        return true;
    lea rsi, pName
    lea rdi, QWORD PTR [r15]  ; r15 has the address of our buffer on the stack but [r15] is the dereferenced buffer
    call str_cmp
    add rsp, 100h           ; add buffer stack space back to avoid stack overflow
    cmp rax, 0
    je match

    ; if current pid does not match, resume loop to next pid
    jmp find_pid            ; resumes loop for next pid (aProcesses[i])

    no__match:              ; none of the pids matched the string pName
    lea rcx, not_found
    sub rsp, 20h
    call printf
    add rsp, 20h
    jmp exit

    match:
    lea rcx, found
    sub rsp, 20h
    call printf
    add rsp, 20h

    exit:
    sub rsp, 20h
    call ExitProcess

main endp

end
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: jj2007 on January 14, 2024, 12:42:13 PM
Quote from: cyrus on January 14, 2024, 12:20:40 PMdiv bl

This will fail; use div ebx instead. Besides, sar eax, 2 will work better.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 14, 2024, 12:50:13 PM
I've actually checked the return of this in ax and it matches cProcesses in c++. For my system it returns around 139-142.

> isrunning.exe
sizeof(aProcesses): 4096
cbNeeded: 560
sizeof(unsigned long): 4
cProcesses: 140
i: 0: aProcesses[i]: 0
i: 1: aProcesses[i]: 4
hProcess: 0000000000000000
i: 2: aProcesses[i]: 92
hProcess: 0000000000000000

But I did modify that to sar which has the same effect anyway, but no change.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: jj2007 on January 14, 2024, 01:11:59 PM
Quote from: cyrus on January 14, 2024, 12:50:13 PMBut I did modify that to sar which has the same effect anyway, but no change.

I don't have time to check the logic of your project, but div bl will definitely fail with an exception.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 14, 2024, 01:18:33 PM
Well I have debugged that and it does not fail so I'm not sure how you reach that conclusion, but 'sar eax, 2' has the same effect as 'div bl'. The end result in ax is the number of processes which match cProcesses in the c++ code.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: sinsi on January 14, 2024, 01:41:12 PM
The code you posted gives me an array of PIDs, not just 2.
One problem may be the "sub rsp,20h", the function EnumProcesses seems to skip over the misalignment of the stack.

As far as the "div bl" goes, you are fluking it, since it translates to "divide AX by BL", so any count >255 would overflow.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 14, 2024, 02:24:18 PM
Quote from: sinsi on January 14, 2024, 01:41:12 PMThe code you posted gives me an array of PIDs, not just 2.
One problem may be the "sub rsp,20h", the function EnumProcesses seems to skip over the misalignment of the stack.

As far as the "div bl" goes, you are fluking it, since it translates to "divide AX by BL", so any count >255 would overflow.

I've posted the entire code this time. I edited my original post. As mentioned, from OpenProcess and below, all that works because I tested it by manually placing the actual PID of notepad.exe from my system into eax which returned true. But in this code, I have a loop and not able to dereference aProcesses. As far as the add rsp, well I removed that because I realized I was not in the loop there. If that isn't used in a loop, a stack overflow will be the result. But that isn't the issue.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 14, 2024, 02:39:26 PM

(https://i.postimg.cc/1fLFqhxL/bl.png) (https://postimg.cc/1fLFqhxL)

This proves the division by bl does indeed work. The end result in RAX is 8D which in decimal, is 141. Which, if you run the c++ code, you will see it is cProcesses and it is more or less the same. This will vary each time, for me, its been between 139 - 144 depending on when I run it. Also, re-running the c++ code may change that number
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: NoCforMe on January 14, 2024, 02:53:23 PM
Couple small things that don't affect execution:
    xor r14, r14            ; r14 is the counter
    mov r14, cProcesses      ; cProcesses contains the number of total processes

    xor rbx, rbx
    lea rbx, aProcesses      ; all processes;  the entire array

Completely unnecessary to clear those registers before loading them.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 14, 2024, 03:00:39 PM
I was able to successfully get this to work but I'm not sure why it actually mattered. Initially, I was using rsi instead of rbx to place aProcesses into. I had other functions to print integers with printf and it used rsi so I ended up using rbx. Well when I used rsi, I noticed that each byte was the actual string from pName. Why is that? I never loaded any string into rsi yet somehow it is initialized to contain that string. Does it do this in startup? I ended up just changing the register from rbx to r12, something that isn't volatile and it worked perfectly. Took me an entire day to realize that. Any ideas why I couldn't use rsi for that? I use rsi this way in other code. If I load something in it, I expect it to have my array, not some string I defined in my code which bears no resemblance to rsi.

BTW, my code includes my original division by bl.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 14, 2024, 03:04:42 PM
Quote from: NoCforMe on January 14, 2024, 02:53:23 PMCouple small things that don't affect execution:
    xor r14, r14            ; r14 is the counter
    mov r14, cProcesses      ; cProcesses contains the number of total processes

    xor rbx, rbx
    lea rbx, aProcesses      ; all processes;  the entire array

Completely unnecessary to clear those registers before loading them.
https://masm32.com/board/Smileys/default/badgrin.gif

I realized that for lea but out of habit I keep doing that just in case because sometimes I have over a thousand lines of code and if I forget to xor something, I get in trouble.

Love the screen name. I should have chose noPyforMe since I hate python so much.  :angelic:
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: NoCforMe on January 14, 2024, 03:08:40 PM
Are you sure you understand the 64-bit ABI correctly?

I don't do any 64-bit programming myself, so I'm not sure of the particulars, but I believe that RSI, as well as RBX and RDI, are "sacred" registers that must be preserved, just as their 32-bit counterparts are. So if you're going to use them in your code you need to preserve them and restore them before exiting. Not sure if that was the issue you experienced.

I'm curious to know if RSI comes pre-loaded (with the command tail?) at the program entry point.

Quote from: cyrus on January 14, 2024, 03:04:42 PMI realized that for lea but out of habit I keep doing that just in case because sometimes I have over a thousand lines of code and if I forget to xor something, I get in trouble.

Well, no harm, no foul.

BTW, concerning the div bl, since you're doing a power-of-2 divide, use SHR (no need for SAR) instead. It's much more elegant. Not to mention tons faster, not that it matters here ...
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 14, 2024, 03:31:54 PM
Quote from: NoCforMe on January 14, 2024, 03:08:40 PMAre you sure you understand the 64-bit ABI correctly?

I don't do any 64-bit programming myself, so I'm not sure of the particulars, but I believe that RSI, as well as RBX and RDI, are "sacred" registers that must be preserved, just as their 32-bit counterparts are. So if you're going to use them in your code you need to preserve them and restore them before exiting. Not sure if that was the issue you experienced.

I'm curious to know if RSI comes pre-loaded (with the command tail?) at the program entry point.

Quote from: cyrus on January 14, 2024, 03:04:42 PMI realized that for lea but out of habit I keep doing that just in case because sometimes I have over a thousand lines of code and if I forget to xor something, I get in trouble.

Well, no harm, no foul.

BTW, concerning the div bl, since you're doing a power-of-2 divide, use SHR (no need for SAR) instead. It's much more elegant. Not to mention tons faster, not that it matters here ...


Yes this register is not one that can be used throughout a loop like that. Now that I think of it, this is the first time I attempted to use that register through a loop and I should have been more cautious about anything that isn't r12-15.

As for RSI itself and string, I do in fact think that it does come pre-loaded with some string that is defined. For me that happened to be the first string pName. I've seen this happen before but wasn't sure.

Good to know about the division. It is definitely faster as I've used SHR before but not SAR. In large cases, I will definitely use them. Thanks all for the comments (even though they didn't solve the issue, they are helpful for future endeavors in asm)
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: sinsi on January 14, 2024, 03:34:58 PM
You need to read up about spill/shadow space and passing parameters for 64-bit.
    sub rsp, 28h+256                    ;reserve stack space for called functions
    lea r15, [rsp+28]    ; delete the later line before the call to GetModuleBaseName
This change seems to *not crash*

You normally allocate 4 qwords for the spill. If a Windows function you call has more than 4 parameters then you would allocate that many. Note that you MUST allocate a minimum of 4.

Once you have set up your stack, don't touch it - no more "sub rsp,20h/add rsp,20h" pairs, the initial adjustment will take care of it.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: NoCforMe on January 14, 2024, 04:07:32 PM
My recommendation, take it or leave it: Forget 64-bit programming. Completely overkill and a pain in the ass besides. Win32 forever!
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: sinsi on January 14, 2024, 04:28:55 PM
Quote from: NoCforMe on January 14, 2024, 04:07:32 PMMy recommendation, take it or leave it: Forget 64-bit programming. Completely overkill and a pain in the ass besides. Win32 forever!
It is nice to allocate 8GB to work with an SQL table and have the WHOLE F'N THING in memory  :cool:
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: NoCforMe on January 14, 2024, 04:40:37 PM
I should have said "forget 64-bit programming except in certain circumstances where you need humongous amounts of memory" ...
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 14, 2024, 06:15:17 PM
Quote from: sinsi on January 14, 2024, 03:34:58 PMYou need to read up about spill/shadow space and passing parameters for 64-bit.
    sub rsp, 28h+256                    ;reserve stack space for called functions
    lea r15, [rsp+28]    ; delete the later line before the call to GetModuleBaseName
This change seems to *not crash*

You normally allocate 4 qwords for the spill. If a Windows function you call has more than 4 parameters then you would allocate that many. Note that you MUST allocate a minimum of 4.

Once you have set up your stack, don't touch it - no more "sub rsp,20h/add rsp,20h" pairs, the initial adjustment will take care of it.

I have noticed that the style of setting aside stack space this way you stated: 'sub rsp, 256' and then using that for my buffer doesn't end up working in some cases and I'll tell you why. When you reserve stack space that way, it's going to have random data, not null bytes. When you try to use that for a buffer, you never know what you'll get and often your buffer will contain other data and not work. I do that style of subtracting stack space when I am going to use that amount of space to dedicate to a structure like the PROCESS INFORMATION in CreateProcessA because that is going to get populated. or WSAData, or when I am in a read loop from a network socket. That buffer is going to fill up entirely with the data I am reading in and then gets null-terminated.

However, I believe my weakness with asm in general is the stack space. I have 1 program where I have to make 2 calls to printf with an empty string because it won't work otherwise and I've written quite a bit of programs with perfect stack alignment, so I don't know what that issue is.

I believe the sub rsp, 20h is required for every function call isn't it? I read about this before in 64-bit programming. The add rsp, 20h is only necessary when I am in a loop. If I leave it out, stack overflow.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 14, 2024, 06:17:39 PM
Quote from: NoCforMe on January 14, 2024, 04:40:37 PMI should have said "forget 64-bit programming except in certain circumstances where you need humongous amounts of memory" ...

I understand 32-bits is more fun to program but I have to actually program this for current systems which are 64-bit lol.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: sinsi on January 14, 2024, 06:33:13 PM
Quote from: cyrus on January 14, 2024, 06:15:17 PMI have noticed that the style of setting aside stack space this way you stated: 'sub rsp, 256' and then using that for my buffer doesn't end up working in some cases ...
Two reasons to fail, 256 is not enough, or misalignes the stack.

Quote from: cyrus on January 14, 2024, 06:15:17 PMWhen you reserve stack space that way, it's going to have random data, not null bytes.
As for any LOCAL variable, you set it up for the call, if the call returns no error the buffer has to be correc.

Quote from: cyrus on January 14, 2024, 06:15:17 PMI believe the sub rsp, 20h is required for every function call isn't it? I read about this before in 64-bit programming. The add rsp, 20h is only necessary when I am in a loop. If I leave it out, stack overflow.
A Windows function uses at least 4 spill slots, that's what the "sub rsp,20h" is, assuming the stack is aligned (which it isn't on entry).
You are way off here, study the Win64 ABI.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: NoCforMe on January 14, 2024, 07:00:54 PM
Quote from: cyrus on January 14, 2024, 06:15:17 PMI have noticed that the style of setting aside stack space this way you stated: 'sub rsp, 256' and then using that for my buffer doesn't end up working in some cases and I'll tell you why. When you reserve stack space that way, it's going to have random data, not null bytes. When you try to use that for a buffer, you never know what you'll get [...]

Yes. It's the same with any variables allocated on the stack as LOCALs. The rule is, when using any such stack-allocated space, ASSUME it contains garbage.

You can clear stack space just like any other space by using REP STOSB or in a loop by setting it to the desired value. For instance (32-bit example here):
    PUSH    EDI
    LEA    EDI, <variable you want to clear>
    MOV    ECX, <size of variable in bytes>
    MOV    AL, <value to fill variable with>
    REP    STOSB
    POP    EDI

   --or--

    LEA    EDX, <variable you want to clear>
    MOV    ECX, <size of variable in bytes>
    MOV    AL, <value to fill variable with>
@@:    MOV    [EDX], AL
    INC    EDX
    LOOP    @B

You can clear the space using words, dwords or qwords as well.

Also, if the stack space is going to receive the results of a function call like your EnumProcesses(), it doesn't matter what's in the buffer: the function will just overwrite it, so no need to initialize it.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: jj2007 on January 14, 2024, 08:07:06 PM
Quote from: cyrus on January 14, 2024, 01:18:33 PMI have debugged that and it does not fail

So did I, and as Sinsi wrote, it will brutally fail for values over 1023*).
Test it (the code is Masm64 SDK compatible (https://masm32.com/board/index.php?topic=10880.0), unlike yours):

include \masm64\include64\masm64rt.inc
.code
entry_point proc
  xor rax, rax
  xor rbx, rbx
  INT 3
  mov ax, 1234h        ; simulated WORD PTR [cbNeeded]
  mov bl, 4h        ; size of long
  div bl        ; before: eax=1234h, ebx=4h
  conout str$(eax)
  invoke ExitProcess, 0
entry_point endp
end

*) Actually, it is much more complicated, see attachment.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: TimoVJL on January 14, 2024, 10:23:58 PM
With poasm:
ifdef __UASM__
.x64
.Model flat
endif
ExitProcess PROTO STDCALL :DWORD
.code
_mainCRTStartup proc
  xor rax, rax
  xor rbx, rbx
  INT 3
  mov ax, 1234h        ; simulated WORD PTR [cbNeeded]
  mov bl, 4h        ; size of long
  div bl        ; before: eax=1234h, ebx=4h
  ;conout str$(eax)
  ;invoke ExitProcess, 0
  mov eax, 0
  call ExitProcess    ; just for ml64
_mainCRTStartup endp
end
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 15, 2024, 06:35:23 AM
Quote from: sinsi on January 14, 2024, 06:33:13 PM
Quote from: cyrus on January 14, 2024, 06:15:17 PMI have noticed that the style of setting aside stack space this way you stated: 'sub rsp, 256' and then using that for my buffer doesn't end up working in some cases ...
Two reasons to fail, 256 is not enough, or misalignes the stack.

That is a good point and I've missed that it may corrupt the stack alignment there.

Quote from: cyrus on January 14, 2024, 06:15:17 PMWhen you reserve stack space that way, it's going to have random data, not null bytes.
As for any LOCAL variable, you set it up for the call, if the call returns no error the buffer has to be correc.

I already know that local variables are set up for that call. In this case, I am setting up buffer for each call. Could I do what NoCForMe mentioned, declare my buf as 256 in the .data section initialized to 0, and then use REP STOSB in each call to clear it out before I use it? Yes but I'm not sure if that is more efficient than simply pushing 256 null bytes on the stack. Is it? If so, I may use that for the increase in performance but I doubt it would matter in that regard. Maybe if that was megabytes.

Quote from: cyrus on January 14, 2024, 06:15:17 PMI believe the sub rsp, 20h is required for every function call isn't it? I read about this before in 64-bit programming. The add rsp, 20h is only necessary when I am in a loop. If I leave it out, stack overflow.
A Windows function uses at least 4 spill slots, that's what the "sub rsp,20h" is, assuming the stack is aligned (which it isn't on entry).
You are way off here, study the Win64 ABI.

What am I way off on exactly? I did mention a windows function uses 32 bytes so why are you telling me that?
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 15, 2024, 06:36:46 AM
Quote from: jj2007 on January 14, 2024, 08:07:06 PM
Quote from: cyrus on January 14, 2024, 01:18:33 PMI have debugged that and it does not fail

So did I, and as Sinsi wrote, it will brutally fail for values over 1023*).
Test it (the code is Masm64 SDK compatible (https://masm32.com/board/index.php?topic=10880.0), unlike yours):

include \masm64\include64\masm64rt.inc
.code
entry_point proc
  xor rax, rax
  xor rbx, rbx
  INT 3
  mov ax, 1234h        ; simulated WORD PTR [cbNeeded]
  mov bl, 4h        ; size of long
  div bl        ; before: eax=1234h, ebx=4h
  conout str$(eax)
  invoke ExitProcess, 0
entry_point endp
end

*) Actually, it is much more complicated, see attachment.

Good point. I overlooked anything over 1023, so that makes sense.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 15, 2024, 06:39:53 AM
Quote from: NoCforMe on January 14, 2024, 07:00:54 PM
Quote from: cyrus on January 14, 2024, 06:15:17 PMI have noticed that the style of setting aside stack space this way you stated: 'sub rsp, 256' and then using that for my buffer doesn't end up working in some cases and I'll tell you why. When you reserve stack space that way, it's going to have random data, not null bytes. When you try to use that for a buffer, you never know what you'll get [...]

Yes. It's the same with any variables allocated on the stack as LOCALs. The rule is, when using any such stack-allocated space, ASSUME it contains garbage.

You can clear stack space just like any other space by using REP STOSB or in a loop by setting it to the desired value. For instance (32-bit example here):
    PUSH    EDI
    LEA    EDI, <variable you want to clear>
    MOV    ECX, <size of variable in bytes>
    MOV    AL, <value to fill variable with>
    REP    STOSB
    POP    EDI

   --or--

    LEA    EDX, <variable you want to clear>
    MOV    ECX, <size of variable in bytes>
    MOV    AL, <value to fill variable with>
@@:    MOV    [EDX], AL
    INC    EDX
    LOOP    @B

You can clear the space using words, dwords or qwords as well.

Also, if the stack space is going to receive the results of a function call like your EnumProcesses(), it doesn't matter what's in the buffer: the function will just overwrite it, so no need to initialize it.

I did mention when I have a buffer I'm going to fill entirely, using 'sub rsp' method works just fine. It's when in these cases, the data varies and I don't know how large that may be and I'm comparing strings. Although in this particular case, I know 'notepad.exe' is only 11 bytes so if data from other PIDs are read into the 11 byte buffer, I don't care but it may overflow onto something else and I figure 256 bytes isn't much to push onto the stack.

Thanks for the tip on clearing a buffer. 2 things here.

1. Is that more efficient than declaring my buffer in the .data section, initializing it to 0, then simply doing that for each call when I am in the loop? Or is simply pushing 256 bytes on the stack just as efficient?

2. I managed to "clear" my buffer by doing
   
mov qword ptr [r15], 0           ; clear the buffer, otherwise it will end up in an infinite loop thinking it is always there
Assuming r15 has the beginning of rsp where I pushed 256 bytes onto. I believe it just adds a null terminator to that so it may not clear the entire data but I believe it is sufficient for strcmp.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: sinsi on January 15, 2024, 09:32:51 AM
Quote from: cyrus on January 15, 2024, 06:35:23 AMWhat am I way off on exactly? I did mention a windows function uses 32 bytes so why are you telling me that?
It gets tricky when a function has more than 4 parameters, the extra ones get put onto the stack, usually by a series of "mov [rsp+28h],rax" and so on, so it's easy to lose track of where RSP is.
Even if a function has 0 parameters, it still needs those 32 bytes, that's part of the ABI.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 15, 2024, 11:10:13 AM
Quote from: sinsi on January 15, 2024, 09:32:51 AM
Quote from: cyrus on January 15, 2024, 06:35:23 AMWhat am I way off on exactly? I did mention a windows function uses 32 bytes so why are you telling me that?
It gets tricky when a function has more than 4 parameters, the extra ones get put onto the stack, usually by a series of "mov [rsp+28h],rax" and so on, so it's easy to lose track of where RSP is.
Even if a function has 0 parameters, it still needs those 32 bytes, that's part of the ABI.

Ok I totally know that. Here is an example of how I call WSASocketA. In 32-bits, I used push. In 64-bit, I do exactly what is required.

    ; call WSASocketA
    sub rsp, 30h
    xor r9, r9                       ; 4th arg: lpProtocolInfo=NULL (uses itself from above: NULL)
    ;push r9                          ; 6th arg: dwFlags=NULL
    ;push r9                          ; 5th arg: g=NULL
    mov QWORD PTR [rsp + 28h], 00h  ; 6th arg: dwFlags=NULL
    mov QWORD PTR [rsp + 20h], 00h  ; 5th arg: g=NULL
    xor r8, r8
    mov r8b, 6h                    ; 3rd arg: protocol=6
    xor rdx, rdx
    mov dl, 1h                     ; 2nd arg: type=1
    xor rcx, rcx
    mov cl, 2h                     ; 1st arg: af=2
    call WSASocketA                ; call WSASocketA
    mov sockfd, rax                ; save socket descriptor of WSASocketA to sockfd variable
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: sinsi on January 15, 2024, 12:10:47 PM
callWSASocketA PROC
    ;on entry, the stack is misaligned. We have 6 arguments, so need to add 8 bytes to align it
    sub rsp, 38h ;This would be at the top of this proc so every function call can re-use it
                 ;As a bonus it gives us 8 bytes to use at [RSP+30..37] (this time)
    ;swap some code around to cut down on size
    xor r9d,r9d                     ; 4th arg: lpProtocolInfo=NULL (uses itself from above: NULL)
    mov [rsp+28h],r9                ; 6th arg: dwFlags=NULL
    mov [rsp+20h],r9                ; 5th arg: g=NULL
    ;the next 3 args are of type 'int' which is 32-bit? I'm not a C programmer
    ;The advantage of altering the low 32 bits of a register is that the upper 32 are cleared.
    ;Of course if you forget that it can make your code crash in mysterious ways :)
    mov r8d,6h                      ; 3rd arg: protocol=6
    mov edx,1h                      ; 2nd arg: type=1
    mov ecx,2h                      ; 1st arg: af=2
    call WSASocketA                 ; call WSASocketA
    ;this proc acts like a function, and returns rax
    ;Slightly better than having this code accessing a non-local var
    add rsp,38h
    ret
callWSASocketA ENDP
Another way
callWSASocketA PROC
    mov  ecx,2
    mov  edx,1
    mov  r8d,6
    xor  r9d,r9d
    push rax     ;aligns the stack
    push 0
    push 0
    sub  rsp,20h
    call WSASocketA
    add rsp,7*8
    ret
callWSASocketA ENDP

Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: NoCforMe on January 15, 2024, 01:33:04 PM
Quote from: cyrus on January 15, 2024, 06:39:53 AMThanks for the tip on clearing a buffer. 2 things here.

1. Is that more efficient than declaring my buffer in the .data section, initializing it to 0, then simply doing that for each call when I am in the loop? Or is simply pushing 256 bytes on the stack just as efficient?

2. I managed to "clear" my buffer by doing
   
mov qword ptr [r15], 0          ; clear the buffer, otherwise it will end up in an infinite loop thinking it is always there
Assuming r15 has the beginning of rsp where I pushed 256 bytes onto. I believe it just adds a null terminator to that so it may not clear the entire data but I believe it is sufficient for strcmp.

Just to clear up a bit of confusion here: I didn't realize that the data going into your buffer was strings. That actually makes things easier.

1. Again, if you're having a function fill a buffer, you don't need to "clear" the buffer, as the function will simply overwrite whatever's in the buffer to start with.

2. Your 2nd bit of code there is correct. Since strings (the kind we deal with here in assembly language 99.99% of the time) are NULL-terminated, all you need to do to "clear" a buffer is to put a single byte of zero into it.

3. If you're doing string comparisons on a buffer that's been filled by a function, again, you don't need to initialize the buffer first, as the string (assuming there's just one) is guaranteed to have a NULL at the end. There are some weird Windows API functions that return multiple strings where each string is terminated by one NULL and the whole shebang is terminated by an extra NULL, but those are special cases. Even there, you're always going to be able to find the end of the strings and the end of the buffer.

About your question about using a static buffer (one declared in your .data section) instead of one allocated on the stack: pretty much 6 of one, half a dozen of the other. Not more or less efficient either way. It's true that you can initialize the static buffer when you declare it. But again, if you're using it multiple times with your Enum function, there's no need to "clear" it each time anyhow. A static buffer will take up space in your program; however, you can minimize the space it occupies in the .exe file by declaring it in your .data? section (uninitialized data), but then you can't initialize it in the declaration; you'll have to use code to initialize it if you need to do that.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: sinsi on January 15, 2024, 02:11:06 PM
Quote from: NoCforMe on January 15, 2024, 01:33:04 PMAbout your question about using a static buffer (one declared in your .data section) instead of one allocated on the stack: pretty much 6 of one, half a dozen of the other. Not more or less efficient either way. It's true that you can initialize the static buffer when you declare it. But again, if you're using it multiple times with your Enum function, there's no need to "clear" it each time anyhow. A static buffer will take up space in your program; however, you can minimize the space it occupies in the .exe file by declaring it in your .data? section (uninitialized data), but then you can't initialize it in the declaration; you'll have to use code to initialize it if you need to do that.
Just to add to that, if your procedure is recursive, or gets called by multiple threads, you have to use the stack, otherwise each running procedure will clobber the other's buffer (since it is the same buffer).
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: jj2007 on January 15, 2024, 07:51:50 PM
Quote from: NoCforMe on January 15, 2024, 01:33:04 PMdeclaring it in your .data? section (uninitialized data), but then you can't initialize it in the declaration
The OS loader will do that for you: .data? is always zeroed at program start. Of course, if you write to that buffer and come back later, you need to zero it again.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: HSE on January 15, 2024, 11:11:02 PM
With little modifications for Masm64 SDK (and perhaps a couple of issues) apparently work correctly:
include \masm32\include64\masm64rt.inc

include \masm32\include64\psapi.inc
includelib \masm32\lib64\psapi.lib

.data
    aProcesses      DD 1024 DUP(0)  ;  unsigned long aProcesses[1024]
    cbNeeded        DQ ?
    cProcesses      DQ ?
    hProcess        DQ ?

;    pName db "notepad.exe",0
    pName db "qeditor.exe",0
    found db "pid found!",0
    not_found db "pid not found!",0

.code

entry_point proc

    sub rsp, 28h                    ;reserve stack space for called functions
    and rsp, 0fffffffffffffff0h    ;make sure stack 16-byte aligned

    begin:

    ; if(!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
    ;    return false;

    lea r8, cbNeeded      ; &cbNeeded; use lea whenever var is [out]
    mov rdx, 1000h        ; sizeof(aProcesses); 4096
    lea rcx, aProcesses    ; long aProcesses[1024] array to hold 1024 pids; use lea whenever var is [out]; pointers are passed by reference; like &
    sub rsp, 20h
    call EnumProcesses
    add rsp, 20h            ; >>> this was missing

    xor rax, rax
    xor rbx, rbx
    mov ax, WORD PTR [cbNeeded]  ; dereferenced; do not use lea
    sar eax, 2                    ; does the same thing as the division below. i've debugged this
    ;mov bl, 4h                  ; size of long
    ;div bl
    mov cProcesses, rax          ; ax contains quotient; dx contains remainder


    ; for(unsigned int i = 0; i < cProcesses; i++)
    ; {
    ;    if(aProcesses[i] == 0)
    ;        continue;

    mov r14, cProcesses      ; cProcesses contains the number of total processes
    lea rbx, aProcesses      ; all processes;  the entire array
 
    find_pid:
        mov eax, DWORD PTR [rbx] ; this should be the PID but having trouble getting this to work
        add rbx, 4h              ; incrementing to the next element; long is 4 bytes each
        cmp eax, 0              ; check if null
        je continue

        jmp open_process

        continue:
            dec r14              ; (while --ecx) in c; r14 is the counter
            cmp r14, 0
            je no__match

            jmp find_pid         


    open_process:
    ;    HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, aProcesses[i]);

    mov r8d, eax            ; eax = aProcesses[i] ; each element is 4 bytes, not 1
    xor rdx, rdx            ; arg2 = NULL
    mov rcx, 410h          ; PROCESS_QUERY_INFORMATION: 400h;  PROCESS_VM_READ 10h; add them both = 410h
    sub rsp, 20h
    call OpenProcess
    mov hProcess, rax
    add rsp, 20h

    ;    char buf[256];
    ; we add 256 bytes on the stack since we want a clean buffer generated for each loop
    xor rax, rax
    xor rcx, rcx
    mov al, 20h  ; 32 bytes x 8 (push rcx) = 0x100 (256) bytes is needed for 'char buf[256]'
    init_buf:
        push rcx
        dec al
        cmp al, cl
        jne init_buf


    lea r15, [rsp]    ; must use a register because we will need to load it into rdx for GetModuleBaseName

    ;    GetModuleBaseName(hProcess, 0, buffer, 50);

    xor r8, r8
    xor r9, r9
    mov r9, 100h              ; 256 bytes for our buffer to write information into: [out] buffer
    mov r8, r15              ; r15 has the address of our buffer on the stack
    xor rdx, rdx              ; 2nd arg = NULL
    mov rcx, hProcess
    sub rsp, 20h
    call GetModuleBaseNameA
    add rsp, 20h

    ;    CloseHandle(hProcess);
    xor rcx, rcx
    mov rcx, hProcess
    sub rsp, 20h
    call CloseHandle
    add rsp, 20h


    ;    if(strcmp(pName, buffer) == 0)
    ;        return true;
    ;   
    ;                >>  Note here where used rsi and rdi ???
    ;
    lea rcx, pName
    lea rdx, QWORD PTR [r15]  ; r15 has the address of our buffer on the stack but [r15] is the dereferenced buffer
    call szCmp
    add rsp, 100h          ; add buffer stack space back to avoid stack overflow
    cmp rax, 0
    jne match

    ; if current pid does not match, resume loop to next pid
    jmp find_pid            ; resumes loop for next pid (aProcesses[i])

    no__match:              ; none of the pids matched the string pName
    lea rcx, not_found
    sub rsp, 20h
    call vc_printf
    add rsp, 20h
    jmp exit

    match:
    lea rcx, found
    sub rsp, 20h
    call vc_printf
    add rsp, 20h

    exit:
    sub rsp, 20h
    call ExitProcess

entry_point endp

end

Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: lingo on January 16, 2024, 07:52:14 AM
It has to stop using MASMx86 and don't waste newbies time with it.

About EnumProcessModules:

"If this function is called from a 32-bit application running on WOW64,
it can only enumerate the modules of a 32-bit process.
If the process is a 64-bit process, this function fails and the last error code is ERROR_PARTIAL_COPY (299)."
https://learn.microsoft.com/en-us/windows/win32/api/psapi/nf-psapi-enumprocessmodules (https://learn.microsoft.com/en-us/windows/win32/api/psapi/nf-psapi-enumprocessmodules)


Thank you HSE for code!
Why so complicated and incomprehensible for such a trifle!? :biggrin:


;***************************************************;
include  \masm32\include64\masm64rt.inc
include  \masm32\include64\psapi.inc
includelib \masm32\lib64\psapi.lib
include    \masm32\include64\ntdll.inc
includelib  \masm32\lib64\ntdll.lib
;***************************************************;

.data

SaveRBX    dq 0
SaveRSI    dq 0
SaveRDI    dq 0
lphModule  dq 0
aProcesses  dd 1024 DUP(0) 
uModuleName db 520 Dup(0)
cbNeeded    dd 0
hProcess    dd 0
lpcbNeeded  dd 0 
uName      dw "n","o","t","e","p","a","d",".","e","x","e",0,0,0,0
szMessTitle dw "n","o","t","e","p","a","d",".","e","x","e"," ","i","s"," ","f","o","u","n","d","!",0,0,0,0
uString    dw "%","l","u",0,0,0,0 
szMess      dw "P","I","D",":"," "
szUBuff    db 56 Dup(0)
;****************************************************;

.code
;****************************************************;
main              proc
                    sub    rsp, 48 

; Get the list of process identifiers.
;BOOL EnumProcesses( [out] DWORD *lpidProcess,[in] DWORD cb, [out] LPDWORD lpcbNeeded

                    lea    r8,  cbNeeded            ; Result
                    mov    edx, 1000h              ; cb ->    The size of the pProcessIds array, in bytes.
                    lea    rcx, aProcesses          ; A pointer to an array that receives the list of process identifiers
                    call    EnumProcesses 
                    test    eax, eax 
                    je      Ret_0

; Calculate how many process identifiers were returned.

                    mov    eax, cbNeeded            ; cbNeeded bytes / 4 = cProcesses
                    shr    eax,2                    ; cProcesses = cbNeeded / sizeof(DWORD); 

; Print the name and process identifier for each process.

                    test    eax, eax 
                    je      Ret_0 

                    mov    SaveRBX, rbx
                    mov    SaveRSI, rsi
                    mov    SaveRDI, rdi
                   
                    lea    rbx, aProcesses          ; rbx -> DWORD aProcesses[1024]                       
                    mov    edi, eax                ; edi->cProcesses
     
;****************************************************;
@Loop:
                    mov    r8d,dword ptr [rbx]      ; edi->get current PID   
                    test    r8d,r8d 
                    je      @Next       

;HANDLE OpenProcess(  [in] DWORD dwDesiredAccess,  [in] BOOL  bInheritHandle,  [in] DWORD dwProcessId);

                    xor    edx,edx                  ; BOOL  bInheritHandle 
                    mov    ecx,410h                ; dwDesiredAccess 
                    call    OpenProcess 
                    mov    rsi,rax                  ; rsi=rax=open handle to the specified process
                    test    rax,rax                  ; If rax=0 -> not every process can be opened !!!
                    je      @Skip                    ; and skip it
                       
;BOOL EnumProcessModules( [in]  HANDLE  hProcess, [out] HMODULE *lphModule,[in] DWORD cb,[out] LPDWORD lpcbNeeded);

                    lea    r9, lpcbNeeded          ; lpcbNeeded -> The number of bytes required to store all module handles in the lphModule array
                    mov    r8d,8              ; cb -> The size of the lphModule array, in bytes
                    lea    rdx,lphModule            ; Result -> *lphModule
                    mov    rcx,rax                  ; rcx = rax = hProcess
                    call    EnumProcessModules 
                    test    eax,eax                  ; If eax = zero->Error
                    je      @Skip                    ; 

;DWORD GetModuleBaseNameW( [in] HANDLE  hProcess, [in, optional] HMODULE hModule,[out] LPWSTR lpBaseName, [in] DWORD nSize);

                    mov    rdx, lphModule          ; rdx -> *lphModule
                    lea    r8,  uModuleName        ; Buffer for currebt ModulName
                    mov    r9d, 104h              ; nSize of lpBaseName in bytes
                    mov    rcx, rsi              ; rcx=rsi -> hProcess
                    call    GetModuleBaseNameW
                    test    eax, eax
                    je      @Skip
;...Compare...
                    lea    rcx, uModuleName 
                    lea    rdx, uName
                    call    _wcsicmp
                    test    eax, eax
                    jne    @Skip
;....Found it...
                    xor    r9d, r9d
                    mov    r8d, dword ptr[rbx]      ; PID
                    lea    rdx, uString
                    lea    rcx, szUBuff
                    call    wsprintfW
                    xor    r9d, r9d
                    lea    r8,  szMessTitle
                    lea    rdx, szMess
                    xor    ecx, ecx
                    call    MessageBoxW
                    mov    eax, dword ptr [rbx]
                    jmp    Ret_1
@Skip:
                    mov    rcx,rsi                    ; rcx=rsi -> hProcess
                    call    CloseHandle
@Next:
                    add    rbx,4                    ; rbx-> next ID from array aProcesses 
                    sub    rdi,1                    ; one less for rdi->cProcesses
                    jne    @Loop     
                    xor    eax,eax 
Ret_1:                                         
                    mov    rbx, SaveRBX
                    mov    rsi, SaveRSI
                    mov    rdi, SaveRDI
                    add    rsp,48 
                    ret 
Ret_0:
                    xor    eax,eax 
                    add    rsp,48 
                    ret   
main              endp
;***************************************************;
End

Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: NoCforMe on January 16, 2024, 08:00:26 AM
Quote from: lingo on January 16, 2024, 07:52:14 AMIt has to stop using MASMx86 and don't waste newbies time with it.

No. Just no.

(They're talking about 32-bit X86 programming here.)

I'm committed to Win32/x86, and there are a hell of a lot of others here who are as well. So please don't go around making blanket prohibitions like this.

I would fight you on this but I don't want to pollute this thread.

Now back to the OP's problems. I don't know if you've already seen this stuff, but you might want to look at the Microsoft Learn pages on the 64-bit ABI, here (https://learn.microsoft.com/en-us/cpp/build/x64-software-conventions?view=msvc-170) and here (https://learn.microsoft.com/en-us/cpp/build/stack-usage?view=msvc-170) (the 2nd page covers stack usage).
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: HSE on January 16, 2024, 08:06:43 AM
Hi Lingo!

Quote from: lingo on January 16, 2024, 07:52:14 AMWhy so complicated and incomprehensible for such a trifle!? :biggrin:

It's cyrus's code. He is testing things, and code is working  :thumbsup:

HSE
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: jj2007 on January 16, 2024, 08:31:14 AM
Quote from: lingo on January 16, 2024, 07:52:14 AMuName       dw "n","o","t","e","p","a","d",".","e","x","e",0,0,0,0
szMessTitle dw "n","o","t","e","p","a","d",".","e","x","e"," ","i","s"," ","f","o","u","n","d","!",0,0,0,0

I sincerely hope you have a tool that generates this crap, Lingo :biggrin:

I recommend wChr$() (https://www.jj2007.eu/MasmBasicQuickReference.htm#Mb1139), wData or Ole$() (https://www.jj2007.eu/MasmBasicQuickReference.htm#Mb1139).
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 16, 2024, 02:17:23 PM
I wanted to mention that I am reading these posts from you gentlemen but I have been testing another program that keeps crashing due to a stack overflow despite the fact I am adding 'add rsp, 20h' to every call and adding the correct stack space. This one was a tough one, dealing with iterating structures which I have done successfully. I will post that code shortly. I am pretty frustrated about it.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 16, 2024, 02:32:30 PM
Quote from: HSE on January 15, 2024, 11:11:02 PMWith little modifications for Masm64 SDK (and perhaps a couple of issues) apparently work correctly:
include \masm32\include64\masm64rt.inc

include \masm32\include64\psapi.inc
includelib \masm32\lib64\psapi.lib

.data
    aProcesses      DD 1024 DUP(0)  ;  unsigned long aProcesses[1024]
    cbNeeded        DQ ?
    cProcesses      DQ ?
    hProcess        DQ ?

;    pName db "notepad.exe",0
    pName db "qeditor.exe",0
    found db "pid found!",0
    not_found db "pid not found!",0

.code

entry_point proc

    sub rsp, 28h                    ;reserve stack space for called functions
    and rsp, 0fffffffffffffff0h    ;make sure stack 16-byte aligned

    begin:

    ; if(!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
    ;    return false;

    lea r8, cbNeeded      ; &cbNeeded; use lea whenever var is [out]
    mov rdx, 1000h        ; sizeof(aProcesses); 4096
    lea rcx, aProcesses    ; long aProcesses[1024] array to hold 1024 pids; use lea whenever var is [out]; pointers are passed by reference; like &
    sub rsp, 20h
    call EnumProcesses
    add rsp, 20h            ; >>> this was missing

    xor rax, rax
    xor rbx, rbx
    mov ax, WORD PTR [cbNeeded]  ; dereferenced; do not use lea
    sar eax, 2                    ; does the same thing as the division below. i've debugged this
    ;mov bl, 4h                  ; size of long
    ;div bl
    mov cProcesses, rax          ; ax contains quotient; dx contains remainder


    ; for(unsigned int i = 0; i < cProcesses; i++)
    ; {
    ;    if(aProcesses[i] == 0)
    ;        continue;

    mov r14, cProcesses      ; cProcesses contains the number of total processes
    lea rbx, aProcesses      ; all processes;  the entire array
 
    find_pid:
        mov eax, DWORD PTR [rbx] ; this should be the PID but having trouble getting this to work
        add rbx, 4h              ; incrementing to the next element; long is 4 bytes each
        cmp eax, 0              ; check if null
        je continue

        jmp open_process

        continue:
            dec r14              ; (while --ecx) in c; r14 is the counter
            cmp r14, 0
            je no__match

            jmp find_pid         


    open_process:
    ;    HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, aProcesses[i]);

    mov r8d, eax            ; eax = aProcesses[i] ; each element is 4 bytes, not 1
    xor rdx, rdx            ; arg2 = NULL
    mov rcx, 410h          ; PROCESS_QUERY_INFORMATION: 400h;  PROCESS_VM_READ 10h; add them both = 410h
    sub rsp, 20h
    call OpenProcess
    mov hProcess, rax
    add rsp, 20h

    ;    char buf[256];
    ; we add 256 bytes on the stack since we want a clean buffer generated for each loop
    xor rax, rax
    xor rcx, rcx
    mov al, 20h  ; 32 bytes x 8 (push rcx) = 0x100 (256) bytes is needed for 'char buf[256]'
    init_buf:
        push rcx
        dec al
        cmp al, cl
        jne init_buf


    lea r15, [rsp]    ; must use a register because we will need to load it into rdx for GetModuleBaseName

    ;    GetModuleBaseName(hProcess, 0, buffer, 50);

    xor r8, r8
    xor r9, r9
    mov r9, 100h              ; 256 bytes for our buffer to write information into: [out] buffer
    mov r8, r15              ; r15 has the address of our buffer on the stack
    xor rdx, rdx              ; 2nd arg = NULL
    mov rcx, hProcess
    sub rsp, 20h
    call GetModuleBaseNameA
    add rsp, 20h

    ;    CloseHandle(hProcess);
    xor rcx, rcx
    mov rcx, hProcess
    sub rsp, 20h
    call CloseHandle
    add rsp, 20h


    ;    if(strcmp(pName, buffer) == 0)
    ;        return true;
    ;   
    ;                >>  Note here where used rsi and rdi ???
    ;
    lea rcx, pName
    lea rdx, QWORD PTR [r15]  ; r15 has the address of our buffer on the stack but [r15] is the dereferenced buffer
    call szCmp
    add rsp, 100h          ; add buffer stack space back to avoid stack overflow
    cmp rax, 0
    jne match

    ; if current pid does not match, resume loop to next pid
    jmp find_pid            ; resumes loop for next pid (aProcesses[i])

    no__match:              ; none of the pids matched the string pName
    lea rcx, not_found
    sub rsp, 20h
    call vc_printf
    add rsp, 20h
    jmp exit

    match:
    lea rcx, found
    sub rsp, 20h
    call vc_printf
    add rsp, 20h

    exit:
    sub rsp, 20h
    call ExitProcess

entry_point endp

end



This works now because I modified my original post. Instead of RBX for holding the address of aProcesses, I used RSI. I even modified it again to use R12 instead of RBX just in case I use RBX somewhere else. Although no functions ever use it, it is still a volatile register and it isn't wise to store the address of a pointer in there through a large loop like I have with multiple function calls within it.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: jj2007 on January 16, 2024, 08:55:06 PM
Quote from: cyrus on January 16, 2024, 02:32:30 PMAlthough no functions ever use it, it is still a volatile register

No, rbx/ebx is a non-volatile register. Windows will not modify it, with one exception: callbacks like WndProc.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: HSE on January 16, 2024, 09:50:23 PM
Quote from: jj2007 on January 16, 2024, 08:55:06 PMNo, rbx/ebx is a non-volatile register.

It's about R12  :thumbsup:
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: jj2007 on January 16, 2024, 10:42:47 PM
Quote from: HSE on January 16, 2024, 09:50:23 PM
Quote from: jj2007 on January 16, 2024, 08:55:06 PMNo, rbx/ebx is a non-volatile register.

It's about R12  :thumbsup:

So what? Non-volatile, too

https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170 (https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170)
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: HSE on January 16, 2024, 11:26:09 PM
Quote from: jj2007 on January 16, 2024, 10:42:47 PMSo what?

:biggrin:   Dogs and cats are almost the same... but not the same.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: jj2007 on January 16, 2024, 11:33:22 PM
Quote from: cyrus on January 16, 2024, 02:32:30 PMI even modified it again to use R12 instead of RBX just in case I use RBX somewhere else. Although no functions ever use it, it is still a volatile register

1. The phrase could refer to rbx or r12, it's ambiguous
2. Both are non-volatile registers
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: BugCatcher on January 17, 2024, 02:26:39 AM
Any problem with div effecting edx?
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 17, 2024, 03:06:09 AM
Quote from: jj2007 on January 16, 2024, 11:33:22 PM
Quote from: cyrus on January 16, 2024, 02:32:30 PMI even modified it again to use R12 instead of RBX just in case I use RBX somewhere else. Although no functions ever use it, it is still a volatile register

1. The phrase could refer to rbx or r12, it's ambiguous
2. Both are non-volatile registers


Thanks. I guess thats why RBX did work but out of habit, I was used to just sticking to R12-15 but good to know.

I do need to read more into the ABI but I do a lot of other things as well, not just programming in asm
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 17, 2024, 09:39:22 AM
Ok I just wanted to say thank you sinsi for opening my eyes about the stack alignment. That plagued me. I had a program where I had call printf just to get it to work (or any function). But after analyzing it all the way through, I realized my stack was indeed unaligned.

My other program I was about to post up but didn't need to because so far it looks to be working pretty solid, was one that calls GetTcpTable which, requires 2 calls to be made. One must initialize
SizePointer by making 2 calls to GetTcpTable. First being [in], second call, [out] receiving the actual size. This is because the tcp table is dynamic depending on what's in it at the current time of calling the function and retreiving the data. The data goes into a struct of type
PMIB_TCPTABLE. The size from
SizePointer is what is used to allocate size for this struct. I did that on the stack using that size. The issue was that the size always varied. And I needed to either subtract stack space or push bytes on the stack. This time I did subtract the size on the stack. But here is the kicker. That may not be stack-aligned. So what was my trick? I took the whole size, divided it by 16 using 'div ebx' so ensure that I would have enough room. I noticed most calls for my particular tcp table ranged from about 512 to 772 or so bytes. Each call varied depending on when Microsoft decides to connect to some rogue server whenever it wants  :smiley:

After division, I checked if there was a remainder. If there was, I added 1 to the quotient, not the total size. The quotient + 1 x 16 again would give me a grand total that is stack-aligned. I subtract this from the stack to give me the struct I needed. Then I just added that size back and resumed to 'begin'. I tested this by calling printf between so that I know whats' going on. It ends up in an infinite loop which is the intended goal here,

https://learn.microsoft.com/en-us/windows/win32/api/iphlpapi/nf-iphlpapi-gettcptable
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 17, 2024, 09:59:38 AM
Quote from: sinsi on January 15, 2024, 02:11:06 PM
Quote from: NoCforMe on January 15, 2024, 01:33:04 PMAbout your question about using a static buffer (one declared in your .data section) instead of one allocated on the stack: pretty much 6 of one, half a dozen of the other. Not more or less efficient either way. It's true that you can initialize the static buffer when you declare it. But again, if you're using it multiple times with your Enum function, there's no need to "clear" it each time anyhow. A static buffer will take up space in your program; however, you can minimize the space it occupies in the .exe file by declaring it in your .data? section (uninitialized data), but then you can't initialize it in the declaration; you'll have to use code to initialize it if you need to do that.
Just to add to that, if your procedure is recursive, or gets called by multiple threads, you have to use the stack, otherwise each running procedure will clobber the other's buffer (since it is the same buffer).

This is why I love using the stack, just to be sure that data is always new.

NoCforMe, yea for this particular one, or any other one where that buffer gets filled from a call to a function such as GetModuleBaseName, I don't need a new buffer. In fact, for that program, I just declared 256 bytes in the .data section, then just 'lea r8, buf' (which is the 3rd argument to that function)
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: sinsi on January 17, 2024, 11:02:30 AM
If you need to allocate a variable-sized buffer, the best way (for me) is
 - allocate the amount of memory with HeapAlloc (or whatever you use)
 - store that address somewhere (non-volatile register or locally on the stack)
 - do what you need to do to fill the buffer
From here, you could
 - process the buffer
 - HeapFree
or just return the address to the caller in RAX and let them process and free the buffer

Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: cyrus on January 17, 2024, 11:27:16 AM
Quote from: sinsi on January 17, 2024, 11:02:30 AMIf you need to allocate a variable-sized buffer, the best way (for me) is
 - allocate the amount of memory with HeapAlloc (or whatever you use)
 - store that address somewhere (non-volatile register or locally on the stack)
 - do what you need to do to fill the buffer
From here, you could
 - process the buffer
 - HeapFree
or just return the address to the caller in RAX and let them process and free the buffer



Yes I totally forgot about that. If the buffer I need is larger than 8192, I usually end up calling malloc or HeapAlloc/HeapFree as well but sometimes I forget the simplest things. This relieves headaches of dealing with the stack but I'm glad I went through this in case I don't want to allocate memory from the heap and need to use the stack for simplicity.
Title: Re: issues with dereferencing iteration of array of long elements in a loop
Post by: TimoVJL on January 17, 2024, 05:23:20 PM
Windows PAGESIZE is 4096, so good to remember that with stack if use it without probing.