News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests
NB: Posting URL's See here: Posted URL Change

Main Menu

issues with dereferencing iteration of array of long elements in a loop

Started by cyrus, January 14, 2024, 12:20:40 PM

Previous topic - Next topic

cyrus

I'm just trying to convert this program in c++ to asm. It works normally if notepad.exe is loaded, it will return 1 in c++. In asm, I'm having issues with 'lea rbx, aProcesses' which should contain each pid in [rbx] but it doesn't.

Here is the code in c++ in windows


#include <Windows.h>
#include <stdio.h>
#include <psapi.h>

bool isRunning(char * pName){
    unsigned long aProcesses[1024], cbNeeded, cProcesses;
    if(!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
        return false;
 
    printf("sizeof(aProcesses): %zd\n", sizeof(aProcesses));
    printf("cbNeeded: %ld\n", cbNeeded);
    printf("sizeof(unsigned long): %zd\n", sizeof(unsigned long));

    cProcesses = cbNeeded / sizeof(unsigned long);
    printf("cProcesses: %ld\n", cProcesses);
    for(unsigned int i = 0; i < cProcesses; i++)
    {
        printf("i: %d: ", i);
        printf("aProcesses[i]: %d\n", aProcesses[i]);
        if(aProcesses[i] == 0)
            continue;
 
        HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, aProcesses[i]);
        printf("hProcess: %p\n", hProcess);
        char buffer[50];
        GetModuleBaseName(hProcess, 0, buffer, 50);
        CloseHandle(hProcess);
        if(strcmp(pName, buffer) == 0)
            return true;
    }
    return false;
}

int main(void){
    bool ret = isRunning("notepad.exe");
    printf("%d\n", ret);
    return 0;
}

---------------------------------
And here is my code in asm



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
includelib ucrt.lib
includelib msvcrt.lib
includelib legacy_stdio_definitions.lib
includelib kernel32.lib
includelib psapi.lib


.data
aProcesses       DD 1024 DUP(0)   ;   unsigned long aProcesses[1024]
cbNeeded         DQ ?
cProcesses       DQ ?
hProcess         DQ ?

pName db "notepad.exe",0
found db "pid found!",0
not_found db "pid not found!",0

.code
externdef printf:proc
externdef EnumProcesses:proc
externdef OpenProcess:proc
externdef GetModuleBaseNameA:proc
externdef CloseHandle:proc
externdef ExitProcess:proc


main proc

    sub rsp, 28h                    ;reserve stack space for called functions
    and rsp, 0fffffffffffffff0h     ;make sure stack 16-byte aligned

    begin:

    ; if(!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
    ;    return false;

    lea r8, cbNeeded       ; &cbNeeded; use lea whenever var is [out]
    mov rdx, 1000h         ; sizeof(aProcesses); 4096
    lea rcx, aProcesses    ; long aProcesses[1024] array to hold 1024 pids; use lea whenever var is [out]; pointers are passed by reference; like &
    sub rsp, 20h
    call EnumProcesses

    xor rax, rax
    xor rbx, rbx
    mov ax, WORD PTR [cbNeeded]  ; dereferenced; do not use lea
    sar eax, 2                    ; does the same thing as the division below. i've debugged this
    ;mov bl, 4h                   ; size of long
    ;div bl
    mov cProcesses, rax          ; ax contains quotient; dx contains remainder


    ; for(unsigned int i = 0; i < cProcesses; i++)
    ; {
    ;     if(aProcesses[i] == 0)
    ;         continue;

    xor r14, r14             ; r14 is the counter
    mov r14, cProcesses      ; cProcesses contains the number of total processes
    xor rbx, rbx
    lea rbx, aProcesses      ; all processes;  the entire array
   
    find_pid:
        xor rax, rax
        mov eax, DWORD PTR [rbx] ; this should be the PID but having trouble getting this to work
        add rbx, 4h              ; incrementing to the next element; long is 4 bytes each
        cmp eax, 0               ; check if null
        je continue

        jmp open_process

        continue:
            dec r14              ; (while --ecx) in c; r14 is the counter
            cmp r14, 0
            je no__match

            jmp find_pid           


    open_process:
    ;    HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, aProcesses[i]);

    xor r8, r8
    mov r8d, eax            ; eax = aProcesses[i] ; each element is 4 bytes, not 1
    xor rdx, rdx            ; arg2 = NULL
    xor rcx, rcx
    mov rcx, 410h           ; PROCESS_QUERY_INFORMATION: 400h;  PROCESS_VM_READ 10h; add them both = 410h
    sub rsp, 20h
    call OpenProcess
    mov hProcess, rax
    add rsp, 20h


    ;    char buf[256];
    ; we add 256 bytes on the stack since we want a clean buffer generated for each loop
    xor rax, rax
    xor rcx, rcx
    mov al, 20h  ; 32 bytes x 8 (push rcx) = 0x100 (256) bytes is needed for 'char buf[256]'
    init_buf:
        push rcx
        dec al
        cmp al, cl
        jne init_buf


    lea r15, [rsp]    ; must use a register because we will need to load it into rdx for GetModuleBaseName

    ;    GetModuleBaseName(hProcess, 0, buffer, 50);

    xor r8, r8
    xor r9, r9
    mov r9, 100h              ; 256 bytes for our buffer to write information into: [out] buffer
    mov r8, r15               ; r15 has the address of our buffer on the stack
    xor rdx, rdx              ; 2nd arg = NULL
    mov rcx, hProcess
    sub rsp, 20h
    call GetModuleBaseNameA
    add rsp, 20h

    ;    CloseHandle(hProcess);
    xor rcx, rcx
    mov rcx, hProcess
    sub rsp, 20h
    call CloseHandle
    add rsp, 20h


    ;    if(strcmp(pName, buffer) == 0)
    ;        return true;
    lea rsi, pName
    lea rdi, QWORD PTR [r15]  ; r15 has the address of our buffer on the stack but [r15] is the dereferenced buffer
    call str_cmp
    add rsp, 100h           ; add buffer stack space back to avoid stack overflow
    cmp rax, 0
    je match

    ; if current pid does not match, resume loop to next pid
    jmp find_pid            ; resumes loop for next pid (aProcesses[i])

    no__match:              ; none of the pids matched the string pName
    lea rcx, not_found
    sub rsp, 20h
    call printf
    add rsp, 20h
    jmp exit

    match:
    lea rcx, found
    sub rsp, 20h
    call printf
    add rsp, 20h

    exit:
    sub rsp, 20h
    call ExitProcess

main endp

end

jj2007


cyrus

I've actually checked the return of this in ax and it matches cProcesses in c++. For my system it returns around 139-142.

> isrunning.exe
sizeof(aProcesses): 4096
cbNeeded: 560
sizeof(unsigned long): 4
cProcesses: 140
i: 0: aProcesses[i]: 0
i: 1: aProcesses[i]: 4
hProcess: 0000000000000000
i: 2: aProcesses[i]: 92
hProcess: 0000000000000000

But I did modify that to sar which has the same effect anyway, but no change.

jj2007

Quote from: cyrus on January 14, 2024, 12:50:13 PMBut I did modify that to sar which has the same effect anyway, but no change.

I don't have time to check the logic of your project, but div bl will definitely fail with an exception.

cyrus

Well I have debugged that and it does not fail so I'm not sure how you reach that conclusion, but 'sar eax, 2' has the same effect as 'div bl'. The end result in ax is the number of processes which match cProcesses in the c++ code.

sinsi

The code you posted gives me an array of PIDs, not just 2.
One problem may be the "sub rsp,20h", the function EnumProcesses seems to skip over the misalignment of the stack.

As far as the "div bl" goes, you are fluking it, since it translates to "divide AX by BL", so any count >255 would overflow.

cyrus

Quote from: sinsi on January 14, 2024, 01:41:12 PMThe code you posted gives me an array of PIDs, not just 2.
One problem may be the "sub rsp,20h", the function EnumProcesses seems to skip over the misalignment of the stack.

As far as the "div bl" goes, you are fluking it, since it translates to "divide AX by BL", so any count >255 would overflow.

I've posted the entire code this time. I edited my original post. As mentioned, from OpenProcess and below, all that works because I tested it by manually placing the actual PID of notepad.exe from my system into eax which returned true. But in this code, I have a loop and not able to dereference aProcesses. As far as the add rsp, well I removed that because I realized I was not in the loop there. If that isn't used in a loop, a stack overflow will be the result. But that isn't the issue.

cyrus




This proves the division by bl does indeed work. The end result in RAX is 8D which in decimal, is 141. Which, if you run the c++ code, you will see it is cProcesses and it is more or less the same. This will vary each time, for me, its been between 139 - 144 depending on when I run it. Also, re-running the c++ code may change that number

NoCforMe

Couple small things that don't affect execution:
    xor r14, r14            ; r14 is the counter
    mov r14, cProcesses      ; cProcesses contains the number of total processes

    xor rbx, rbx
    lea rbx, aProcesses      ; all processes;  the entire array

Completely unnecessary to clear those registers before loading them.
Assembly language programming should be fun. That's why I do it.

cyrus

I was able to successfully get this to work but I'm not sure why it actually mattered. Initially, I was using rsi instead of rbx to place aProcesses into. I had other functions to print integers with printf and it used rsi so I ended up using rbx. Well when I used rsi, I noticed that each byte was the actual string from pName. Why is that? I never loaded any string into rsi yet somehow it is initialized to contain that string. Does it do this in startup? I ended up just changing the register from rbx to r12, something that isn't volatile and it worked perfectly. Took me an entire day to realize that. Any ideas why I couldn't use rsi for that? I use rsi this way in other code. If I load something in it, I expect it to have my array, not some string I defined in my code which bears no resemblance to rsi.

BTW, my code includes my original division by bl.

cyrus

Quote from: NoCforMe on January 14, 2024, 02:53:23 PMCouple small things that don't affect execution:
    xor r14, r14            ; r14 is the counter
    mov r14, cProcesses      ; cProcesses contains the number of total processes

    xor rbx, rbx
    lea rbx, aProcesses      ; all processes;  the entire array

Completely unnecessary to clear those registers before loading them.
https://masm32.com/board/Smileys/default/badgrin.gif

I realized that for lea but out of habit I keep doing that just in case because sometimes I have over a thousand lines of code and if I forget to xor something, I get in trouble.

Love the screen name. I should have chose noPyforMe since I hate python so much.  :angelic:

NoCforMe

Are you sure you understand the 64-bit ABI correctly?

I don't do any 64-bit programming myself, so I'm not sure of the particulars, but I believe that RSI, as well as RBX and RDI, are "sacred" registers that must be preserved, just as their 32-bit counterparts are. So if you're going to use them in your code you need to preserve them and restore them before exiting. Not sure if that was the issue you experienced.

I'm curious to know if RSI comes pre-loaded (with the command tail?) at the program entry point.

Quote from: cyrus on January 14, 2024, 03:04:42 PMI realized that for lea but out of habit I keep doing that just in case because sometimes I have over a thousand lines of code and if I forget to xor something, I get in trouble.

Well, no harm, no foul.

BTW, concerning the div bl, since you're doing a power-of-2 divide, use SHR (no need for SAR) instead. It's much more elegant. Not to mention tons faster, not that it matters here ...
Assembly language programming should be fun. That's why I do it.

cyrus

Quote from: NoCforMe on January 14, 2024, 03:08:40 PMAre you sure you understand the 64-bit ABI correctly?

I don't do any 64-bit programming myself, so I'm not sure of the particulars, but I believe that RSI, as well as RBX and RDI, are "sacred" registers that must be preserved, just as their 32-bit counterparts are. So if you're going to use them in your code you need to preserve them and restore them before exiting. Not sure if that was the issue you experienced.

I'm curious to know if RSI comes pre-loaded (with the command tail?) at the program entry point.

Quote from: cyrus on January 14, 2024, 03:04:42 PMI realized that for lea but out of habit I keep doing that just in case because sometimes I have over a thousand lines of code and if I forget to xor something, I get in trouble.

Well, no harm, no foul.

BTW, concerning the div bl, since you're doing a power-of-2 divide, use SHR (no need for SAR) instead. It's much more elegant. Not to mention tons faster, not that it matters here ...


Yes this register is not one that can be used throughout a loop like that. Now that I think of it, this is the first time I attempted to use that register through a loop and I should have been more cautious about anything that isn't r12-15.

As for RSI itself and string, I do in fact think that it does come pre-loaded with some string that is defined. For me that happened to be the first string pName. I've seen this happen before but wasn't sure.

Good to know about the division. It is definitely faster as I've used SHR before but not SAR. In large cases, I will definitely use them. Thanks all for the comments (even though they didn't solve the issue, they are helpful for future endeavors in asm)

sinsi

You need to read up about spill/shadow space and passing parameters for 64-bit.
    sub rsp, 28h+256                    ;reserve stack space for called functions
    lea r15, [rsp+28]    ; delete the later line before the call to GetModuleBaseName
This change seems to *not crash*

You normally allocate 4 qwords for the spill. If a Windows function you call has more than 4 parameters then you would allocate that many. Note that you MUST allocate a minimum of 4.

Once you have set up your stack, don't touch it - no more "sub rsp,20h/add rsp,20h" pairs, the initial adjustment will take care of it.

NoCforMe

My recommendation, take it or leave it: Forget 64-bit programming. Completely overkill and a pain in the ass besides. Win32 forever!
Assembly language programming should be fun. That's why I do it.