News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests
NB: Posting URL's See here: Posted URL Change

Main Menu

Roll your own FASTCALL in PowerBASIC.

Started by hutch--, December 16, 2014, 01:54:24 PM

Previous topic - Next topic

hutch--

Below is a test piece that shows how to "roll your own" version of FASTCALL so that you get a procedure without a stack frame, in which you can routinely use EBP and never have to correct the addresses of stack arguments when the stack changes. You must still preserve and restore any of EBX, ESI, EDI and EBP that you use, but you are freed from having to adjust ESP based addresses after pushes and pops.

#IF 0  ' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    With the FASTCALL procedures the arguments are passed directly in registers bypassing the
    overhead of the stack. In the system used below you can pass up to 3 arguments using the
    normal volatile registers EAX ECX & EDX and in these procedures they are written in the
    order,
      arg1 = EAX
      arg2 = ECX
      arg3 = EDX

    If you are passing only 1 argument you will only use EAX, with 2 arguments it will be EAX & ECX
    With 3 arguments you will use EAX ECX & EDX.

    The MACROS are designed to make calling the procedures more convenient but they have limitations
    that are different from high level code. You must pass data that can be written directly to a
    32 bit register.

    With procedures constructed in this manner you have access to 7 registers and do not have to
    deal with the stack making corrections to stack based addresses. You must properly comply with
    the Intel ABI rules for register preservation but by doing so you can use 4 extra registers.

    EBX ESI EDI & EBP

    If you were going to use all 4 registers you would use the following code.

    push ebx
    push esi
    push edi
    push ebp

  ; write your algorithm here

    pop ebp
    pop edi
    pop esi
    pop ebx

    NOTE that the restores are all popped in reverse order to the pushes.

    If you don't need all 4 just preserve and restore the ones you use.

    If you need extra memory variables without a stack being available you can allocate
    either GLOBAL or STATIC variables within the procedure.

#ENDIF ' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    #include "\basic\include\win32api.inc"

  ' ------------------------------
  ' return address of BASIC string
  ' ------------------------------
    MACRO FUNCTION sadd(quoted)
      MACROTEMP bas
      LOCAL bas as STRING
      bas = quoted
    END MACRO = StrPtr(bas)
  ' ------------------------------

  ' ------------------------------------------------------------
  ' fcmemcopy(psrc,pdst,bcnt) - fastcall memory copy
  '
  ' Loads the three arguments into registers, in this order, and
  ' then calls fastcall_mcopy:
  '   psrc -> EAX   (source address)
  '   pdst -> ECX   (destination address)
  '   bcnt -> EDX   (byte count)
  '
  ' Each argument is substituted straight into a MOV, so it must
  ' be data that can be written directly to a 32 bit register.
  ' ------------------------------------------------------------
    MACRO fcmemcopy(psrc,pdst,bcnt)
    PREFIX "!"
      mov eax, psrc
      mov ecx, pdst
      mov edx, bcnt
      call fastcall_mcopy
    END PREFIX
    END MACRO

  ' ------------------------
  ' fastcall simple len algo
  ' ------------------------
    MACRO FUNCTION fcgetlen(pstr)
      MACROTEMP retval
      LOCAL retval as DWORD
      PREFIX "!"
        mov eax, psrc
        call fastcall_len
        mov retval, eax
      END PREFIX
    END MACRO = retval

  ' ----------------------------
  ' fastcall high speed len algo
  ' ----------------------------
    MACRO FUNCTION fcstrlen(pstr)
      MACROTEMP retval
      LOCAL retval as DWORD
      PREFIX "!"
        mov eax, psrc
        call fastcall_strlen
        mov retval, eax
      END PREFIX
    END MACRO = retval

  ' ------------------------------------------------------------
  ' fcmfillB(src,count,filler) - fastcall BYTE level memory fill
  '
  ' Loads the three arguments into registers, in this order, and
  ' then calls memfillB:
  '   src    -> EAX   (address of the memory that gets filled)
  '   count  -> ECX   (number of bytes to write)
  '   filler -> EDX   (memfillB only uses the low byte, DL)
  '
  ' Each argument is substituted straight into a MOV, so it must
  ' be data that can be written directly to a 32 bit register.
  ' ------------------------------------------------------------
    MACRO fcmfillB(src,count,filler)
    PREFIX "!"
      mov eax, src
      mov ecx, count
      mov edx, filler
      call memfillB
    END PREFIX
    END MACRO

  ' ------------------------------------------------------------
  ' fcmfillD(src,count,filler) - fastcall DWORD level memory fill
  '
  ' Loads the three arguments into registers, in this order, and
  ' then calls memfillD:
  '   src    -> EAX   (address of the memory that gets filled)
  '   count  -> ECX   (length in BYTES, memfillD divides it by 4)
  '   filler -> EDX   (the 4 byte fill pattern)
  '
  ' Each argument is substituted straight into a MOV, so it must
  ' be data that can be written directly to a 32 bit register.
  ' ------------------------------------------------------------
    MACRO fcmfillD(src,count,filler)
    PREFIX "!"
      mov eax, src
      mov ecx, count
      mov edx, filler
      call memfillD
    END PREFIX
    END MACRO

  ' ---------------------------------------------------------
  ' allocate GLOBAL memory and assign a string to it
  ' For an empty buffer use "" for the string
  ' ---------------------------------------------------------
    MACRO FUNCTION globalstr(quoted_text,bytecount)
      MACROTEMP globalstring
      GLOBAL globalstring as STRING * bytecount
      globalstring = quoted_text
    END MACRO = VarPtr(globalstring)

  ' ---------------------------------------------------------
  ' allocate fixed memory in the executable BSS section
  ' ---------------------------------------------------------
    MACRO FUNCTION fixedmem(bytecount)
      MACROTEMP bss_section_memory
      GLOBAL bss_section_memory as STRING * bytecount
    END MACRO = VarPtr(bss_section_memory)

  ' ---------------------------------------------------------
  ' allocate local memory and assign a string to it
  ' For an empty buffer use "" for the string
  ' ---------------------------------------------------------
    MACRO FUNCTION localstr(quoted_text,bytecount)
      MACROTEMP szstring
      LOCAL szstring as STRINGZ * bytecount
      szstring = quoted_text
    END MACRO = VarPtr(szstring)

  ' ---------------------------------------------------------
  ' allocate local memory and return its address
  ' ---------------------------------------------------------
    MACRO FUNCTION localmem(bytecount)
      MACROTEMP stack_memory
      LOCAL stack_memory as STRING * bytecount
    END MACRO = VarPtr(stack_memory)
  ' ---------------------------------------------------------

' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

' ------------------------------------------------------------------
' PBmain - demo that exercises the fastcall macros above.
'
'   1. places "This is a test" in a 1024 byte local STRINGZ buffer
'   2. measures it with fcstrlen, copies that many bytes into a
'      second 1024 byte local buffer with fcmemcopy and prints it
'   3. takes another 1024 byte local buffer, fills its first 128
'      bytes with "XXXX" through fcmfillD and prints it
'   4. waits for a key press
'
' No function result is assigned.
' ------------------------------------------------------------------
FUNCTION PBmain as LONG

    LOCAL psrc as STRINGZ PTR
    LOCAL pdst as STRINGZ PTR
    LOCAL bcnt as DWORD

    psrc = localstr("This is a test",1024)      ' source text in a local buffer
    pdst = localmem(1024)                       ' destination buffer
    bcnt = fcstrlen(psrc)                       ' length without the terminator

  ' NOTE(review): only bcnt bytes are copied, so no terminator is written.
  ' Printing @pdst presumably relies on the localmem buffer starting out
  ' zero filled - confirm.
    fcmemcopy(psrc,pdst,bcnt)
    StdOut @pdst

    pdst = localmem(1024)                       ' a second, separate buffer

  ' the count is in bytes: 128 bytes = 32 copies of the 4 byte pattern
    fcmfillD(pdst,128,"XXXX")

  ' NOTE(review): same assumption as above about the bytes after the fill
    StdOut @pdst

    waitkey$

End FUNCTION

' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

' ------------------------------------------------------------------
' fastcall_mcopy - copy a block of memory, lowest address first
'
'   In  : EAX = source address
'         ECX = destination address
'         EDX = number of bytes to copy
'
' EBX ESI and EDI are saved and restored. ECX is used as the REP
' counter and the flags are changed. EAX and EDX are not written.
' ------------------------------------------------------------------
FASTPROC fastcall_mcopy
  PREFIX "!"

    push ebx            ; Intel ABI compliant
    push esi            ; preserve registers
    push edi

    cld                 ; string instructions count upwards
    mov esi, eax        ; source address in esi
    mov edi, ecx        ; destination address in edi
    mov ecx, edx        ; byte count in ecx
    mov ebx, edx        ; put a copy of byte count in ebx

    shr ecx, 2          ; number of whole DWORDs
    rep movsd           ; copy 4 bytes at a time

    mov ecx, ebx        ; reload the full byte count
    and ecx, 3          ; 0 to 3 bytes left over
    rep movsb           ; copy the tail

    pop edi             ; restore registers
    pop esi
    pop ebx

    ret

  END PREFIX
END FASTPROC

' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

' ------------------------------------------------------------------
' fastcall_len - simple byte scanner for a zero terminated string
'
'   In  : EAX = address of the string
'   Out : EAX = number of bytes before the first zero byte
'
' ECX is overwritten with the start address. No other register is
' written and the stack is not used.
' ------------------------------------------------------------------
FASTPROC fastcall_len
    PREFIX "!"

    mov ecx, eax            ; keep the start address
    sub eax, 1              ; back up one so the loop can add first

  align 4
  lbl0:
    add eax, 1              ; next byte
    cmp BYTE PTR [eax], 0   ; terminator ?
    jne lbl0

    sub eax, ecx            ; terminator address - start = length
    ret

    END PREFIX
END FASTPROC

' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

' ------------------------------------------------------------------
' fastcall_strlen - high speed length of a zero terminated string
'
'   In  : EAX = address of the string
'   Out : EAX = number of bytes before the first zero byte
'
' ESI and EDI are saved and restored, ECX and EDX are overwritten.
' The string is read one DWORD at a time starting at the address
' passed in, so up to 3 bytes beyond the terminator can be read.
' ------------------------------------------------------------------
FASTPROC fastcall_strlen
    PREFIX "!"
  ; -----------------------
  ; Agner Fog's StrLen algo
  ; src pointer is in EAX
  ; -----------------------

    push esi
    push edi

    lea edx, [eax+3]                ; pointer+3 used in the end
    mov esi, &H80808080             ; mask for the top bit of each byte

  ; -----------------------------------------------------------
  ; unroll by 4
  ; every step computes
  ;   ECX = (dword - 01010101h) AND (NOT dword) AND 80808080h
  ; which is non zero when the DWORD just read holds a zero byte
  ; -----------------------------------------------------------
  lbl1:     
    mov edi, [eax]                  ; read first 4 bytes
    add eax, 4                      ; increment pointer
    lea ecx, [edi-&H01010101]       ; subtract 1 from each byte
    not edi                         ; invert all bytes
    and ecx, edi                    ; and these two
    and ecx, esi                    ; keep only the top bit of each byte
    jnz nxt                         ; zero byte found

    mov edi, [eax]                  ; read next 4 bytes
    add eax, 4                      ; increment pointer
    lea ecx, [edi-&H01010101]       ; subtract 1 from each byte
    not edi                         ; invert all bytes
    and ecx, edi                    ; and these two
    and ecx, esi                    ; keep only the top bit of each byte
    jnz nxt                         ; zero byte found

    mov edi, [eax]                  ; read next 4 bytes
    add eax, 4                      ; increment pointer
    lea ecx, [edi-&H01010101]       ; subtract 1 from each byte
    not edi                         ; invert all bytes
    and ecx, edi                    ; and these two
    and ecx, esi                    ; keep only the top bit of each byte
    jnz nxt                         ; zero byte found

    mov edi, [eax]                  ; read next 4 bytes
    add eax, 4                      ; increment pointer
    lea ecx, [edi-&H01010101]       ; subtract 1 from each byte
    not edi                         ; invert all bytes
    and ecx, edi                    ; and these two
    and ecx, esi                    ; keep only the top bit of each byte
    jz lbl1                         ; no zero bytes, continue loop

  ; -----------------------------------------------------------
  ; EAX is now 4 past the DWORD that holds the terminator and
  ; the lowest bit set in ECX marks which byte it is
  ; -----------------------------------------------------------
  nxt:
    test ecx, &H00008080            ; test first two bytes
    jnz lbl2
    shr ecx, 16                     ; not in the first 2 bytes
    add eax, 2
  lbl2:
    shl cl, 1                       ; use carry flag to avoid branch
    sbb eax, edx                    ; compute length, carry takes off 1 more

    pop edi
    pop esi

    ret

    END PREFIX
END FASTPROC

' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

' ------------------------------------------------------------------
' memfillB - fastcall BYTE level memory fill
'
'   In  : EAX = address of the memory to fill
'         ECX = number of bytes to write
'         EDX = fill character (only the low byte, DL, is used)
'
' EDI is saved and restored. ECX is used as the REP counter and AL
' is overwritten with the fill byte, so EAX is not a return value.
' The direction flag is cleared before writing, the same as in
' memfillD and fastcall_mcopy, so the fill always runs upwards
' from the address in EAX.
' ------------------------------------------------------------------
FASTPROC memfillB
  PREFIX "!"

    push edi                ; Intel ABI compliant, preserve EDI

    cld                     ; write forward
    mov edi, eax            ; address in EDI
    mov al, dl              ; fill byte in AL
    rep stosb               ; write ECX bytes

    pop edi                 ; restore EDI

    ret

  END PREFIX
END FASTPROC

' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

' ------------------------------------------------------------------
' memfillD - fastcall DWORD level memory fill
'
'   In  : EAX = address of the memory to fill
'         ECX = length to fill in BYTES
'         EDX = 4 byte fill pattern
'
' ESI and EDI are saved and restored. ECX is used as the REP counter
' and EAX holds the fill pattern on exit, it is not a return value.
'
' NOTE(review): when the length is not a multiple of 4 the 1 to 3
' tail bytes are written with STOSB, which repeats AL (the low byte
' of the pattern) rather than continuing the pattern - confirm that
' this is what callers expect.
' ------------------------------------------------------------------
FASTPROC memfillD
  PREFIX "!"

    push esi                ; Intel ABI compliant
    push edi                ; preserve registers

    mov esi, ecx            ; count stored in ESI
    mov edi, eax            ; address in EDI
    mov eax, edx            ; fill chars in eax

    cld                     ; write forward
    shr ecx, 2              ; int divide by 4
    rep stosd               ; write the whole DWORDs

    mov ecx, esi            ; reload count
    and ecx, 3              ; calculate tail length
    rep stosb               ; write the tail from AL

    pop edi                 ; restore registers
    pop esi

    ret

  END PREFIX
END FASTPROC

' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

Gunther

Steve,

that's an interesting idea. Do you have plans to post it inside the PB forum, too? I think there are some coders who would be interested.

Gunther
You have to know the facts before you can distort them.

hutch--

Gunther,

I put a link in their ASM section.

Gunther

Quote from: hutch-- on December 16, 2014, 09:21:38 PM
I put a link in their ASM section.

Good idea. We would need a few more active PB & ASM coders from there.

Gunther
You have to know the facts before you can distort them.