News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests
NB: Posting URL's See here: Posted URL Change

Main Menu

Old "C" Question

Started by hutch--, July 12, 2017, 04:18:58 AM

Previous topic - Next topic

hutch--

I have been doing some testing on using modules written in pure C without any libraries but confess to being extremely rusty with my C code. Formatted pre 1990 K&R style, this seems to work OK and the generated output appears to be reasonably good quality for a sequential comparison. Does anyone have a more efficient technique for such a simple task ? I note with some humour that the main code is in 32 bit even though the LOCAL is a 64 bit register.

Microsoft (R) C/C++ Optimizing Compiler Version 19.00.24218.2 for x64
Copyright (C) Microsoft Corporation.  All rights reserved.


// ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

/* Return item unchanged when it is one of 1..6; every other value yields 0. */
int ifblock(int item)
{
    switch (item) {
    case 1:
    case 2:
    case 3:
    case 4:
    case 5:
    case 6:
        /* Each accepted input maps to itself. */
        return item;
    default:
        return 0;
    }
}

// ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

/* ----------------------------------------------------------------------------------

sub_140001080   proc
.text:0000000140001080                            local local_1: qword ; [rsp+0x20]
.text:0000000140001080
.text:0000000140001080 83F901                     cmp ecx, 0x1
.text:0000000140001083 7503                       jne 0x140001088
.text:0000000140001085 8BC1                       mov eax, ecx
.text:0000000140001087 C3                         ret
.text:0000000140001088
.text:0000000140001088 0x140001088:
.text:0000000140001088 83F902                     cmp ecx, 2
.text:000000014000108b 7503                       jne 0x140001090
.text:000000014000108d 8BC1                       mov eax, ecx
.text:000000014000108f C3                         ret
.text:0000000140001090
.text:0000000140001090 0x140001090:
.text:0000000140001090 83F903                     cmp ecx, 3
.text:0000000140001093 7503                       jne 0x140001098
.text:0000000140001095 8BC1                       mov eax, ecx
.text:0000000140001097 C3                         ret
.text:0000000140001098
.text:0000000140001098 0x140001098:
.text:0000000140001098 83F904                     cmp ecx, 4
.text:000000014000109b 7503                       jne 0x1400010a0
.text:000000014000109d 8BC1                       mov eax, ecx
.text:000000014000109f C3                         ret
.text:00000001400010a0
.text:00000001400010a0 0x1400010a0:
.text:00000001400010a0 83F905                     cmp ecx, 5
.text:00000001400010a3 7503                       jne 0x1400010a8
.text:00000001400010a5 8BC1                       mov eax, ecx
.text:00000001400010a7 C3                         ret
.text:00000001400010a8
.text:00000001400010a8 0x1400010a8:
.text:00000001400010a8 33C0                       xor eax, eax
.text:00000001400010aa BA06000000                 mov edx, 6
.text:00000001400010af 3BCA                       cmp ecx, edx
.text:00000001400010b1 0F44C2                     cmove eax, edx
.text:00000001400010b4 C3                         ret

---------------------------------------------------------------------------------- */


Vortex

Hi Hutch,

#include <stdio.h>

/* Return item when 1 <= item <= 6, otherwise 0. */
int ifblock(int item)
{
    /* A C comparison yields 1 or 0, so the product is either item or 0. */
    int in_range = (item >= 1) && (item <= 6);

    return in_range * item;
}

/* Print the value ifblock returns for the input 6; argc and argv are not used. */
int main(int argc,char *argv[])
{
    int result = ifblock(6);

    printf("ifblock=%d", result);
    return 0;
}


The result of Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.40219.01 for 80x86 :

cl /Ox /O2 test.c

_text   SEGMENT PARA PUBLIC 'CODE'

_ifblock PROC NEAR
        mov     ecx, dword ptr [esp+4H]
        lea     eax, ptr [ecx-1H]
        mov     edx, 5
        cmp     edx, eax
        sbb     eax, eax
        inc     eax
        imul    eax, ecx
        ret
_ifblock ENDP

_text   ENDS


hutch--

Erol,

Thanks for the reply, what about a 64 bit version ?

Vortex

Hi Hutch,

Here is the 64-bit version :

ifblock PROC
        xor     eax, eax
        lea     edx, ptr [rcx-1H]
        cmp     edx, 5
        setbe   al
        imul    eax, ecx
        ret
ifblock ENDP

TWell

/* Return item when it lies in 1..6; any value outside that range yields 0. */
int ifblock(int item)
{
    /* Guard clause: reject everything outside the accepted range first. */
    if (item <= 0 || item >= 7) {
        return 0;
    }
    return item;
}

ifblock PROC
        xor     eax, eax                                ; 0000 _ 33. C0
        lea     edx, ptr [rcx-1H]                       ; 0002 _ 8D. 51, FF
        cmp     edx, 5                                  ; 0005 _ 83. FA, 05
        cmovbe  eax, ecx                                ; 0008 _ 0F 46. C1
        ret                                             ; 000B _ C3
ifblock ENDP

hutch--

This is the next format I have tested. Being very rusty with C I did not remember some of the data types, but with a bit of experimentation I tried "long long" and got the output to at least partially produce 64 bit code. What I am trying to get is full 64 bit registers without the partial 32/64 bit mix that I have been getting. I have solved the problem of using Pelle's linker: there is an option in the 2017 CL, supposedly only for 32 bit code, that also turns off the embedding of the two default library names in a 64 bit module.

/Zl omit default library name in .OBJ

With this option set, Pelle's linker does not drop an error on the two missing default library names.

Same question as before: is there a more efficient way to code this block, which evaluates a finite set of inputs with a different return value for each input, that will produce full 64 bit code?

This is the modified module.

// ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

/* Map each input in 1..6 to its associated constant; any other input yields 0. */
long long ifblock(long long item)
{
    switch (item) {
    case 1:
        return 1234;
    case 2:
        return 2345;
    case 3:
        return 3456;
    case 4:
        return 4567;
    case 5:
        return 5678;
    case 6:
        return 6789;
    default:
        return 0;
    }
}

/* ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

cmp rcx, 0x1
jne 0x1400012dc
mov eax, 0x4d2
ret

0x1400012dc:
cmp rcx, 2
jne 0x1400012e8
mov eax, 0x929
ret

0x1400012e8:
cmp rcx, 3
jne 0x1400012f4
mov eax, 0xd80
ret

0x1400012f4:
cmp rcx, 4
jne 0x140001300
mov eax, 0x11d7
ret

0x140001300:
cmp rcx, 5
jne 0x14000130c
mov eax, 0x162e
ret

0x14000130c:
xor eax, eax
mov edx, 0x1a85
cmp rcx, 6
cmove eax, edx
ret

¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ */

Vortex

Hi Hutch,

/* Return values for the inputs 1..6, stored at indices 0..5. */
long long x[]={1234,2345,3456,4567,5678,6789};

/*
 * Table lookup for a finite set of inputs.
 *
 * item: value to look up; 1..6 are the accepted inputs.
 * Returns the table entry for item, or 0 for any other value.
 */
long long ifblock(long long item)
{
    /*
     * The table is zero-based while the accepted inputs start at 1, so the
     * index is item - 1. Indexing with item itself returns the neighbouring
     * entry and reads past the end of the array for item == 6.
     */
    return (item > 0 && item < 7) ? x[item - 1] : 0;
}

hutch--

Thanks Erol, that produces surprisingly good code.

ifblock:
  0000000000000000: lea         rax,[rcx-1]
  0000000000000004: cmp         rax,5
  0000000000000008: ja          0000000000000016
  000000000000000A: lea         rax,[x]
  0000000000000011: mov         rax,qword ptr [rax+rcx*8]
  0000000000000015: ret
  0000000000000016: xor         eax,eax
  0000000000000018: ret

dumpbin result.

jack

here's a C explorer site where you can see the asm output from your C code https://godbolt.org

hutch--

Thanks for the suggestion jack, I gave it a whirl but GCC output is nowhere near as good as the VC2017 code generation.

jack

Hutch, if using gcc you need to use -O3 to optimize, you will see the difference.

TWell

gcc -c -O3 ifblock.c
ifblock LABEL NEAR
        xor     eax, eax                                ; 0000 _ 31. C0
        lea     rdx, [rcx-1H]                           ; 0002 _ 48: 8D. 51, FF
        cmp     rdx, 5                                  ; 0006 _ 48: 83. FA, 05
        ja      ?_001                                   ; 000A _ 77, 0B
        lea     rax, [x]                                ; 000C _ 48: 8D. 05, 00000000(rel)
        mov     rax, qword ptr [rax+rcx*8]              ; 0013 _ 48: 8B. 04 C8
?_001:  ret                                             ; 0017 _ C3

jack

Vortex code, using gcc 7.1 -O3
Quote from: Vortex on July 12, 2017, 05:01:51 AM
#include <stdio.h>

/* Return item when 1 <= item <= 6, otherwise 0. */
int ifblock(int item)
{
    /* A C comparison yields 1 or 0, so the product is either item or 0. */
    int in_range = (item >= 1) && (item <= 6);

    return in_range * item;
}

/* Print the value ifblock returns for the input 6; argc and argv are not used. */
int main(int argc,char *argv[])
{
    int result = ifblock(6);

    printf("ifblock=%d", result);
    return 0;
}


ifblock(int):
        lea     eax, [rdi-1]
        cmp     eax, 5
        setbe   al
        movzx   eax, al
        imul    eax, edi
        ret
.LC0:
        .string "ifblock=%d"
main:
        sub     rsp, 8
        mov     esi, 6
        mov     edi, OFFSET FLAT:.LC0
        xor     eax, eax
        call    printf
        xor     eax, eax
        add     rsp, 8
        ret


hutch--

#13
This is what I get from Erol's code built with this option in CL ver 14 VS2017.

\amd64\cl /c /O2 /Ot /Zl ifblock.c


ifblock:
  0000000000000000: lea         rax,[rcx-1]
  0000000000000004: cmp         rax,5
  0000000000000008: ja          0000000000000016
  000000000000000A: lea         rax,[x]
  0000000000000011: mov         rax,qword ptr [rax+rcx*8]
  0000000000000015: ret
  0000000000000016: xor         eax,eax
  0000000000000018: ret


This is why you still write this type of code in assembler. What I am after is reduced instruction count and full 64 bit code with no hybrid code.

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

NOSTACKFRAME

; blockif: branch-free table lookup for a small, finite set of inputs.
;   in:  rcx = index to look up
;        NOTE(review): presumably the first integer argument under the
;        Win64 calling convention - confirm against the callers.
;   out: rax = tabl[rcx] for rcx in 0..6, otherwise tabl[0], which is 0
;   rcx and rdx are overwritten.
blockif proc

    .data
      align 8
      ; entry 0 holds 0, so it serves both index 0 and every out-of-range index
      tabl dq 0,1234,2345,3456,4567,5678,6789
    .code

    cmp rcx, 6                          ; set flags for the range check against the last valid index
    cmova rcx, tabl                     ; unsigned "above": rcx > 6 (negative values included) -> rcx = first table entry, 0
    lea rdx, tabl                       ; load table address
    mov rax, QWORD PTR [rdx+rcx*8]      ; return table value (entries are 8-byte qwords)
    ret

blockif endp

STACKFRAME

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤