I have been doing some testing on using modules written in pure C without any libraries but confess to being extremely rusty with my C code. Formatted pre 1990 K&R style, this seems to work OK and the generated output appears to be reasonably good quality for a sequential comparison. Does anyone have a more efficient technique for such a simple task ? I note with some humour that the main code is in 32 bit even though the LOCAL is a 64 bit register.
Microsoft (R) C/C++ Optimizing Compiler Version 19.00.24218.2 for x64
Copyright (C) Microsoft Corporation. All rights reserved.
// ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
/*
 * Map item to a return code: the values 1 through 6 are returned unchanged,
 * every other value yields 0.
 */
int ifblock(int item)
{
    switch (item) {
    case 1:
        return 1;
    case 2:
        return 2;
    case 3:
        return 3;
    case 4:
        return 4;
    case 5:
        return 5;
    case 6:
        return 6;
    default:
        /* anything outside 1..6 */
        return 0;
    }
}
// ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
/* ----------------------------------------------------------------------------------
sub_140001080 proc
.text:0000000140001080 local local_1: qword ; [rsp+0x20]
.text:0000000140001080
.text:0000000140001080 83F901 cmp ecx, 0x1
.text:0000000140001083 7503 jne 0x140001088
.text:0000000140001085 8BC1 mov eax, ecx
.text:0000000140001087 C3 ret
.text:0000000140001088
.text:0000000140001088 0x140001088:
.text:0000000140001088 83F902 cmp ecx, 2
.text:000000014000108b 7503 jne 0x140001090
.text:000000014000108d 8BC1 mov eax, ecx
.text:000000014000108f C3 ret
.text:0000000140001090
.text:0000000140001090 0x140001090:
.text:0000000140001090 83F903 cmp ecx, 3
.text:0000000140001093 7503 jne 0x140001098
.text:0000000140001095 8BC1 mov eax, ecx
.text:0000000140001097 C3 ret
.text:0000000140001098
.text:0000000140001098 0x140001098:
.text:0000000140001098 83F904 cmp ecx, 4
.text:000000014000109b 7503 jne 0x1400010a0
.text:000000014000109d 8BC1 mov eax, ecx
.text:000000014000109f C3 ret
.text:00000001400010a0
.text:00000001400010a0 0x1400010a0:
.text:00000001400010a0 83F905 cmp ecx, 5
.text:00000001400010a3 7503 jne 0x1400010a8
.text:00000001400010a5 8BC1 mov eax, ecx
.text:00000001400010a7 C3 ret
.text:00000001400010a8
.text:00000001400010a8 0x1400010a8:
.text:00000001400010a8 33C0 xor eax, eax
.text:00000001400010aa BA06000000 mov edx, 6
.text:00000001400010af 3BCA cmp ecx, edx
.text:00000001400010b1 0F44C2 cmove eax, edx
.text:00000001400010b4 C3 ret
---------------------------------------------------------------------------------- */
Hi Hutch,
#include <stdio.h>
/*
 * Return item when it lies in 1..6, otherwise 0.
 * The range test evaluates to 0 or 1, so multiplying by it either keeps
 * item or zeroes it without an explicit branch in the source.
 */
int ifblock(int item)
{
    int in_range = (item >= 1) && (item <= 6);

    return in_range * item;
}
/* Demo driver: print the result of ifblock for the sample input 6. */
int main(int argc,char *argv[])
{
    int result = ifblock(6);

    printf("ifblock=%d", result);
    return 0;
}
The result of Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.40219.01 for 80x86 :
cl /Ox /O2 test.c
_text SEGMENT PARA PUBLIC 'CODE'
; _ifblock: 32-bit compiler output for the "range test times item" C version.
; In:  [esp+4] = item (loaded from the stack)
; Out: eax = item when 1 <= item <= 6, otherwise 0
; Writes eax, ecx, edx and the flags.
_ifblock PROC NEAR
mov ecx, dword ptr [esp+4H] ; ecx = item
lea eax, ptr [ecx-1H] ; eax = item - 1, so the valid range 1..6 becomes 0..5
mov edx, 5
cmp edx, eax ; unsigned 5 - (item - 1): carry is set only when item - 1 > 5
sbb eax, eax ; eax = -1 when carry was set (out of range), else 0
inc eax ; eax = 0 when out of range, 1 when in range
imul eax, ecx ; eax = in-range flag * item
ret
_ifblock ENDP
_text ENDS
Erol,
Thanks for the reply, what about a 64 bit version ?
Hi Hutch,
Here is the 64-bit version :
; ifblock: 64-bit listing; the setbe/imul pair matches the
; "range test times item" C version (presumably its source - confirm).
; In:  ecx = item
; Out: eax = item when 1 <= item <= 6, otherwise 0
ifblock PROC
xor eax, eax ; clear eax first so setbe on al leaves a clean 0/1 in eax
lea edx, ptr [rcx-1H] ; edx = item - 1, so the valid range 1..6 becomes 0..5
cmp edx, 5
setbe al ; al = 1 when (unsigned)(item - 1) <= 5, else stays 0
imul eax, ecx ; eax = in-range flag * item
ret
ifblock ENDP
/*
 * Return item when it lies in 1..6, otherwise 0.
 */
int ifblock(int item)
{
    /* reject everything outside the accepted range first */
    if (item < 1 || item > 6)
        return 0;

    return item;
}
; ifblock: 64-bit listing shown for the ternary C version
; (item > 0 && item < 7) ? item : 0.
; In:  ecx = item
; Out: eax = item when 1 <= item <= 6, otherwise 0
ifblock PROC
; eax starts as the default result 0
xor eax, eax ; 0000 _ 33. C0
; edx = item - 1, so one unsigned compare checks both range bounds
lea edx, ptr [rcx-1H] ; 0002 _ 8D. 51, FF
cmp edx, 5 ; 0005 _ 83. FA, 05
; in range (unsigned item - 1 <= 5): replace the 0 with item
cmovbe eax, ecx ; 0008 _ 0F 46. C1
ret ; 000B _ C3
ifblock ENDP
This is the next format I have tested. Being very rusty with C, I did not remember some of the data types, but with a bit of experimentation I tried "long long" and got the compiler to output at least partially 64 bit code. What I am trying to get is full 64 bit registers without the partial 32/64 bit mix that I have been getting. I have solved the problem I had with using Pelle's linker: there is an option in the 2017 CL that is supposed to only work on 32 bit code, but it turns off the embedding of the two default library names in a 64 bit module as well.
/Zl omit default library name in .OBJ
With this option set, Pelle's linker does not drop an error on the two missing default library names.
Same question as before, is there a more efficient way to code this block to evaluate a finite set of inputs with different return value for each value and that will produce full 64 bit code ?
This is the modified module.
// ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
/*
 * Translate item (1..6) into its associated 64-bit return value.
 * Any item outside 1..6 yields 0.
 */
long long ifblock(long long item)
{
    switch (item) {
    case 1:
        return 1234;
    case 2:
        return 2345;
    case 3:
        return 3456;
    case 4:
        return 4567;
    case 5:
        return 5678;
    case 6:
        return 6789;
    default:
        /* no entry for this item */
        return 0;
    }
}
/* ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
cmp rcx, 0x1
jne 0x1400012dc
mov eax, 0x4d2
ret
0x1400012dc:
cmp rcx, 2
jne 0x1400012e8
mov eax, 0x929
ret
0x1400012e8:
cmp rcx, 3
jne 0x1400012f4
mov eax, 0xd80
ret
0x1400012f4:
cmp rcx, 4
jne 0x140001300
mov eax, 0x11d7
ret
0x140001300:
cmp rcx, 5
jne 0x14000130c
mov eax, 0x162e
ret
0x14000130c:
xor eax, eax
mov edx, 0x1a85
cmp rcx, 6
cmove eax, edx
ret
¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ */
Hi Hutch,
/* Return values for item = 1..6; x[0] belongs to item 1, x[5] to item 6. */
long long x[]={1234,2345,3456,4567,5678,6789};

/*
 * Table-driven replacement for the if-chain.
 *
 * item    : selector; only 1..6 have an entry in x
 * returns : the table value for item, or 0 for any other item
 *
 * The table is zero-based while item is one-based, so the index is
 * item - 1. Indexing with item itself would return the neighbouring
 * entry for 1..5 and read past the end of x for item == 6.
 */
long long ifblock(long long item)
{
    return (item > 0 && item < 7) ? x[item - 1] : 0;
}
Thanks Erol, that produces surprisingly good code.
ifblock:
0000000000000000: lea rax,[rcx-1]
0000000000000004: cmp rax,5
0000000000000008: ja 0000000000000016
000000000000000A: lea rax,[x]
0000000000000011: mov rax,qword ptr [rax+rcx*8]
0000000000000015: ret
0000000000000016: xor eax,eax
0000000000000018: ret
dumpbin result.
here's a C explorer site where you can see the asm output from your C code https://godbolt.org
Thanks for the suggestion jack, I gave it a whirl but the GCC output is nowhere near as good as the VC2017 code generation.
Hutch, if using gcc you need to use -O3 to optimize, you will see the difference.
gcc -c -O3 ifblock.c
; ifblock: gcc -O3 output for the table-lookup C version.
; In:  rcx = item
; Out: rax = qword at x + item*8 when 1 <= item <= 6, otherwise 0
ifblock LABEL NEAR
; rax starts as the default result 0
xor eax, eax ; 0000 _ 31. C0
; rdx = item - 1, so one unsigned compare checks both range bounds
lea rdx, [rcx-1H] ; 0002 _ 48: 8D. 51, FF
cmp rdx, 5 ; 0006 _ 48: 83. FA, 05
; out of range: skip the load and return the 0 already in rax
ja ?_001 ; 000A _ 77, 0B
lea rax, [x] ; 000C _ 48: 8D. 05, 00000000(rel)
; the index is item itself (rcx*8); no -1 adjustment is applied here
mov rax, qword ptr [rax+rcx*8] ; 0013 _ 48: 8B. 04 C8
?_001: ret ; 0017 _ C3
Vortex code, using gcc 7.1 -O3
Quote from: Vortex on July 12, 2017, 05:01:51 AM
#include <stdio.h>
/*
 * Return item when it lies in 1..6, otherwise 0.
 * The range test is 0 or 1, so the product is either item or 0.
 */
int ifblock(int item)
{
    int in_range = (item >= 1) && (item <= 6);

    return in_range * item;
}
/* Demo driver: print the result of ifblock for the sample input 6. */
int main(int argc,char *argv[])
{
    int result = ifblock(6);

    printf("ifblock=%d", result);
    return 0;
}
# ifblock(int): gcc 7.1 -O3 output for the "range test times item" C version.
# In:  edi = item
# Out: eax = item when 1 <= item <= 6, otherwise 0
ifblock(int):
lea eax, [rdi-1]   # eax = item - 1, so the valid range 1..6 becomes 0..5
cmp eax, 5
setbe al           # al = 1 when (unsigned)(item - 1) <= 5, else 0
movzx eax, al      # widen the flag to a clean 32-bit 0/1
imul eax, edi      # eax = in-range flag * item
ret
# Format string passed to printf by main.
.LC0:
.string "ifblock=%d"
# main: gcc 7.1 -O3 output for the demo driver.
# No call to ifblock appears here; the value 6 is passed to printf directly.
main:
sub rsp, 8                      # adjust the stack before the call
mov esi, 6                      # second printf argument: the constant 6
mov edi, OFFSET FLAT:.LC0       # first printf argument: address of the format string
xor eax, eax                    # eax = 0 before the variadic call
call printf
xor eax, eax                    # return value 0
add rsp, 8                      # undo the stack adjustment
ret
This is what I get from Erol's code built with this option in CL ver 14 VS2017.
\amd64\cl /c /O2 /Ot /Zl ifblock.c
ifblock:
0000000000000000: lea rax,[rcx-1]
0000000000000004: cmp rax,5
0000000000000008: ja 0000000000000016
000000000000000A: lea rax,[x]
0000000000000011: mov rax,qword ptr [rax+rcx*8]
0000000000000015: ret
0000000000000016: xor eax,eax
0000000000000018: ret
This is why you still write this type of code in assembler. What I am after is reduced instruction count and full 64 bit code with no hybrid code.
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
NOSTACKFRAME
; ---------------------------------------------------------------------
; blockif - table lookup replacing the sequential if-chain
;   In:   rcx = item
;   Out:  rax = tabl[item] for item 0..6, otherwise tabl[0], which is 0
;   Uses: rcx, rdx, flags
; ---------------------------------------------------------------------
blockif proc

  .data
    align 8
    tabl dq 0,1234,2345,3456,4567,5678,6789    ; slot 0 doubles as the "no match" result
  .code

    lea rdx, tabl                       ; table base; lea does not touch the flags
    cmp rcx, 6                          ; unsigned test, so negative items count as > 6
    cmova rcx, QWORD PTR [rdx]          ; out of range: index becomes tabl[0], i.e. 0
    mov rax, QWORD PTR [rdx+rcx*8]      ; rax = tabl[index]
    ret

blockif endp
STACKFRAME
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤