This snippet just multiplies two numbers:
#include <stdio.h>
#include <conio.h>
/* Multiplies the fixed constant 0x77777777 by `factor` as a signed 64-bit
   product and returns it converted to unsigned long long. */
static unsigned long long foo(int factor)
{
int tmp=0x77777777; /* constant multiplicand, held in a 32-bit local */
return (long long)tmp*factor; /* the cast widens tmp first, so the multiply is done in 64 bits */
}
/*
 * Prints foo(0x12345678) next to the expected product, then waits for a
 * key press via _getch().
 *
 * To stop in a debugger right before the call, insert the breakpoint
 * statement that matches the compiler at the top of the body:
 *     __asm("int $3");    GCC syntax
 *     __asm int 3;        VC + Pelles C
 */
int main(void)
{
    const unsigned long long product = foo(0x12345678);

    printf("Result: %llu\nexpected: 612158616795740616", product);
    _getch();
}
So how do various C compilers implement this simple task?
First, the assembler equivalent (unnecessarily complicated):
invoke mul77, 12345678h
...
; mul77: returns 77777777h * factor as an unsigned 64-bit product in edx:eax.
; The constant goes through the stack local tmp rather than an immediate
; register load.
mul77 PROC factor:DWORD
    LOCAL tmp:DWORD

    mov     tmp, 77777777h          ; tmp = constant multiplicand
    mov     eax, factor             ; eax = caller-supplied multiplier
    mul     tmp                     ; edx:eax = eax * tmp (unsigned)
    ret
mul77 ENDP
The short version would be, of course:
mov eax, 12345678h
mov edx, 77777777h
mul edx
Same task with M$ VC:
00D57E24 │. 68 78563412 push 12345678 ; ┌Arg1 = 12345678
00D57E29 │. E8 C2FFFFFF call 00D57DF0 ; └Tmp.00D57DF0
...
00D57DF0 ┌$ 55 push ebp ; 20 instructions, one call
00D57DF1 │. 8BEC mov ebp, esp
00D57DF3 │. 51 push ecx ; that is a neat trick ;-)
00D57DF4 │. 56 push esi
00D57DF5 │. C745 FC 77777777 mov dword ptr [ebp-4], 77777777
00D57DFC │. 8B45 FC mov eax, [ebp-4]
00D57DFF │. 99 cdq
00D57E00 │. 8BC8 mov ecx, eax
00D57E02 │. 8BF2 mov esi, edx
00D57E04 │. 8B45 08 mov eax, [ebp+8]
00D57E07 │. 99 cdq
00D57E08 │. 52 push edx ; ┌Arg4
00D57E09 │. 50 push eax ; │Arg3 => [Arg1]
00D57E0A │. 56 push esi ; │Arg2
00D57E0B │. 51 push ecx ; │Arg1 => 77777777
00D57E0C │. E8 9F94FFFF call 00D512B0 ; └Tmp.00D512B0
00D57E11 │. 5E pop esi
00D57E12 │. 8BE5 mov esp, ebp
00D57E14 │. 5D pop ebp
00D57E15 └. C3 retn
...
008E12B0 ┌$ 8B4424 08 mov eax, [esp+8] ; Tmp.008E12B0(guessed Arg1,Arg2,Arg3,Arg4)
008E12B4 │. 8B4C24 10 mov ecx, [esp+10]
008E12B8 │. 0BC8 or ecx, eax
008E12BA │. 8B4C24 0C mov ecx, [esp+0C]
008E12BE │. 75 09 jnz short 008E12C9
008E12C0 │. 8B4424 04 mov eax, [esp+4]
008E12C4 │. F7E1 mul ecx
008E12C6 │. C2 1000 retn 10
GCC:
00401388 │. C70424 78563412 mov dword ptr [esp], 12345678
0040138F │. E8 ACFFFFFF call 00401340
...
00401340 ┌$ 55 push ebp ; 27 instructions, no call
00401341 │. 89E5 mov ebp, esp
00401343 │. 57 push edi
00401344 │. 56 push esi
00401345 │. 53 push ebx
00401346 │. 83EC 14 sub esp, 14
00401349 │. C745 EC 77777777 mov dword ptr [ebp-14], 77777777
00401350 │. 8B45 EC mov eax, [ebp-14]
00401353 │. 89C1 mov ecx, eax
00401355 │. 89C3 mov ebx, eax
00401357 │. C1FB 1F sar ebx, 1F
0040135A │. 8B45 08 mov eax, [ebp+8]
0040135D │. 99 cdq
0040135E │. 89DF mov edi, ebx
00401360 │. 0FAFF8 imul edi, eax
00401363 │. 89D6 mov esi, edx
00401365 │. 0FAFF1 imul esi, ecx
00401368 │. 01FE add esi, edi
0040136A │. F7E1 mul ecx
0040136C │. 8D0C16 lea ecx, [edx+esi]
0040136F │. 89CA mov edx, ecx
00401371 │. 83C4 14 add esp, 14
00401374 │. 5B pop ebx
00401375 │. 5E pop esi
00401376 │. 5F pop edi
00401377 │. 5D pop ebp
00401378 └. C3 retn
And finally, Pelles C:
00D57E24 │. 68 78563412 push 12345678 ; ┌Arg1 = 12345678
00D57E29 │. E8 C2FFFFFF call 00D57DF0 ; └Tmp.00D57DF0
...
00401000 ┌$ 55 push ebp ; 21 instructions, one call
00401001 │. 89E5 mov ebp, esp
00401003 │. 83EC 04 sub esp, 4
00401006 │. 53 push ebx
00401007 │. C745 FC 77777777 mov dword ptr [ebp-4], 77777777
0040100E │. 8B45 FC mov eax, [ebp-4]
00401011 │. 89C0 mov eax, eax
00401013 │. 99 cdq
00401014 │. 8B4D 08 mov ecx, [ebp+8]
00401017 │. 89C9 mov ecx, ecx
00401019 │. 89CB mov ebx, ecx
0040101B │. C1FB 1F sar ebx, 1F
0040101E │. 52 push edx ; ┌Arg4
0040101F │. 50 push eax ; │Arg3 => 77777777
00401020 │. 53 push ebx ; │Arg2
00401021 │. 51 push ecx ; │Arg1 => [Arg1]
00401022 │. E8 39000000 call __llmul ; └Mysteries_of_C.__llmul
00401027 │. 5B pop ebx
00401028 │. 89EC mov esp, ebp
0040102A │. 5D pop ebp
0040102B └. C3 retn
...
__llmul ┌$ 8B4424 10 mov eax, [esp+10] ; Mysteries_of_C.__llmul(guessed Arg1,Arg2,Arg3,Arg4)
00401064 │. 8B4C24 08 mov ecx, [esp+8]
00401068 │. 09C1 or ecx, eax
0040106A │. 8B4C24 04 mov ecx, [esp+4]
0040106E │. 75 09 jnz short 00401079
00401070 │. 8B4424 0C mov eax, [esp+0C]
00401074 │. F7E1 mul ecx
00401076 │. C2 1000 retn 10
And the timings please. :bgrin:
Plus may I use the data? :greensml:
deleted
Quote from: Raistlin on April 22, 2018, 02:19:04 AM
And the timings please. :bgrin:
Plus may I use the data? :greensml:
690 for the GCC version on my Core i5 for 100000000 iterations. VC is 10% slower, the short assembler version a factor 8 faster.
@nidud: thanks, and no offense please, but I never watch youtube tutorials - I have better uses of my time.
JJ,
Are you compiling in release mode? Of course not.
If you compile in release mode, VS will not even make the multiplication, it will infer immediately the result.
deleted
/* Returns 0x77777777 * factor, computed in unsigned 64-bit arithmetic. */
unsigned long long foo(int factor)
{
    const unsigned long long multiplicand = 0x77777777;

    return multiplicand * factor;
}
/* Prints foo(0x12345678) alongside the expected product, then calls
   getchar() before returning 0. */
int main()
{
    printf("Result: %llu\nexpected: 612158616795740616", foo(0x12345678));
    getchar();
    return 0;
}
; Listing for main(): there is no call to foo here; the 64-bit product is
; passed to printf as two immediate dwords.
_main PROC ; COMDAT
; Line 12
push ebp
mov ebp, esp
and esp, -8 ; fffffff8H -- rounds esp down to a multiple of 8
; Line 14
push 142529284 ; 087ed304H -- high dword of 087ED304C44DF9C8h (= 612158616795740616)
push -1001522744 ; c44df9c8H -- low dword of the same constant
; address of the format string literal
push OFFSET ??_C@_0CM@MAJDCJPD@Result?3?5?5?5?$CFllu?6expected?3?56121586@
call _printf
add esp, 12 ; 0000000cH -- removes the three dwords pushed for printf
; Line 15
call DWORD PTR __imp__getchar
; Line 16
xor eax, eax ; return value 0
; Line 17
mov esp, ebp ; undo the alignment adjustment before restoring ebp
pop ebp
ret 0
_main ENDP
Quote from: aw27 on April 22, 2018, 02:52:24 AM
If you compile in release mode, VS will not even make the multiplication, it will infer immediately the result.
Right, and that is of course very helpful when analysing this problem ;)
If you got problems use debug mode.
Even in release mode there are various possibilities to make it call the useless function, you will find them if you want.
Quote from: aw27 on April 22, 2018, 03:13:05 AM
Even in release mode there are various possibilities to make it call the useless function, you will find them if you want.
Note that gcc doesn't have a "release mode" and a "debug mode" (https://stackoverflow.com/questions/1534912/how-to-build-in-release-mode-with-optimizations-in-gcc)
Yeah, these modern compilers are incredibly clever, right? Even with -O1 it already plays foul. One would have to set up a testbed summing up foo(randomvalue) to force the compiler to do a proper job. No time for that today, though...
All right, if you have no time, let's leave that for now. :biggrin:
I know that GCC has no release mode, but I don't use it on Windows, although I have Cygwin and Eclipse installed.
Well, actually I use it when I do avr or arm but I barely notice.
deleted
It would be nice to have an explanation of what happens under the hood to variables declared before main. And, in C++, is it really necessary to create a buffer and align a pointer/eax to be able to use inline asm such as movaps xmm0,var1234 / mulps xmm0,var5678 / movaps var1234,xmm0?