I started out looking for a gcc command line option that would control the truncation. I did not find one, but I did determine that with gcc 4.7.2 I can push the result to 1267 with a cast.
#include <stdio.h>
#include <conio.h>
int foo (double);
int main (int,char**);
/*
 * foo - print d, then return d * 100 converted to int (truncated toward zero).
 *
 * The product is written to a volatile double before the conversion so that
 * it is rounded to double precision first.  On x87 targets the multiply can
 * otherwise stay in an FPU register with a wider significand, and truncating
 * that wider value may give a result one lower than truncating the double
 * (e.g. 1266 instead of 1267 for d = 12.67).
 *
 * An earlier version used `return (double)(d*100);`.  GCC only promises that
 * a cast discards excess precision under -fexcess-precision=standard; the
 * volatile store/reload does not depend on that option or on the
 * optimisation level.
 */
int foo (double d)
{
    volatile double scaled;

    printf("d = %.15G\n", d);

    scaled = d * 100;       /* forced store: rounds the product to double */
    return (int)scaled;     /* truncation now operates on the rounded value */
}
/*
 * main - call foo(12.67), print the integer it returns, then wait on
 * getch() before exiting (presumably to keep the console window open).
 * argc and argv are not used.
 */
int main (int argc,char** argv)
{
    int rv = foo(12.67);

    printf("rv = %d\n", rv);
    getch();
    return 0;
}
Assembly output without cast:
# Compiler output for foo() when the source is `return d*100;` (no cast).
# AT&T syntax, 32-bit x86, x87 FPU.  In this version the product goes from
# the x87 register straight into the integer conversion; it is never stored
# to memory as a double in between.
.text
.globl _foo
.def _foo; .scl 2; .type 32; .endef
_foo:
LFB11:
.cfi_startproc
# Frame setup; 56 bytes reserved for locals and outgoing call arguments.
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
subl $56, %esp
# printf(LC0, d): load d from its argument slot, copy it into the outgoing
# argument area, and spill it to -40(%ebp) so it can be reloaded after the
# call.  LC0 is defined outside this listing (presumably the format string).
fldl 8(%ebp)
fstl 4(%esp)
movl $LC0, (%esp)
fstpl -40(%ebp)
call _printf
# Multiply the reloaded d by the single-precision constant at LC1 (defined
# outside this listing; presumably 100.0).  The product is left in st(0).
flds LC1
fldl -40(%ebp)
fmulp %st, %st(1)
# Save the FPU control word at -10(%ebp) and build a copy at -12(%ebp) with
# bits 10-11 set (0x0C in the high byte).  Those bits are the x87 rounding
# control field; both set selects round-toward-zero, i.e. truncation.
fnstcw -10(%ebp)
movw -10(%ebp), %ax
orb $12, %ah
movw %ax, -12(%ebp)
fldcw -12(%ebp)
# Convert st(0) to a 32-bit integer under the truncating mode.  The value
# converted is the register-held product, not a value rounded to double.
fistpl -16(%ebp)
# Restore the saved control word, then return the integer in %eax.
fldcw -10(%ebp)
movl -16(%ebp), %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
LFE11:
Assembly output with cast:
# Compiler output for foo() when the source is `return (double)(d*100);`.
# AT&T syntax, 32-bit x86, x87 FPU.  Compared with the no-cast output, the
# only functional addition is the fstpl/fldl pair right after the multiply;
# the remaining differences are shifted local-variable offsets.
.text
.globl _foo
.def _foo; .scl 2; .type 32; .endef
_foo:
LFB11:
.cfi_startproc
# Frame setup; 56 bytes reserved for locals and outgoing call arguments.
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
subl $56, %esp
# printf(LC0, d): load d from its argument slot, copy it into the outgoing
# argument area, and spill it to -40(%ebp) so it can be reloaded after the
# call.  LC0 is defined outside this listing (presumably the format string).
fldl 8(%ebp)
fstl 4(%esp)
movl $LC0, (%esp)
fstpl -40(%ebp)
call _printf
# Multiply the reloaded d by the single-precision constant at LC1 (defined
# outside this listing; presumably 100.0).  The product is left in st(0).
flds LC1
fldl -40(%ebp)
fmulp %st, %st(1)
# The cast: store the product to memory as a 64-bit double and load it back.
# This store executes BEFORE the control word is modified below, so it
# rounds under whatever rounding mode the caller left in force rather than
# under the truncating mode used for the integer conversion.
fstpl -16(%ebp)
fldl -16(%ebp)
# Save the FPU control word at -18(%ebp) and build a copy at -20(%ebp) with
# bits 10-11 set (0x0C in the high byte).  Those bits are the x87 rounding
# control field; both set selects round-toward-zero, i.e. truncation.
fnstcw -18(%ebp)
movw -18(%ebp), %ax
orb $12, %ah
movw %ax, -20(%ebp)
fldcw -20(%ebp)
# Convert st(0) to a 32-bit integer under the truncating mode.  The value
# converted here is the one that was already rounded to double above.
fistpl -24(%ebp)
# Restore the saved control word, then return the integer in %eax.
fldcw -18(%ebp)
movl -24(%ebp), %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
LFE11:
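The difference is that the cast causes the result of the multiply to be stored to memory as a double and then reloaded, reducing the precision of the significand from 64 bits to 53 bits. I'm having trouble understanding how this could cause a rounding by truncation to round up, and I'm beginning to suspect that a cast is not a reliable way to get around the problem. (One possible explanation: the store to memory happens before the control word is switched to truncation, so it rounds under whatever mode is in force at that point, normally round-to-nearest. An extended-precision product just below 1267 then becomes exactly 1267.0 as a double, and truncating that value gives 1267.)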