I'm trying to use the following code to emulate a 16-bit half-float in software:
/*
 * Software representation of a 16-bit half-precision float, split into
 * three bit-fields that together occupy 16 bits.
 *
 * The fields are declared in the order mantissa, exponent, sign, which is
 * the order from_half()/to_half() assume when they shift by 0, 10 and 15.
 * Note that the C standard leaves the actual storage order of bit-fields
 * implementation-defined.
 */
typedef struct half
{
    unsigned short mantissa : 10;   /* fraction bits */
    unsigned short exponent : 5;    /* exponent bits */
    unsigned short sign     : 1;    /* sign bit */
} half;
/*
 * Pack a `half` back into its raw 16-bit pattern.
 *
 * Each bit-field is shifted to the position it is meant to occupy
 * (mantissa at bit 0, exponent at bit 10, sign at bit 15) and OR-ed into
 * the result, which is returned as an unsigned short.
 */
unsigned short from_half(half h)
{
    unsigned short bits = h.mantissa;   /* bits 0-9 */

    bits |= h.exponent << 10;           /* bits 10-14 */
    bits |= h.sign << 15;               /* bit 15 */
    return bits;
}
half to_half(unsigned short s)
{
half result = { s, s >> 10, s >> 15 };
return result;
}
I set this up so that it could easily be optimized into a move instruction, but lo and behold, in `from_half`, GCC does the bit-shifting anyway (even at `-O3`):
# GCC output for from_half (Intel syntax: destination first).
# The three bit-fields are extracted and re-packed one by one instead of
# the argument simply being copied to the return register.
from_half:
mov edx, edi # edx = copy of the incoming value (used for the sign)
mov eax, edi # eax = second copy (used for the exponent)
and di, 1023 # di &= 0x03FF: keep the 10 mantissa bits
shr dx, 15 # dx = bit 15, i.e. the sign as 0 or 1
and eax, 31744 # eax &= 0x7C00: keep exponent bits 10-14 in place
movzx edx, dl # zero-extend the sign value to 32 bits
sal edx, 15 # shift the sign back up to bit 15
or eax, edx # eax = exponent | sign
or eax, edi # merge in the mantissa
ret
while `to_half` is optimized nicely:
# GCC output for to_half: the argument is returned unchanged.
to_half:
mov eax, edi # eax = edi: copy the argument straight to the result register
ret
I've tried different optimization levels (`-O1`, `-O2`, `-Os`), but none of them optimizes it into what I was hoping for.
Clang does this the way I would expect, even at `-O1`:
# Clang output for from_half: the whole repacking collapses into one register copy.
from_half: # @from_half
mov eax, edi # eax = edi: copy the argument straight to the result register
ret
# Clang output for to_half: same single register copy as its from_half.
to_half: # @to_half
mov eax, edi # eax = edi: copy the argument straight to the result register
ret
How can I get GCC to optimize this into a move? Why isn't it optimized that way already?