I wrote a very simple program; it behaves normally when compiled without `-O2`:
#include <stdio.h>
#include <stdint.h>

/*
 * Minimal reproduction: converts four integers to float, copies the float
 * array B into C through uint32_t pointers, then prints C.
 *
 * NOTE(review): B and C are float objects but are read and written through
 * uint32_t lvalues below. That violates the strict-aliasing rule and is
 * undefined behaviour; GCC turns on -fstrict-aliasing at -O2, which is when
 * the wrong output appears. The access is kept exactly as posted because it
 * is the behaviour being asked about (memcpy(C, B, sizeof B) is the
 * well-defined way to do this copy).
 */
int main()
{
    uint32_t A[4] = { 1, 2, 3, 4 };
    float B[4] = { 0, 0, 0, 0 };
    float C[4] = { 5, 6, 7, 8 };
    int i;

    // convert integer A to float B
    for (i = 0; i < 4; i++)
        B[i] = (float)A[i];

    // memory copy from B to C
    uint32_t *src = (uint32_t*)(B);
    uint32_t *dst = (uint32_t*)(C);
    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
    dst[3] = src[3];

#if 0 // open this to correct the error
    // Compiler-level memory barrier: no instructions are emitted, but the
    // "memory" clobber tells the compiler that memory may have changed.
    __asm__("":::"memory");
#endif

    // print C, C should be [1.0, 2.0, 3.0, 4.0]
    for (i = 0; i < 4; i++)
        printf("%f\n", C[i]);

    return 0;
}
Compiled without `-O2`:
$ gcc error.c -o error
$ ./error
1.0000
2.0000
3.0000
4.0000
It works as expected. But if I add `-O2`:
$ gcc -O2 error.c -o error
$ ./error
-6169930235904.000000
0.000000
-6169804406784.000000
0.000000
In addition, if you switch `#if 0` to `#if 1`, it works correctly again. The `asm("":::"memory")` barrier should be unnecessary within a single thread.
Is this a bug in the `-O2` optimization?
Is there anything I can tell the compiler to make it take care of this? I have a function that stores an xmm register to a `(void*)` pointer, like:
inline void StoreRegister(void *ptr, const __m128& reg){#if DONT_HAVE_SSE const uint32_t *src = reinterpret_cast<const uint32_t*>(®); uint32_t *dst = reinterpret_cast<uint32_t*>(ptr); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3];#else _mm_storeu_si128(reinterpret_cast<__m128*>(ptr), _mm_castps_si128(reg));#endif}
Here `dst` plays the role of `C` in the code above. Is there any way to make it correct without modifying the function signature?