I have a C++ function that computes a dot product, and I want to compile it with the -O3 compiler optimization level. The rest of my codebase is compiled with -O0. To do this, I created a static library that contains the function, compiled that library with -O3, and then linked it into my code. However, I am not seeing the expected speedup from the optimized library.
#include "config.h"int multiply(uint128 *X1, uint128 *Y1, uint128 &ans, int input_length){ int i=0; ans = 0; if (input_length > 4) { for (; i < input_length - 4; i += 4) { ans += X1[i] * Y1[i]; ans += X1[i + 1] * Y1[i + 1]; ans += X1[i + 2] * Y1[i + 2]; ans += X1[i + 3] * Y1[i + 3]; } } for (; i < input_length; i++) { ans += X1[i] * Y1[i]; } return 0;}int main(){ int len = 500, wrapper = 50; uint128 a[len], b[len], ans; auto start = time_now, end = time_now; long long ctr = 0; for(int t = 0; t < wrapper; t++) { for(int i =0; i < len; i++) { a[i] = rand(); b[i] = rand(); } start = time_now; multiply(a, b, ans, len); end = time_now; ctr += std::chrono::duration_cast<std::chrono::nanoseconds>(end-start).count(); } cout<<"time taken: "<<ctr<<endl;}
time taken: 1372
#include "optimized.hpp"typedef __uint128_t uint128;#define time_now std::chrono::high_resolution_clock::now()int main(){ int len = 500, wrapper = 50; uint128 a[len], b[len], ans; auto start = time_now, end = time_now; long long ctr = 0; for(int t = 0; t < wrapper; t++) { for(int i =0; i < len; i++) { a[i] = rand(); b[i] = rand(); } start = time_now; multiply(a, b, ans, len); end = time_now; ctr += std::chrono::duration_cast<std::chrono::nanoseconds>(end-start).count(); } cout<<"time taken: "<<ctr<<endl; return 0;}
time taken: 36140