I'm trying to measure the running time of the parallel version and the serial one.
I have the following program:
#include <stdio.h>#include <stdlib.h>#include <stdbool.h>#include <omp.h>#include <time.h>#include <unistd.h>#include <sys/types.h>#include <errno.h>#include <sys/resource.h>#include <sys/times.h>#define ARRAY_SIZE 1024 * 20long time_delta = 0;struct rusage rusage_start;struct rusage rusage_finish;void bubble_sort(unsigned int* array) { unsigned int tmp = 0; bool no_swap = 0; for (unsigned int i = ARRAY_SIZE - 1; i >= 0; --i) { no_swap = 1; { #pragma omp parallel for num_threads(4) for (unsigned int j = 0; j < i; j++) { if (array[j] > array[j + 1]) { tmp = array[j]; array[j] = array[j + 1]; array[j + 1] = tmp; no_swap = 0; } } } if (no_swap) break; }}int main(int argc, char* argv[]) { (void)argc; (void)argv; srand(time(NULL)); unsigned int* array = malloc(sizeof(unsigned int) * ARRAY_SIZE); if(!array) { return -1; } for(unsigned int i = 0; i < ARRAY_SIZE; ++i) { array[i] = rand() % ARRAY_SIZE; } getrusage(RUSAGE_SELF, &rusage_start); //sort bubble_sort(array); getrusage(RUSAGE_SELF, &rusage_finish); time_delta = (1000000 * (rusage_finish.ru_utime.tv_sec - rusage_start.ru_utime.tv_sec)) + (rusage_finish.ru_utime.tv_usec - rusage_start.ru_utime.tv_usec); printf("Time: %li microseconds\n", time_delta); free(array); return 0;}
I compile and measure time like this:
# GCC enables OpenMP with -fopenmp; without it the `#pragma omp` lines are ignored.
gcc -fopenmp main.c -o prog && for n in {1..10}; do ./prog; done
The problem is that if I change the number of threads in the `num_threads` clause of the `#pragma omp parallel for` directive placed before the inner `for` loop, or remove the directive altogether, the measured time does not change.
What am I doing wrong?
Everything seems to be correct. (I run the code on a VM with 4 cores; lscpu sees them.)