Quantcast
Channel: Active questions tagged gcc - Stack Overflow
Viewing all articles
Browse latest Browse all 22272

openmp for loop is not parallelized

$
0
0

I'm trying to measure the running time of the parallel version and the serial one.

I have such a program:

#include <stdio.h>#include <stdlib.h>#include <stdbool.h>#include <omp.h>#include <time.h>#include <unistd.h>#include <sys/types.h>#include <errno.h>#include <sys/resource.h>#include <sys/times.h>#define ARRAY_SIZE 1024 * 20long time_delta = 0;struct rusage rusage_start;struct rusage rusage_finish;void bubble_sort(unsigned int* array) {    unsigned int tmp = 0;    bool no_swap = 0;    for (unsigned int i = ARRAY_SIZE - 1; i >= 0; --i)    {        no_swap = 1;        {            #pragma omp parallel for num_threads(4)            for (unsigned int j = 0; j < i; j++)            {                if (array[j] > array[j + 1])                {                    tmp = array[j];                    array[j] = array[j + 1];                    array[j + 1] = tmp;                    no_swap = 0;                }            }        }        if (no_swap)            break;    }}int main(int argc, char* argv[]) {    (void)argc;    (void)argv;    srand(time(NULL));    unsigned int* array = malloc(sizeof(unsigned int) * ARRAY_SIZE);    if(!array) { return -1; }    for(unsigned int i = 0; i < ARRAY_SIZE; ++i) {        array[i] = rand() % ARRAY_SIZE;    }    getrusage(RUSAGE_SELF, &rusage_start);    //sort    bubble_sort(array);    getrusage(RUSAGE_SELF, &rusage_finish);    time_delta = (1000000 * (rusage_finish.ru_utime.tv_sec - rusage_start.ru_utime.tv_sec)) + (rusage_finish.ru_utime.tv_usec - rusage_start.ru_utime.tv_usec);    printf("Time: %li microseconds\n", time_delta);    free(array);    return 0;}

I compile and measure time like this:

gcc -openmp main.c -o prog && for n in {1..10}; do ./prog; done

The problem is that, if I change the number of threads in the function before for or remove the directive altogether, nothing changes.

What am I doing wrong?

Everything seems to be correct. (I run the code on a VM with 4 cores; lscpu sees them.)


Viewing all articles
Browse latest Browse all 22272

Latest Images

Trending Articles



Latest Images

<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>