Quantcast
Channel: Active questions tagged gcc - Stack Overflow
Viewing all articles
Browse latest Browse all 21994

How to vectorize and optimize these functions in C?

$
0
0

I have these functions. The results are correct, but the compiler doesn't vectorize them. How can I get the compiler to vectorize them, and how can I optimize this code?

/* Decimal base of the long numbers. The complete program defines the same
 * macro; the guard lets this excerpt compile on its own. */
#ifndef ten
#define ten 10
#endif

/**
 * Store the same digit in the first N positions of L.
 *
 * @param L      digit array to fill
 * @param N      number of digits to write
 * @param digit  value stored in every position
 */
void LongNumSet(char *L, unsigned N, char digit)
{
    /* The index is unsigned like N, so the comparison is never mixed-sign. */
    for (unsigned i = 0; i < N; ++i) {
        L[i] = digit;
    }
}

/**
 * Copy N digits from Vin to Vout, in increasing index order.
 *
 * @param Vin   source digits
 * @param Vout  destination digits
 * @param N     number of digits to copy
 */
void LongNumCopy(const char *Vin, char *Vout, unsigned N)
{
    for (unsigned i = 0; i < N; ++i) {
        Vout[i] = Vin[i];
    }
}

/**
 * Add two N-digit decimal numbers, least significant digit at index 0.
 *
 * The pointers are deliberately NOT restrict-qualified: the multiplication
 * routine of the full program accumulates in place, passing the same array as
 * Vin2 and Vout, which a restrict qualifier would make undefined behaviour.
 * In-place use is safe because element i is read before it is written.
 *
 * @param Vin1  first operand, each element in 0..9
 * @param Vin2  second operand, each element in 0..9
 * @param Vout  receives the N low digits of the sum
 * @param N     number of digits
 * @return      the carry out of the most significant digit (0 or 1)
 */
char LongNumAddition(const char *Vin1, const char *Vin2, char *Vout, unsigned N)
{
    char CARRY = 0;

    /* Every iteration consumes the carry produced by the previous one; this
     * loop-carried dependency is the likely reason a compiler does not
     * auto-vectorize the loop as written. */
    for (unsigned i = 0; i < N; ++i) {
        char R = Vin1[i] + Vin2[i] + CARRY;   /* at most 9 + 9 + 1 = 19 */
        CARRY = R > 9;
        Vout[i] = CARRY ? R - ten : R;
    }
    return CARRY;
}

/**
 * Add a single digit to an N-digit decimal number in place.
 *
 * V[0] is accessed unconditionally, so N must be at least 1.
 *
 * @param V      number to modify, least significant digit at index 0
 * @param digit  value added to V[0]
 * @param N      number of digits in V
 * @return       1 if the carry ran off the most significant digit, else 0
 */
char LongNumAddDigit(char *V, char digit, unsigned N)
{
    char R = V[0] + digit;
    if (R < ten) {
        V[0] = R;
        return 0;
    }
    V[0] = R - ten;

    /* Ripple the carry upwards: digits equal to 9 roll over to 0, and the
     * first smaller digit absorbs the carry. */
    for (unsigned i = 1; i < N; ++i) {
        if (V[i] < 9) {
            V[i]++;
            return 0;
        }
        V[i] = 0;
    }
    return 1;
}

I use the command gcc -O3 -ffast-math -msse -funroll-all-loops -ftree-vectorizer-verbose=25 -lm -g $1 -o ${2}.O3, and the program takes 55 s to run. This is the complete code:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Variables used to generate pseudo-random numbers
unsigned int seed;
unsigned int temp;
unsigned int var1 = 214013;
unsigned int var2 = 2531011;
#define val13 13
#define ten 10

/**
 * Advance the global generator state (seed = var1 * seed + var2) and return
 * the new state shifted right by val13 bits.
 *
 * `static inline` instead of plain `inline`: a plain C99 inline definition
 * provides no external definition, so the program fails to link whenever the
 * compiler decides not to inline the call.
 */
static inline int myRandom(void)
{
    temp = var1 * seed;
    seed = temp + var2;
    return (int)(seed >> val13);
}

/**
 * Fill L[0..N-1] with pseudo-random decimal digits taken from myRandom().
 */
void LongNumInit(char *L, unsigned N)
{
    for (unsigned i = 0; i < N; ++i) {
        L[i] = myRandom() % ten;  // decimal digit
    }
}

/**
 * Print "Name:" followed by digits L[N-1] down to L[0] and a newline.
 *
 * @param L     digit array, least significant digit at index 0
 * @param N     number of digits to print; the caller must own at least N
 * @param Name  label printed before the digits
 */
void LongNumPrint(const char *L, unsigned N, const char *Name)
{
    printf("%s:", Name);
    for (unsigned i = N; i > 0; i--) {
        printf("%d", L[i - 1]);
    }
    printf("\n");
}

/**
 * Store the same digit in the first N positions of L.
 */
void LongNumSet(char *L, unsigned N, char digit)
{
    for (unsigned i = 0; i < N; ++i) {
        L[i] = digit;
    }
}

/**
 * Copy N digits from Vin to Vout, in increasing index order.
 */
void LongNumCopy(const char *Vin, char *Vout, unsigned N)
{
    for (unsigned i = 0; i < N; ++i) {
        Vout[i] = Vin[i];
    }
}

/**
 * Add two N-digit decimal numbers, least significant digit at index 0.
 *
 * The pointers are deliberately NOT restrict-qualified: LongNumMultiply
 * accumulates in place by passing RES as both Vin2 and Vout, which a restrict
 * qualifier would make undefined behaviour. In-place use is safe because
 * element i is read before it is written.
 *
 * @return the carry out of the most significant digit (0 or 1)
 */
char LongNumAddition(const char *Vin1, const char *Vin2, char *Vout, unsigned N)
{
    char CARRY = 0;

    // Each iteration consumes the carry of the previous one; this loop-carried
    // dependency is the likely reason the loop is not auto-vectorized.
    for (unsigned i = 0; i < N; ++i) {
        char R = Vin1[i] + Vin2[i] + CARRY;   // at most 9 + 9 + 1 = 19
        CARRY = R > 9;
        Vout[i] = CARRY ? R - ten : R;
    }
    return CARRY;
}

/**
 * Add a single digit to an N-digit decimal number in place.
 *
 * V[0] is accessed unconditionally, so N must be at least 1.
 *
 * @return 1 if the carry ran off the most significant digit, else 0
 */
char LongNumAddDigit(char *V, char digit, unsigned N)
{
    char R = V[0] + digit;
    if (R < ten) {
        V[0] = R;
        return 0;
    }
    V[0] = R - ten;

    // Ripple the carry upwards: 9s roll over to 0, the first smaller digit
    // absorbs the carry.
    for (unsigned i = 1; i < N; ++i) {
        if (V[i] < 9) {
            V[i]++;
            return 0;
        }
        V[i] = 0;
    }
    return 1;
}

/**
 * Write the sum of the N digits of Vin into Vout as an N-digit number.
 *
 * @return always 0
 */
char LongNumHorizAdd(const char *Vin, char *Vout, unsigned N)
{
    LongNumSet(Vout, N, 0);
    for (unsigned i = 0; i < N; ++i) {
        LongNumAddDigit(Vout, Vin[i], N);
    }
    return 0; // CARRY can never be set
}

/**
 * Multiply an N-digit decimal number by a single digit, in place.
 *
 * @param V      number to scale; every element must be in 0..9
 * @param N      number of digits in V
 * @param digit  multiplier in 0..9
 * @return       the carry out of the most significant digit (0 to 8)
 */
char LongNumConstMult(char *V, unsigned N, char digit)
{
    char CARRY = 0;
    for (unsigned i = 0; i < N; ++i) {
        char R = V[i] * digit + CARRY;   // at most 9 * 9 + 8 = 89
        // Plain / and % give the same quotient and remainder as the original
        // multiply-by-0xCCCD-and-shift for this range, without relying on the
        // non-standard u_int32_t type.
        CARRY = R / ten;
        V[i] = R % ten;
    }
    return CARRY;
}

/**
 * Schoolbook multiplication of two N-digit decimal numbers.
 *
 * The 2N-digit product is split into VoutL (digits 0..N-1) and VoutH (digits
 * N..2N-1). 2*N must be representable as unsigned. The function terminates
 * the program if its scratch buffers cannot be allocated.
 */
void LongNumMultiply(const char *Vin1, const char *Vin2, char *VoutH, char *VoutL, unsigned N)
{
    if (N == 0) {
        return;  // nothing to compute; also avoids zero-sized allocations
    }

    // Temporary long integers with double size; calloc leaves both zeroed,
    // so RES needs no further initialization.
    unsigned wide = 2 * N;
    char *TEMP = calloc(wide, sizeof *TEMP);
    char *RES = calloc(wide, sizeof *RES);
    if (TEMP == NULL || RES == NULL) {
        fprintf(stderr, "LongNumMultiply: out of memory\n");
        free(TEMP);
        free(RES);
        exit(EXIT_FAILURE);
    }

    for (unsigned i = 0; i < N; ++i) {
        LongNumSet(TEMP, wide, 0);              // Set TEMP to 0
        LongNumCopy(Vin1, TEMP + i, N);         // Copy Vin1 -> TEMP, with offset i
        LongNumConstMult(TEMP, wide, Vin2[i]);  // TEMP * Vin2[i] -> TEMP
        LongNumAddition(TEMP, RES, RES, wide);  // TEMP + RES -> RES
    }

    // Result goes to VoutH-VoutL
    LongNumCopy(RES, VoutL, N);      // Copy RES   -> VoutL
    LongNumCopy(RES + N, VoutH, N);  // Copy RES+N -> VoutH

    // The original never released these buffers and leaked on every call.
    free(TEMP);
    free(RES);
}

/**
 * Benchmark driver.
 *
 * argv[1] overrides the number of digits (default 10000) and argv[2] the
 * number of repetitions (default 50). Prints the lowest digits of the four
 * long numbers after the last repetition.
 */
int main(int argc, char **argv)
{
    int N = 10000, Rep = 50;
    seed = 12345;

    // obtain parameters at run time
    if (argc > 1) { N   = atoi(argv[1]); }
    if (argc > 2) { Rep = atoi(argv[2]); }

    printf("Challenge #3: Vector size is %d. Repeat %d times\n", N, Rep);

    // LongNumAddDigit touches V[0] unconditionally, so an empty or negative
    // size cannot be processed.
    if (N < 1) {
        fprintf(stderr, "Vector size must be at least 1\n");
        return EXIT_FAILURE;
    }

    // Create Long Nums. calloc keeps V4 defined even when Rep <= 0.
    char *V1 = calloc((size_t)N, sizeof *V1);
    char *V2 = calloc((size_t)N, sizeof *V2);
    char *V3 = calloc((size_t)N, sizeof *V3);
    char *V4 = calloc((size_t)N, sizeof *V4);
    if (V1 == NULL || V2 == NULL || V3 == NULL || V4 == NULL) {
        fprintf(stderr, "Out of memory\n");
        free(V1); free(V2); free(V3); free(V4);
        return EXIT_FAILURE;
    }

    LongNumInit(V1, N); LongNumInit(V2, N); LongNumInit(V3, N);

    // Repeat
    for (int i = 0; i < Rep; i++) {
        LongNumAddition(V1, V2, V4, N);
        LongNumMultiply(V3, V4, V2, V1, N);
        LongNumHorizAdd(V1, V2, N);
        LongNumAddDigit(V3, V2[0], N);
    }

    // Print last 32 digits of Long Numbers (fewer if the numbers are shorter,
    // so the arrays are never read out of bounds)
    unsigned shown = N < 32 ? (unsigned)N : 32u;
    LongNumPrint(V1, shown, "V1");
    LongNumPrint(V2, shown, "V2");
    LongNumPrint(V3, shown, "V3");
    LongNumPrint(V4, shown, "V4");

    free(V1); free(V2); free(V3); free(V4);
    return 0;
}

Viewing all articles
Browse latest Browse all 21994

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>