/* dataflow.c -- printed 8/1/2016 (page 1 of 8) */

#include<stdio.h>
#include"rdtsc.h"
#include<math.h>
#include<stdlib.h>
#include<omp.h>
#include<string.h>
#include<malloc.h>
#include<memory.h>
#include<windows.h>
#include<limits.h>
#include<assert.h>
#include<time.h>
#include<xmmintrin.h>
#include<emmintrin.h>
#include<pmmintrin.h>
/* ---- Build-time configuration ------------------------------------------- */
#define NTHR 4                /* thread count passed to OpenMP */
#define ERRORPER 0.0          /* tolerance when comparing sequential vs. parallel results */
#define FILAS 200             /* rows of every matrix */
#define COL 200               /* logical columns per row; maximum 200 */
/* Parenthesised so the macros expand safely inside larger expressions. */
#define PADD (200 - COL)      /* padding columns appended to each row */
#define COLUMNAS (COL + PADD) /* physical row length; must be a multiple of 8 */

/* Working set of the sequential reference implementation. */
static float __attribute__((aligned(64))) mat_ASeq[FILAS][COLUMNAS];
static float __attribute__((aligned(64))) mat_BSeq[FILAS][COLUMNAS];
static float __attribute__((aligned(64))) mat_CSeq[FILAS][COLUMNAS];
static float __attribute__((aligned(64))) mat_DSeq[FILAS][COLUMNAS];

/* Working set of the parallel/vectorised implementation under test. */
static float __attribute__((aligned(64))) mat_APar[FILAS][COLUMNAS];
static float __attribute__((aligned(64))) mat_BPar[FILAS][COLUMNAS];
static float __attribute__((aligned(64))) mat_CPar[FILAS][COLUMNAS];
static float __attribute__((aligned(64))) mat_DPar[FILAS][COLUMNAS];

/* Wait value f1() hands to Sleep(), selected by (index % 5). */
static double vec_esperas[5] = {0, 0, 0, 0, 0.8};

/* Time-stamp samples and running statistics used by main(). */
static unsigned long long ini, fin, wdif, wmin, wmax, wsum;
voidIniDatos()
{

inti,j;

/*initializerandomseed:*/

srand(time(NULL));

//srand(57855465);
for(i=0;i<FILAS;i++)
for(j=0;j<COLUMNAS;j++)

mat_ASeq[i][j]=mat_APar[i][j]=(float)(rand()%100);
for(i=0;i<FILAS;i++)
for(j=0;j<COLUMNAS;j++)

mat_BSeq[i][j]=mat_BPar[i][j]=(float)(rand()%100);
for(i=0;i<FILAS;i++)
for(j=0;j<COLUMNAS;j++)

mat_CSeq[i][j]=mat_CPar[i][j]=(float)(rand()%100);
for(i=0;i<FILAS;i++)
for(j=0;j<COLUMNAS;j++)

mat_DSeq[i][j]=mat_DPar[i][j]=(float)(rand()%100);
}
floatf1(intind)
{
doubleespera=vec_esperas[ind%5];
if(espera>0.0)Sleep(espera);
return(float)((ind*ind)%100);
}
//SECUENCIAL
voidproc_sequential(floatmatA[][COLUMNAS],floatmatB[][COLUMNAS],floatmatC[][COLUMNAS],
floatmatD[][COLUMNAS],intfilas,intcolumnas)
{
inti,j;
for(i=1;i<filas1;i++)
for(j=0;j<columnas1;j++)
{
matA[i][j]=matA[i1][j]+f1(i);
file:///C:/Users/Erik/Desktop/dataflow.c

1/8

8/1/2016

dataflow.c

matC[i][j]=matA[i1][j+1]+matD[i+1][j]+f1(i);
matD[i][j]=matB[i+1][j+1]/3.0+f1(i);
matB[i][j]=matB[i+1][j]+f1(j);
}
}
intproc_ParFilaFija(floatmatA[][COLUMNAS],floatmatB[][COLUMNAS],floatmatC[][COLUMNAS],
floatmatD[][COLUMNAS],intfilas,intcolumnas)
{
inti,j;
for(i=1;i<filas1;i++){

#pragmaompparallelforprivate(j)

for(j=0;j<columnas1;j++){

matB[i][j]=matB[i+1][j]+f1(j);

matA[i][j]=matA[i1][j]+f1(i);

matD[i][j]=matB[i+1][j+1]/3.0+f1(i);

matC[i][j]=matA[i1][j+1]+matD[i+1][j]+f1(i);

}
intproc_ParColumnaFija(floatmatA[][COLUMNAS],floatmatB[][COLUMNAS],floatmatC[][COLUMNAS],
floatmatD[][COLUMNAS],intfilas,intcolumnas)
{
inti,j;

for(j=0;j<columnas1;j++){

#pragmaompparallelnum_threads(NTHR)

for(i=1;i<filas1;i++){

matA[i][j]=matA[i1][j]+f1(i);

/*for(i=0;i<FILAS;i++){

for(j=0;j<COL;j++)

printf("%f%d,",matA[i]
[j],omp_get_thread_num());

printf("\n");

}*/

#pragmaompforprivate(i)

for(i=1;i<(filas)1;i++){

matC[i][j]=matA[i1][j+1]+matD[i+1][j]+f1(i);

#pragmaompforprivate(i)

for(i=1;i<(filas)1;i++){

matD[i][j]=matB[i+1][j+1]/3.0+f1(i);

for(i=1;i<(filas)1;i++){

matB[i][j]=matB[i+1][j]+f1(j);

}
}
/******************************************************************************************************
***********/
intproc_UnCore(floatmatA[][COLUMNAS],floatmatB[][COLUMNAS],floatmatC[][COLUMNAS],
floatmatD[][COLUMNAS],intfilas,intcolumnas)
{
inti,j;

__m128v_3;

floatfi,fj;
file:///C:/Users/Erik/Desktop/dataflow.c

2/8

8/1/2016

dataflow.c

//S1

__m128v_s1_a,v_s1sum;

//S2

__m128v_s2_a,v_s2_d,v_s2sum1,v_s2sum2;

__m128v_f1i,v_f1j;

//S3

__m128v_s3_b,v_s3div,v_s3sum;

//S4

__m128v_s4_b,v_s4sum;

v_3=_mm_set1_ps(3.0);

for(i=1;i<filas1;i++){

for(j=0;j<(columnas/4)1;j++)
{

fi=f1(i);

v_f1i=_mm_set1_ps(fi);
//matA[i][j]=matA[i1][j]+f1(i);

v_s1_a=_mm_load_ps(&matA[i1][j*4]);

v_s1sum=_mm_add_ps(v_s1_a,v_f1i);

_mm_store_ps(&matA[i][j*4],v_s1sum);

f1(i);
//matC[i][j]=matA[i1][j+1]+matD[i+1][j]+f1(i);

v_s2_a=_mm_loadu_ps(&matA[i1][j*4+1]);

v_s2_d=_mm_load_ps(&matD[i+1][j*4]);

v_s2sum1=_mm_add_ps(v_s2_a,v_s2_d);

v_s2sum2=_mm_add_ps(v_s2sum1,v_f1i);

_mm_store_ps(&matC[i][j*4],v_s2sum2);

f1(i);

//matD[i][j]=matB[i+1][j+1]/3.0+f1(i);

v_s3_b=_mm_loadu_ps(&matB[i+1][j*4+1]);

v_s3div=_mm_div_ps(v_s3_b,v_3);

v_s3sum=_mm_add_ps(v_s3div,v_f1i);

_mm_store_ps(&matD[i][j*4],v_s3sum);

//matB[i][j]=matB[i+1][j]+f1(j);

v_f1j=_mm_setr_ps(f1(j*4),f1(j*4+1),f1(j*4+2),f1(j*4+3));

v_s4_b=_mm_load_ps(&matB[i+1][j*4]);

v_s4sum=_mm_add_ps(v_s4_b,v_f1j);

_mm_store_ps(&matB[i][j*4],v_s4sum);
}

for(j=0;j<3;j++){
matA[i][columnas4+j]=matA[i1][columnas4+j]+fi;
matC[i][columnas4+j]=matA[i1][columnas3+j]+matD[i+1][columnas4+j]+fi;
matD[i][columnas4+j]=matB[i+1][columnas3+j]/3.0+fi;
matB[i][columnas4+j]=matB[i+1][columnas4+j]+f1(columnas4+j);

}
intproc_ParFision(floatmatA[][COLUMNAS],floatmatB[][COLUMNAS],floatmatC[][COLUMNAS],
floatmatD[][COLUMNAS],intfilas,intcolumnas)
{

inti,j;

#pragmaompparallelnum_threads(NTHR)
{

#pragmaompforprivate(i,j)

for(i=1;i<filas1;i++){

for(j=0;j<columnas1;j++){

matA[i][j]=matA[i1][j]+f1(i);

file:///C:/Users/Erik/Desktop/dataflow.c

3/8

8/1/2016

dataflow.c

#pragmaompforprivate(i,j)
for(i=1;i<filas1;i++){

for(j=0;j<columnas1;j++){

matC[i][j]=matA[i1][j+1]+matD[i+1][j]+f1(i);

}
}

#pragmaompforprivate(i,j)
for(i=1;i<filas1;i++){

for(j=0;j<columnas1;j++){

matD[i][j]=matB[i+1][j+1]/3.0+f1(i);

#pragmaompforprivate(i,j)
for(j=0;j<columnas1;j++){

for(i=1;i<filas1;i++){

matB[i][j]=matB[i+1][j]+f1(j);

}
}

intproc_ParFisionVect(floatmatA[][COLUMNAS],floatmatB[][COLUMNAS],floatmatC[][COLUMNAS],
floatmatD[][COLUMNAS],intfilas,intcolumnas)
{

inti,j;

__m128v_3;

floatfi;

__m128v_f1i,v_f1j;

//S1

__m128v_s1_a,v_s1sum;

//S2

__m128v_s2_a,v_s2_d,v_s2sum1,v_s2sum2;

//S3

__m128v_s3_b,v_s3div,v_s3sum;

v_3=_mm_set1_ps(3.0);

f1(i);

for(i=1;i<filas1;i++){

//matA[i][j]=matA[i1][j]+f1(i);

#pragmaompparallelforprivate(j)

for(j=0;j<(columnas/4)1;j++){

fi=f1(i);

v_f1i=_mm_set1_ps(fi);

v_s1_a=_mm_load_ps(&matA[i1][j*4]);

v_s1sum=_mm_add_ps(v_s1_a,v_f1i);

_mm_store_ps(&matA[i][j*4],v_s1sum);

for(j=0;j<3;j++)

matA[i][columnas4+j]=matA[i1][columnas4+j]+

for(i=1;i<filas1;i++){

file:///C:/Users/Erik/Desktop/dataflow.c

4/8

8/1/2016

dataflow.c

#pragmaompparallelforprivate(j)

for(j=0;j<(columnas/4)1;j++){

fi=f1(i);

v_f1i=_mm_set1_ps(fi);

//matC[i][j]=matA[i1][j+1]+matD[i+1][j]+f1(i);

v_s2_a=_mm_loadu_ps(&matA[i1][j*4+1]);

v_s2_d=_mm_load_ps(&matD[i+1][j*4]);

v_s2sum1=_mm_add_ps(v_s2_a,v_s2_d);

v_s2sum2=_mm_add_ps(v_s2sum1,v_f1i);

_mm_store_ps(&matC[i][j*4],v_s2sum2);

for(j=0;j<3;j++)

matC[i][columnas4+j]=matA[i1][columnas3+j]+
matD[i+1][columnas4+j]+f1(i);

for(i=1;i<filas1;i++){

#pragmaompparallelforprivate(j)

for(j=0;j<(columnas/4)1;j++){

fi=f1(i);

v_f1i=_mm_set1_ps(fi);

//matD[i][j]=matB[i+1][j+1]/3.0+f1(i);

v_s3_b=_mm_loadu_ps(&matB[i+1][j*4+1]);

v_s3div=_mm_div_ps(v_s3_b,v_3);

v_s3sum=_mm_add_ps(v_s3div,v_f1i);

_mm_store_ps(&matD[i][j*4],v_s3sum);

for(j=0;j<3;j++)

matD[i][columnas4+j]=matB[i+1][columnas3+j]/3.0
+f1(i);

//#pragmaompforprivate(i,j)

for(j=0;j<columnas1;j++)

for(i=1;i<filas1;i++)

matB[i][j]=matB[i+1][j]+f1(j);

}
}
intproc_ParFilaFijaVect(floatmatA[][COLUMNAS],floatmatB[][COLUMNAS],floatmatC[][COLUMNAS],
floatmatD[][COLUMNAS],intfilas,intcolumnas)
{
inti,j;

__m128v_3;

floatfi,fj;

//S2

__m128v_s2_a,v_s2_d,v_s2sum1,v_s2sum2;

__m128v_f1i,v_f1j;

//S1yS3

__m128v_s1_a,v_s1sum;

__m128v_s3_b,v_s3div,v_s3sum;

//S4

__m128v_s4_b,v_s4sum;

v_3=_mm_set1_ps(3.0);

file:///C:/Users/Erik/Desktop/dataflow.c

5/8

8/1/2016

dataflow.c

for(i=1;i<filas1;i++){

fi=f1(i);

v_f1i=_mm_set1_ps(fi);

#pragmaompparallelnum_threads(NTHR)

#pragmaompforprivate(j)

for(j=0;j<(columnas/4)1;j++){

//matC[i][j]=matA[i1][j+1]+matD[i+1][j]+f1(i);

v_s2_a=_mm_loadu_ps(&matA[i1][j*4+1]);

v_s2_d=_mm_load_ps(&matD[i+1][j*4]);

v_s2sum1=_mm_add_ps(v_s2_a,v_s2_d);

v_s2sum2=_mm_add_ps(v_s2sum1,v_f1i);

_mm_store_ps(&matC[i][j*4],v_s2sum2);

f1(i);

for(j=0;j<3;j++)

matC[i][columnas4+j]=matA[i1][columnas3+j]+matD[i+1][columnas
4+j]+fi;

/*****************/

#pragmaompforprivate(j)

for(j=0;j<(columnas/4)1;j++){

//matA[i][j]=matA[i1][j]+f1(i);

v_s1_a=_mm_loadu_ps(&matA[i1][j*4]);

v_s1sum=_mm_add_ps(v_s1_a,v_f1i);

_mm_store_ps(&matA[i][j*4],v_s1sum);

f1(i);

//matD[i][j]=matB[i+1][j+1]/3.0+f1(i);

v_s3_b=_mm_loadu_ps(&matB[i+1][j*4+1]);

v_s3div=_mm_div_ps(v_s3_b,v_3);

v_s3sum=_mm_add_ps(v_s3div,v_f1i);

_mm_store_ps(&matD[i][j*4],v_s3sum);

f1(i);

for(j=0;j<3;j++){

matA[i][columnas4+j]=matA[i1][columnas4+j]+fi;

matD[i][columnas4+j]=matB[i+1][columnas3+j]/3.0+fi;

#pragmaompforprivate(j)

for(j=0;j<(columnas/4)1;j++){

//matB[i][j]=matB[i+1][j]+f1(j);

v_f1j=_mm_setr_ps(f1(j*4),f1(j*4+1),f1(j*4+2),f1(j*4+3));

v_s4_b=_mm_load_ps(&matB[i+1][j*4]);

v_s4sum=_mm_add_ps(v_s4_b,v_f1j);

_mm_store_ps(&matB[i][j*4],v_s4sum);

for(j=0;j<3;j++)

matB[i][columnas4+j]=matB[i+1][columnas4+j]+f1(columnas4+j);

/******************************************************************************************************
***********/
intmain(intargc,char*argv[])
{

inti,j;

IniDatos();

printf("seq:\n");
for(wmin=UINT_MAX,wmax=0,wsum=0,j=0;j<11;j++)
ini=rdtsc();

file:///C:/Users/Erik/Desktop/dataflow.c

6/8

8/1/2016

dataflow.c

proc_sequential(mat_ASeq,mat_BSeq,mat_CSeq,mat_DSeq,FILAS,COL);

fin=rdtsc();

if(j>0)
{
wdif=(finini);
wsum+=wdif;
wmax=(wdif>wmax)?wdif:wmax;
wmin=(wdif<wmin)?wdif:wmin;
}
}
printf("Ticksmedio:\t\t%fmin%fmax%f\n",(float)wsum/10,(float)wmin,(float)wmax);

printf("Para:\n");

omp_set_num_threads(NTHR);

for(wmin=UINT_MAX,wmax=0,wsum=0,j=0;j<11;j++)

ini=rdtsc();
#if(FILAS<=200&&COLUMNAS<=200)

proc_UnCore(mat_APar,mat_BPar,mat_CPar,mat_DPar,FILAS,COL);
#elif(FILAS<=5000&&COLUMNAS<=5000)

proc_ParFision(mat_APar,mat_BPar,mat_CPar,mat_DPar,FILAS,COL);
#else

proc_ParFilaFijaVect(mat_APar,mat_BPar,mat_CPar,mat_DPar,FILAS,COL);

#endif

fin=rdtsc();

if(j>0)
{
wdif=(finini);
wsum+=wdif;
wmax=(wdif>wmax)?wdif:wmax;
wmin=(wdif<wmin)?wdif:wmin;
}
}
printf("Ticksmedio:\t\t%fmin%fmax%f\n",(float)wsum/10,(float)wmin,(float)wmax);
for(i=0;i<FILAS;i++)
{
for(j=0;j<COL;j++)
{
if(mat_ASeq[i][j]!=mat_APar[i][j])

{
if(abs(mat_ASeq[i][j]mat_APar[i][j])>ERRORPER)

printf("matrizAdistintai%d,j%dvalores%f%f\n",
i,j,mat_ASeq[i][j],mat_APar[i][j]);
break;
}

}
if(mat_BSeq[i][j]!=mat_BPar[i][j])

{
if(abs(mat_BSeq[i][j]mat_BPar[i][j])>ERRORPER)

printf("matrizBdistintai%d,j%dvalores%f%f\n",
i,j,mat_BSeq[i][j],mat_BPar[i][j]);
break;
}

}
if(mat_CSeq[i][j]!=mat_CPar[i][j])

{
if(abs(mat_CSeq[i][j]mat_CPar[i][j])>ERRORPER)

printf("matrizCdistintai%d,j%dvalores%f%f\n",
i,j,mat_CSeq[i][j],mat_CPar[i][j]);
break;
}
file:///C:/Users/Erik/Desktop/dataflow.c

7/8

8/1/2016

dataflow.c

}
if(mat_DSeq[i][j]!=mat_DPar[i][j])

{
if(abs(mat_DSeq[i][j]mat_DPar[i][j])>ERRORPER)

printf("matrizDdistintai%d,j%dvalores%f%f\n",
i,j,mat_DSeq[i][j],mat_DPar[i][j]);
break;
}

}
}
if(j!=COL)break;
}
if(i==FILAS)
printf("MismosResultados\n");
return;
}

/* file:///C:/Users/Erik/Desktop/dataflow.c -- page 8 of 8 */