Parallel Block-Oriented Matrix Multiplication
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
void mult(int a[][2],int b[][2],int c[][2],int d)
{
int i,j,k;
for(i=0;i<d;i++)
for(j=0;j<d;j++)
{
c[i][j]=0;
for(k=0;k<d;k++)
c[i][j] = c[i][j] + a[i][k]*b[k][j];
}
}
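/* c = a + b for d x d blocks */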
void add(int a[][2],int b[][2],int c[][2],int d)
{
int i,j;
for(i=0;i<d;i++)
for(j=0;j<d;j++)
c[i][j] = a[i][j]+b[i][j];
}
int main(int argc,char *argv[])
{
int size,rank,inc=0;
int a[4][4],b[4][4];
int a1[2][2],a2[2][2],a3[2][2],a4[2][2];
int b1[2][2],b2[2][2],b3[2][2],b4[2][2];
int d1[2][2],d2[2][2],ans[4][2][2];
double t1,t2,t3,t4,t5,t6;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&size);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Status stat;
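/* the fixed decomposition needs exactly 9 processes: rank 0 coordinates,
   ranks 1-8 each multiply one 2x2 block pair */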
if(size!=9)
{
if(!rank)
printf("\nThis Program require exactly 9 processors!!\n\n");
MPI_Finalize();
exit(1);
}
if(rank==0)
{
int i,j,k,x,y,ind=1,l;
for(i=0;i<4;i++)
for(j=0;j<4;j++)
{
a[i][j] = ind;
b[i][j] = ind;
ind++;
}
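/* A and B get identical entries 1..16, so one printout covers both */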
printf("\nINPUT MATRIX A and B:\n");
for(i=0;i<4;i++)
{
for(j=0;j<4;j++)
printf("\t%d",a[i][j]);
printf("\n");
}
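/* split A and B into 2x2 quadrants: a1,a2,a3,a4 = A11,A12,A21,A22;
   b1,b3,b2,b4 = B11,B12,B21,B22 (note: b2 holds the bottom-left block,
   b3 the top-right) */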
for(i=0,x=0;i<2;i++,x++)
for(j=0,y=0;j<2;j++,y++)
{
a1[i][j]=a[x][y];
b1[i][j]=b[x][y];
}
for(i=0,x=0;i<2;i++,x++)
for(j=0,y=2;j<2;j++,y++)
{
a2[i][j]=a[x][y];
b3[i][j]=b[x][y];
}
for(i=0,x=2;i<2;i++,x++)
for(j=0,y=0;j<2;j++,y++)
{
a3[i][j]=a[x][y];
b2[i][j]=b[x][y];
}
for(i=0,x=2;i<2;i++,x++)
for(j=0,y=2;j<2;j++,y++)
{
a4[i][j]=a[x][y];
b4[i][j]=b[x][y];
}
t1 = MPI_Wtime();
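/* each worker gets one (A-block, B-block) pair; ranks 2i-1 and 2i produce
   the two partial products of result block i:
   C11=A11*B11+A12*B21, C12=A11*B12+A12*B22,
   C21=A21*B11+A22*B21, C22=A21*B12+A22*B22 */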
for(k=1;k<=8;k++)
{
switch(k)
{
case 1: MPI_Send(a1,4,MPI_INT,1,101,MPI_COMM_WORLD);
MPI_Send(b1,4,MPI_INT,1,102,MPI_COMM_WORLD);
break;
case 2: MPI_Send(a2,4,MPI_INT,2,101,MPI_COMM_WORLD);
MPI_Send(b2,4,MPI_INT,2,102,MPI_COMM_WORLD);
break;
case 3: MPI_Send(a1,4,MPI_INT,3,101,MPI_COMM_WORLD);
MPI_Send(b3,4,MPI_INT,3,102,MPI_COMM_WORLD);
break;
case 4: MPI_Send(a2,4,MPI_INT,4,101,MPI_COMM_WORLD);
MPI_Send(b4,4,MPI_INT,4,102,MPI_COMM_WORLD);
break;
case 5: MPI_Send(a3,4,MPI_INT,5,101,MPI_COMM_WORLD);
MPI_Send(b1,4,MPI_INT,5,102,MPI_COMM_WORLD);
break;
case 6: MPI_Send(a4,4,MPI_INT,6,101,MPI_COMM_WORLD);
MPI_Send(b2,4,MPI_INT,6,102,MPI_COMM_WORLD);
break;
case 7: MPI_Send(a3,4,MPI_INT,7,101,MPI_COMM_WORLD);
MPI_Send(b3,4,MPI_INT,7,102,MPI_COMM_WORLD);
break;
case 8: MPI_Send(a4,4,MPI_INT,8,101,MPI_COMM_WORLD);
MPI_Send(b4,4,MPI_INT,8,102,MPI_COMM_WORLD);
break;
}
}
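/* collect the two partial products for each result block from ranks k and k+1,
   add them, and print that 2x2 block of C */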
for(k=1;k<=7;k+=2)
{
l=k+1;
MPI_Recv(d1,4,MPI_INT,k,201,MPI_COMM_WORLD,&stat);
MPI_Recv(d2,4,MPI_INT,l,201,MPI_COMM_WORLD,&stat);
add(d1,d2,ans[inc],2);
printf("\nProcessor %d and %d:\n",k,l);
for(i=0;i<2;i++)
{
for(j=0;j<2;j++)
printf("\t%d",ans[inc][i][j]);
printf("\n");
}
inc++;
}
t2 = MPI_Wtime();
t3 = (t2-t1);
MPI_Recv(&t6,1,MPI_DOUBLE,8,202,MPI_COMM_WORLD,&stat);
}
else
{
/* worker ranks 1-8: receive one block pair, multiply it, and return the product with its timing */
int c1[2][2],c2[2][2],d[2][2];
MPI_Recv(c1,4,MPI_INT,0,101,MPI_COMM_WORLD,&stat);
MPI_Recv(c2,4,MPI_INT,0,102,MPI_COMM_WORLD,&stat);
t4 = MPI_Wtime();
mult(c1,c2,d,2);
t5 = MPI_Wtime();
t6 = (t5-t4);
MPI_Send(d,4,MPI_INT,0,201,MPI_COMM_WORLD);
MPI_Send(&t6,1,MPI_DOUBLE,0,202,MPI_COMM_WORLD);
}
MPI_Finalize();
exit(0);
}
Analysis of Parallel Block-Oriented Matrix Multiplication:
**************************************************
Processor 1 and 2:
90 100
202 228
Processor 3 and 4:
110 120
254 280
Processor 5 and 6:
314 356
426 484
Processor 7 and 8:
398 440
542 600
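As a cross-check, the same product can be computed serially. The following standalone sketch (not part of the MPI listing above; names are illustrative) builds the same A = B = [1..16] matrix and prints the full 4x4 product, whose 2x2 quadrants should match the per-processor-pair blocks reported above.

/* Serial cross-check: C = A*A for A = B = [1..16] */
#include <stdio.h>

int main(void)
{
    int a[4][4], c[4][4];
    int i, j, k, ind = 1;
    /* fill A (and implicitly B) with 1..16 row by row */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++)
            a[i][j] = ind++;
    /* plain triple-loop multiplication */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++) {
            c[i][j] = 0;
            for (k = 0; k < 4; k++)
                c[i][j] += a[i][k] * a[k][j];
        }
    /* print the 4x4 result; its quadrants are C11, C12, C21, C22 */
    for (i = 0; i < 4; i++) {
        for (j = 0; j < 4; j++)
            printf("\t%d", c[i][j]);
        printf("\n");
    }
    return 0;
}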