I have to simulate 2D heat diffusion on a plate. Everything works fine with 4 tasks, but it hangs when using N*4 tasks (i.e. any multiple of 4 greater than 4). I suspect the problem is in the logic of the communication between the upper and lower "ghost rows."
int t=0;
for (t = 0; t < integration_time; t++)
{
if (taskid == 1)
{
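/* First worker: send its bottom row down (tag1) and its top row up to the master (tag 3), then receive both ghost rows */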
neighbor_down = taskid + 1;
neighbor_up = taskid - 1;
MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag1, MPI_COMM_WORLD);
MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, 3, MPI_COMM_WORLD);
MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag1, MPI_COMM_WORLD, &status);
MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, 3, MPI_COMM_WORLD, &status);
/* The tag-3 send/recv above exchange the boundary row with the master (rank 0) */
}
if (taskid == numtasks - 1)
{
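/* Last worker: send its top row up and receive the ghost row above it (both tag2) */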
neighbor_up = taskid - 1;
MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag2, MPI_COMM_WORLD);
MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag2, MPI_COMM_WORLD, &status);
}
if (taskid % 2 == 0 && taskid != 1 && taskid != numtasks - 1)
{
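/* Even-numbered interior workers: post both receives first, then both sends */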
neighbor_up = taskid - 1;
neighbor_down = taskid + 1;
MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD, &status);
MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD, &status);
MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD);
MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD);
}
if (taskid % 2 != 0 && taskid != 1 && taskid != numtasks - 1)
{
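/* Odd-numbered interior workers: post both sends first, then both receives */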
neighbor_up = taskid - 1;
neighbor_down = taskid + 1;
MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD);
MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD);
MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD, &status);
MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD, &status);
}
When running with more than 4 tasks, it gets stuck right after the master process enters its main loop. Does anyone have an idea? I'm sorry for the poor code optimization, but I'm new to C and parallel programming!
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>
#define ROWS 64
#define COLUMNS 64
#define MASTER 0
#define CONTROL_TEMP 10000000
#define integration_time 1000
void inidat(int nx, int ny, float *u);
void prtdat(int nx, int ny, float *u, char *fnam);
void diffuse(int nx, int ny, float *t_old, float *t_new, float cx, float cy);
float temp[ROWS][COLUMNS];
int main(int argc, char *argv[])
{
float cy, cx;
cy=0.2;
cx=0.2;
int numtasks, taskid, rc, dest, offset, i, j, tag1, tag2, source, chunksize;
MPI_Status status;
int ghost_up, ghost_down;
float* t_new;
float* t_old;
int ix, iy;
int nx, ny;
ny = ROWS;
nx = COLUMNS;
/***** Initializations *****/
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
if (numtasks % 4 != 0)
{
printf("Quitting. Number of MPI tasks must be divisible by 4.\n");
MPI_Abort(MPI_COMM_WORLD, 1); /* pass an explicit error code; rc was never initialized */
exit(0);
}
MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
//printf("MPI task %d has started...\n", taskid);
chunksize = (ROWS / numtasks);
tag1 = 1;
tag2 = 2;
/***** Master task only ******/
if (taskid == MASTER)
{
for (i = 0; i < ROWS; i++) /* Initialize the array */
{
for (j = 0; j < COLUMNS; j++)
{
temp[i][j] = 0;
}
}
/* Create boundary condition with temperature at CONTROL_TEMP at the center of the domain */
temp[ROWS/2][COLUMNS/2] = CONTROL_TEMP;
prtdat(ROWS, COLUMNS, &temp[0][0], "initial.dat");
/* Subdivide the domain into numtasks horizontal stripes; the master keeps the first stripe and sends one to each worker */
chunksize = ROWS/numtasks;
offset = chunksize;
for (dest = 1; dest < numtasks; dest++)
{
MPI_Send(&offset, 1, MPI_INT, dest, tag1, MPI_COMM_WORLD);
MPI_Send(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, dest, tag2, MPI_COMM_WORLD);
offset = offset + chunksize;
}
offset = 0;
/* Each timestep: update the master's strip, exchange its bottom boundary row with task 1 (tag 3), and collect every worker's chunk for output */
int neighbor_down, neighbor_up;
neighbor_down = taskid + 1;
int t = 0;
printf("Master entering loop\n");
char filename[40];
for (t = 0; t < integration_time; t++)
{
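/* Stencil update over rows 1 .. offset+chunksize-1 of the master's copy of the grid */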
for (j = 1; j < offset+chunksize; j++)
{
for (i = 1; i < COLUMNS-1; i++)
{
temp[j][i] = temp[j][i]+cx*(temp[j+1][i]+temp[j-1][i]-2.0*temp[j][i])+cy*(temp[j][i+1]+temp[j][i-1]-2.0*temp[j][i]);
}
}
MPI_Send(&temp[chunksize-1][0], COLUMNS, MPI_FLOAT, 1, 3, MPI_COMM_WORLD);
MPI_Recv(&temp[chunksize][0], COLUMNS, MPI_FLOAT, 1, 3, MPI_COMM_WORLD, &status);
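/* Collect every worker's updated chunk so the full field can be written out for this timestep */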
for (i = 1; i < numtasks; i++)
{
source = i;
MPI_Recv(&offset, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);
MPI_Recv(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, source, tag2, MPI_COMM_WORLD, &status);
}
// Create a filename with the timestep name
sprintf(filename, "data/final_%d", t);
prtdat(ROWS, COLUMNS, &temp[0][0], filename);
}
}
/***** Non-master tasks only *****/
if (taskid > MASTER)
{
/* Receive temp[j][i] from master */
source = MASTER;
MPI_Recv(&offset, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);
MPI_Recv(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, source, tag2, MPI_COMM_WORLD, &status);
/* Create the ghost zone for each task: after each timestep, the top and bottom rows of the chunk
are communicated to the task above and the task below */
int neighbor_down, neighbor_up;
/* Check if taskid = first or last [first = 1, last = numtasks - 1] */
int t=0;
for (t = 0; t < integration_time; t++)
{
if (taskid == 1)
{
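/* First worker: send its bottom row down (tag1) and its top row up to the master (tag 3), then receive both ghost rows */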
neighbor_down = taskid + 1;
neighbor_up = taskid - 1;
MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag1, MPI_COMM_WORLD);
MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, 3, MPI_COMM_WORLD);
MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag1, MPI_COMM_WORLD, &status);
MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, 3, MPI_COMM_WORLD, &status);
/* The tag-3 send/recv above exchange the boundary row with the master (rank 0) */
}
if (taskid == numtasks - 1)
{
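/* Last worker: send its top row up and receive the ghost row above it (both tag2) */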
neighbor_up = taskid - 1;
MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag2, MPI_COMM_WORLD);
MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag2, MPI_COMM_WORLD, &status);
}
if (taskid % 2 == 0 && taskid != 1 && taskid != numtasks - 1)
{
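/* Even-numbered interior workers: post both receives first, then both sends */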
neighbor_up = taskid - 1;
neighbor_down = taskid + 1;
MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD, &status);
MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD, &status);
MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD);
MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD);
}
if (taskid % 2 != 0 && taskid != 1 && taskid != numtasks - 1)
{
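/* Odd-numbered interior workers: post both sends first, then both receives */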
neighbor_up = taskid - 1;
neighbor_down = taskid + 1;
MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD);
MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD);
MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD, &status);
MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD, &status);
}
for (j = offset; j < offset+chunksize; j++)
{
for (i = 1; i < COLUMNS-1; i++)
{
temp[j][i] = temp[j][i]+cx*(temp[j+1][i]+temp[j-1][i]-2.0*temp[j][i])+cy*(temp[j][i+1]+temp[j][i-1]-2.0*temp[j][i]);
}
}
/* Send the updated chunk back to the master so it can write out this timestep */
MPI_Send(&offset, 1, MPI_INT, MASTER, tag1, MPI_COMM_WORLD);
MPI_Send(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, MASTER, tag2, MPI_COMM_WORLD);
}
printf("Task %d finished updating temperature\n", taskid);
// send the data back to the master
MPI_Send(&offset, 1, MPI_INT, MASTER, tag1, MPI_COMM_WORLD);
MPI_Send(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, MASTER, tag2, MPI_COMM_WORLD);
}
// master receives the data from the workers
if(taskid == MASTER)
{
for (i = 1; i < numtasks; i++)
{
source = i;
MPI_Recv(&offset, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);
MPI_Recv(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, source, tag2, MPI_COMM_WORLD, &status);
}
printf("Master received FINAL data from workers\n");
prtdat(ROWS, COLUMNS, &temp[0][0], "final.dat");
}
// Print the final data to file
MPI_Finalize();
return 0;
}
/* Function to update the temperature at each step */
void diffuse(int nx, int ny, float *t_old, float *t_new, float cx, float cy)
{
int ix, iy;
for (iy=1; iy < ny-1; iy++){
for(ix=1; ix < nx-1; ix++){
*(t_new+ix*ny+iy) = *(t_old+ix*ny+iy) + cx * (*(t_old+(ix+1)*ny+iy) + *(t_old+(ix-1)*ny+iy) - 2.0 * *(t_old+ix*ny+iy)) + cy * (*(t_old+ix*ny+iy+1) + *(t_old+ix*ny+iy-1) - 2.0 * *(t_old+ix*ny+iy));
}
}
for (iy=1; iy < ny-1; iy++){
for(ix=1; ix <nx-1; ix++){
*(t_old+ix*ny+iy) = *(t_new+ix*ny+iy);
}
}
}
void prtdat(int nx, int ny, float *u1, char *fnam)
{
int ix, iy;
FILE *fp;
fp = fopen(fnam, "w");
for (ix = 0; ix <= nx-1; ix++) {
for (iy = 0; iy <= ny-1; iy++) {
fprintf(fp, "%8.3f", *(u1+ix*ny+iy));
if (iy != ny-1) {
fprintf(fp, " ");
}
else {
fprintf(fp, "\n");
}
}
}
fclose(fp);
printf(" %s\n",fnam);
}
void prntfile(int nx, int ny, float *u1, char *fnam)
{
int ix, iy;
FILE *fp;
fp = fopen(fnam, "w");
for (ix = 0; ix <= nx-1; ix++) {
for (iy = 0; iy <= ny-1; iy++) {
fprintf(fp, "%8.3f", *(u1+ix*ny+iy));
if (iy != ny-1) {
fprintf(fp, " ");
}
else {
fprintf(fp, "\n");
}
}
}
fclose(fp);
printf(" %s\n",fnam);
}
I've tried changing the send/recv order, but things only get worse.
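Would it be better to drop the per-rank branches and do one symmetric exchange instead? Below is a rough, untested sketch of what I mean, using MPI_Sendrecv and MPI_PROC_NULL. It reuses my temp, offset, chunksize and COLUMNS; the tags 0 and 1 are just placeholders, and the master would have to post the matching call for its own bottom row (shown at the end).

/* SKETCH ONLY (untested): worker-side halo exchange, once per timestep */
int up   = taskid - 1;                                        /* task 1's neighbour above is the master */
int down = (taskid == numtasks - 1) ? MPI_PROC_NULL : taskid + 1;

/* send my first real row up, receive the ghost row above it */
MPI_Sendrecv(&temp[offset][0],   COLUMNS, MPI_FLOAT, up, 0,
             &temp[offset-1][0], COLUMNS, MPI_FLOAT, up, 1,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);
/* send my last real row down, receive the ghost row below it
   (a no-op for the last task, whose down is MPI_PROC_NULL) */
MPI_Sendrecv(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, down, 1,
             &temp[offset+chunksize][0],   COLUMNS, MPI_FLOAT, down, 0,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);

/* SKETCH ONLY: master side (rank 0), matching exchange with task 1 */
MPI_Sendrecv(&temp[chunksize-1][0], COLUMNS, MPI_FLOAT, 1, 1,
             &temp[chunksize][0],   COLUMNS, MPI_FLOAT, 1, 0,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);

Would something like that avoid the ordering problem, or am I missing something?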
I've also seen MPI_Isend(), MPI_Irecv() and a single MPI_Waitall() suggested for this kind of halo exchange, but I'm not sure how to structure that here.
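If that suggestion means something like the block below, this is my rough (untested) understanding of it, with the same up/down variables and tag convention as the sketch above:

/* SKETCH ONLY (untested): nonblocking halo exchange for a worker rank */
MPI_Request reqs[4];
MPI_Irecv(&temp[offset-1][0],           COLUMNS, MPI_FLOAT, up,   1, MPI_COMM_WORLD, &reqs[0]);
MPI_Irecv(&temp[offset+chunksize][0],   COLUMNS, MPI_FLOAT, down, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Isend(&temp[offset][0],             COLUMNS, MPI_FLOAT, up,   0, MPI_COMM_WORLD, &reqs[2]);
MPI_Isend(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, down, 1, MPI_COMM_WORLD, &reqs[3]);
MPI_Waitall(4, reqs, MPI_STATUSES_IGNORE);   /* wait for all four before the stencil update */

Is that the right way to set it up here?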