
I have to simulate 2D heat diffusion on a plate. Everything works fine with 4 tasks, but it breaks when using N*4 tasks (N > 1). I suspect the problem is in the communication logic between the upper and lower "ghost rows". Here is the relevant excerpt:

int t = 0;
for (t = 0; t < integration_time; t++)
{
    if (taskid == 1)
    {
        neighbor_down = taskid + 1;
        neighbor_up = taskid - 1;
        MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag1, MPI_COMM_WORLD);
        MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, 3, MPI_COMM_WORLD);
        MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag1, MPI_COMM_WORLD, &status);
        MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, 3, MPI_COMM_WORLD, &status);
        /* Send timestep to master */
    }
    if (taskid == numtasks - 1)
    {
        neighbor_up = taskid - 1;
        MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag2, MPI_COMM_WORLD);
        MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag2, MPI_COMM_WORLD, &status);
    }
    if (taskid % 2 == 0 && taskid != 1 && taskid != numtasks - 1)
    {
        neighbor_up = taskid - 1;
        neighbor_down = taskid + 1;
        MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD, &status);
        MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD, &status);
        MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD);
        MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD);
    }
    if (taskid % 2 != 0 && taskid != 1 && taskid != numtasks - 1)
    {
        neighbor_up = taskid - 1;
        neighbor_down = taskid + 1;
        MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD);
        MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD);
        MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD, &status);
        MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD, &status);
    }

When running with more than 4 tasks, it gets stuck entering the main loop of the master process. Does anyone have an idea? I'm sorry for the poorly optimized code, but I'm new to C and parallel programming! The full program follows:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>

#define ROWS 64
#define COLUMNS 64
#define MASTER 0
#define CONTROL_TEMP 10000000
#define integration_time 1000

void inidat(int nx, int ny, float *u);
void prtdat(int nx, int ny, float *u, char *fnam);
void diffuse(int nx, int ny, float *t_old, float *t_new, float cx, float cy);

float temp[ROWS][COLUMNS];

int main(int argc, char *argv[])
{
    float cy, cx;
    cy=0.2;
    cx=0.2;
    int numtasks, taskid, rc, dest, offset, i, j, tag1, tag2, source, chunksize;
    MPI_Status status;
    int ghost_up, ghost_down;
    float* t_new;
    float* t_old;
    int ix, iy;
    int nx, ny;
    ny = ROWS;
    nx = COLUMNS;

    /***** Initializations *****/
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    if (numtasks % 4 != 0)
    {
        printf("Quitting. Number of MPI tasks must be divisible by 4.\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
        exit(0);
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
    //printf("MPI task %d has started...\n", taskid);
    chunksize = (ROWS / numtasks);
    tag1 = 1;
    tag2 = 2;

    /***** Master task only ******/
    if (taskid == MASTER)
    {                                   
        for (i = 0; i < ROWS; i++) /* Initialize the array */
        {
            for (j = 0; j < COLUMNS; j++)
            {
                temp[i][j] = 0;
            }

        }
        /* Create boundary condition with temperature at CONTROL_TEMP at the center of the domain */
        temp[ROWS/2][COLUMNS/2] = CONTROL_TEMP;


        
        prtdat(ROWS, COLUMNS, &temp[0][0], "initial.dat");

        /* Subdivide the domain into N_TASKS horizontal stripes that we will send to each WORKER */
        
        chunksize = ROWS/numtasks;
        offset = chunksize;
        for (dest = 1; dest < numtasks; dest++)
        {
            MPI_Send(&offset, 1, MPI_INT, dest, tag1, MPI_COMM_WORLD);
            MPI_Send(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, dest, tag2, MPI_COMM_WORLD);
            offset = offset + chunksize;
        }
        offset = 0;
        

        /* Exchange data with task 1, receive and send timestep */
        int neighbor_down, neighbor_up;
        neighbor_down = taskid + 1;
        int t = 0;
        printf("Master entering loop\n");
        char filename[40];
        for (t = 0; t < integration_time; t++)
        {
            for (j = 1; j < offset+chunksize; j++)
            {
                for (i = 1; i < COLUMNS-1; i++)
                {
                    temp[j][i] = temp[j][i]+cx*(temp[j+1][i]+temp[j-1][i]-2.0*temp[j][i])+cy*(temp[j][i+1]+temp[j][i-1]-2.0*temp[j][i]);
                }
            }

            MPI_Send(&temp[chunksize-1][0], COLUMNS, MPI_FLOAT, 1, 3, MPI_COMM_WORLD);
            MPI_Recv(&temp[chunksize][0], COLUMNS, MPI_FLOAT, 1, 3, MPI_COMM_WORLD, &status);
            
            for (i = 1; i < numtasks; i++)
            {
                source = i;
                MPI_Recv(&offset, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);
                MPI_Recv(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, source, tag2, MPI_COMM_WORLD, &status);
            }

            // Create a filename containing the timestep number
            sprintf(filename, "data/final_%d", t);

            prtdat(ROWS, COLUMNS, &temp[0][0], filename);
            
        }
       
    }

    

    /***** Non-master tasks only *****/
    if (taskid > MASTER)
    {
        /* Receive temp[j][i] from master */
        source = MASTER;
        
        MPI_Recv(&offset, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);
        MPI_Recv(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, source, tag2, MPI_COMM_WORLD, &status);
        
       /* Create the ghost zone for each task: after each timestep, the top and bottom rows
          exchange their temperatures with the neighboring tasks above and below. */

        
        int neighbor_down, neighbor_up;

        /* Check if taskid = first or last [first = 1, last = numtasks - 1] */

        int t = 0;
        for (t = 0; t < integration_time; t++)
        {
            
            
            if (taskid == 1)
            {
                neighbor_down = taskid + 1;
                neighbor_up = taskid - 1;
                MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag1, MPI_COMM_WORLD);
                MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, 3, MPI_COMM_WORLD);
                MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag1, MPI_COMM_WORLD, &status);
                MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, 3, MPI_COMM_WORLD, &status);
                /* Send timestep to master */
                
            }
            if (taskid == numtasks - 1)
            {
                neighbor_up = taskid - 1;
                MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag2, MPI_COMM_WORLD);
                MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag2, MPI_COMM_WORLD, &status);
            }
            
            
            if (taskid % 2 == 0 && taskid != 1 && taskid != numtasks - 1)
            {
                neighbor_up = taskid - 1;
                neighbor_down = taskid + 1;

                MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD, &status);
                MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD, &status);
                MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD);
                MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD);
            }
            if (taskid % 2 != 0 && taskid != 1 && taskid != numtasks - 1)
            {
                neighbor_up = taskid - 1;
                neighbor_down = taskid + 1;

                MPI_Send(&temp[offset][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD);
                MPI_Send(&temp[offset+chunksize-1][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD);
                MPI_Recv(&temp[offset-1][0], COLUMNS, MPI_FLOAT, neighbor_up, tag1, MPI_COMM_WORLD, &status);
                MPI_Recv(&temp[offset+chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, tag2, MPI_COMM_WORLD, &status);
            }

            for (j = offset; j < offset+chunksize; j++)
            {
                for (i = 1; i < COLUMNS-1; i++)
                {
                    temp[j][i] = temp[j][i]+cx*(temp[j+1][i]+temp[j-1][i]-2.0*temp[j][i])+cy*(temp[j][i+1]+temp[j][i-1]-2.0*temp[j][i]);
                }
            }

            /* Send the updated chunk back to the master */
            MPI_Send(&offset, 1, MPI_INT, MASTER, tag1, MPI_COMM_WORLD);
            MPI_Send(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, MASTER, tag2, MPI_COMM_WORLD);
        }
        printf("Task %d finished updating temperature\n", taskid);
        // send the final data back to the master (received once by the master after its own loop)
        MPI_Send(&offset, 1, MPI_INT, MASTER, tag1, MPI_COMM_WORLD);
        MPI_Send(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, MASTER, tag2, MPI_COMM_WORLD);
    }

    // master receives the data from the workers
    if(taskid == MASTER)
    {
        for (i = 1; i < numtasks; i++)
        {
            source = i;
            MPI_Recv(&offset, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);
            MPI_Recv(&temp[offset][0], chunksize*COLUMNS, MPI_FLOAT, source, tag2, MPI_COMM_WORLD, &status);
        }
        printf("Master received FINAL data from workers\n");
       
        prtdat(ROWS, COLUMNS, &temp[0][0], "final.dat");
 
    }

    // Print the final data to file
    
    MPI_Finalize();






    

    return 0;
}
/* Function to update the temperature at each step */
void diffuse(int nx, int ny, float *t_old, float *t_new, float cx, float cy)
{
    int ix, iy;
    
    for (iy=1; iy < ny-1; iy++){
        for(ix=1; ix < nx-1; iy++){
            *(t_new+ix*ny+iy) = *(t_old+ix*ny+iy) + cx * (*(t_old+(ix+1)*ny+iy) + *(t_old+(ix-1)*ny+iy) - 2.0 * *(t_old+ix*ny+iy)) + cy * *(t_old+ix*ny+iy+1) + *(t_old+ix*ny+iy-1) - 2.0* *(t_old+ix*ny+iy);
        }
    }

    for (iy=1; iy < ny-1; iy++){
        for(ix=1; ix <nx-1; ix++){
            *(t_old+ix*ny+iy) = *(t_new+ix*ny+iy);
        }
    }


}


void prtdat(int nx, int ny, float *u1, char *fnam)
{
   int ix, iy;
   FILE *fp;

   fp = fopen(fnam, "w");
   for (ix = 0; ix <= nx-1; ix++) {
      for (iy = 0; iy <= ny-1; iy++) {
        fprintf(fp, "%8.3f", *(u1+ix*ny+iy));
        if (iy != ny-1) {
           fprintf(fp, " ");
           }
       else {
          fprintf(fp, "\n");
          }
       }
    }
   fclose(fp);
   printf(" %s\n",fnam);
}

void prntfile(int nx, int ny, float *u1, char *fnam)
{
   int ix, iy;
   FILE *fp;

   fp = fopen(fnam, "w");
   for (ix = 0; ix <= nx-1; ix++) {
      for (iy = 0; iy <= ny-1; iy++) {
        fprintf(fp, "%8.3f", *(u1+ix*ny+iy));
        if (iy != ny-1) {
           fprintf(fp, " ");
           }
       else {
          fprintf(fp, "\n");
          }
       }
    }
   fclose(fp);
   printf(" %s\n",fnam);
}

I've tried changing the send/recv order, but things only get worse.
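Would MPI_Sendrecv be the right direction instead? Below is a rough, untested sketch of what I mean, reusing my globals (temp, COLUMNS) and the per-rank offset/chunksize; the helper name exchange_ghost_rows is made up just for illustration.

static void exchange_ghost_rows(float temp[][COLUMNS], int offset, int chunksize,
                                int taskid, int numtasks)
{
    /* Sketch only: a ghost-row exchange done with paired MPI_Sendrecv calls,
       so the send/recv ordering between neighbors cannot deadlock.
       The edge ranks talk to MPI_PROC_NULL, which makes that half a no-op. */
    int neighbor_up   = (taskid == 0)            ? MPI_PROC_NULL : taskid - 1;
    int neighbor_down = (taskid == numtasks - 1) ? MPI_PROC_NULL : taskid + 1;

    /* A receive from MPI_PROC_NULL never writes the buffer, so the edge ranks
       can point their "missing" ghost row at any valid row. */
    float *ghost_above = (neighbor_up   == MPI_PROC_NULL) ? &temp[offset][0]
                                                          : &temp[offset - 1][0];
    float *ghost_below = (neighbor_down == MPI_PROC_NULL) ? &temp[offset][0]
                                                          : &temp[offset + chunksize][0];

    /* Everyone passes its bottom row down and receives the ghost row above (tag 1). */
    MPI_Sendrecv(&temp[offset + chunksize - 1][0], COLUMNS, MPI_FLOAT, neighbor_down, 1,
                 ghost_above,                      COLUMNS, MPI_FLOAT, neighbor_up,   1,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    /* Everyone passes its top row up and receives the ghost row below (tag 2). */
    MPI_Sendrecv(&temp[offset][0],                 COLUMNS, MPI_FLOAT, neighbor_up,   2,
                 ghost_below,                      COLUMNS, MPI_FLOAT, neighbor_down, 2,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}

Every rank (including the master, which keeps the top stripe with offset 0) would call this once per timestep before updating its rows.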

  • The master exchanges the MPI_FLOAT before the MPI_INT, but task 1 does it the other way around. Commented Jun 19, 2024 at 1:25
  • These patterns are very deadlock prone, so you should use neighbourhood collectives if they are a fit. Otherwise, try using MPI_Isend(), MPI_Irecv() and a single MPI_Waitall(). Commented Jun 19, 2024 at 1:27
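
For reference, here is a rough, untested sketch of the non-blocking pattern the second comment suggests, reusing the question's variables (temp, offset, chunksize, COLUMNS, taskid, numtasks): each rank posts all of its receives and sends first, then completes them with a single MPI_Waitall, so no pairwise send/recv ordering between neighbors can deadlock.

/* Sketch only: per-timestep ghost-row exchange with MPI_Isend/MPI_Irecv
   and one MPI_Waitall, instead of the blocking per-rank Send/Recv ladder. */
MPI_Request reqs[4];
int nreq = 0;
int neighbor_up   = (taskid == 0)            ? MPI_PROC_NULL : taskid - 1;
int neighbor_down = (taskid == numtasks - 1) ? MPI_PROC_NULL : taskid + 1;

/* Post receives for the ghost rows; the edge ranks simply skip the missing neighbor. */
if (neighbor_up != MPI_PROC_NULL)
    MPI_Irecv(&temp[offset - 1][0],         COLUMNS, MPI_FLOAT, neighbor_up,   1, MPI_COMM_WORLD, &reqs[nreq++]);
if (neighbor_down != MPI_PROC_NULL)
    MPI_Irecv(&temp[offset + chunksize][0], COLUMNS, MPI_FLOAT, neighbor_down, 2, MPI_COMM_WORLD, &reqs[nreq++]);

/* Post sends of the border rows. Tag 1 = row travelling down, tag 2 = row travelling up. */
if (neighbor_down != MPI_PROC_NULL)
    MPI_Isend(&temp[offset + chunksize - 1][0], COLUMNS, MPI_FLOAT, neighbor_down, 1, MPI_COMM_WORLD, &reqs[nreq++]);
if (neighbor_up != MPI_PROC_NULL)
    MPI_Isend(&temp[offset][0],                 COLUMNS, MPI_FLOAT, neighbor_up,   2, MPI_COMM_WORLD, &reqs[nreq++]);

/* Complete all posted operations (fewer than four on the edge ranks) at once. */
MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);

The row-update loop would then run after MPI_Waitall, exactly where the blocking version does it now.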

