I am trying to get started on dynamic process creation in MPI. I have a parent code (main.c) trying to spawn new worker/child processes (worker.c) and merge both into one intracommunicator. The parent code (main.c) is
#include<stdio.h>
#include "mpi.h"
MPI_Comm child_comm;
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if(rank == 0 )
{
int num_processes_to_spawn = 2;
MPI_Comm_spawn("worker", MPI_ARGV_NULL, num_processes_to_spawn, MPI_INFO_NULL, 0, MPI_COMM_SELF, &child_comm, MPI_ERRCODES_IGNORE );
MPI_Comm intra_comm;
MPI_Intercomm_merge(child_comm,0, &intra_comm);
MPI_Barrier(child_comm);
int tmp_size;
MPI_Comm_size(intra_comm, &tmp_size);
printf("size of intra comm world = %d\n", tmp_size);
MPI_Comm_size(child_comm, &tmp_size);
printf("size of child comm world = %d\n", tmp_size);
MPI_Comm_size(MPI_COMM_WORLD, &tmp_size);
printf("size of parent comm world = %d\n", tmp_size);
}
MPI_Finalize();
The worker (child) code is:
#include<stdio.h>
#include "mpi.h"
int main( int argc, char *argv[] )
{
int numprocs, myrank;
MPI_Comm parentcomm;
MPI_Comm intra_comm;
MPI_Init( &argc, &argv );
MPI_Comm_size( MPI_COMM_WORLD, &numprocs );
MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
MPI_Comm_get_parent( &parentcomm );
MPI_Intercomm_merge(parentcomm, 1, &intra_comm);
MPI_Barrier(parentcomm);
if(myrank == 0)
{
int tmp_size;
MPI_Comm_size(parentcomm, &tmp_size);
printf("child size of parent comm world = %d\n", tmp_size);
MPI_Comm_size(MPI_COMM_WORLD, &tmp_size);
printf("child size of child comm world = %d\n", tmp_size);
MPI_Comm_size(intra_comm, &tmp_size);
printf("child size of intra comm world = %d\n", tmp_size);
MPI_Finalize( );
return 0;
}
}
I run this code using
mpirun -np 12 ./main
After the spawn and merge, I expect the following output:
size of intra comm world = 14
size of child comm world = 2
size of parent comm world = 12
child size of parent comm world = 12
child size of child comm world = 2
child size of intra comm world = 14
But I get the following incorrect output.
size of intra comm world = 3
size of child comm world = 1
size of parent comm world = 12
child size of parent comm world = 2
child size of child comm world = 2
child size of intra comm world = 3
I do not understand where the mistake is. Could someone kindly let me know what I am doing wrong?
Thanks, Kris