I'm studying for a university exam on parallel programming with Open MPI. I'm trying to read a file with the master process (rank 0) and send part of the data to each worker process, but I get a runtime error that I'm not able to solve. The error is:
An error occurred in MPI_Recv
reported by process [724303873,11]
on communicator MPI_COMM_WORLD
MPI_ERR_BUFFER: invalid buffer pointer
MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
and MPI will try to terminate your MPI job as well)
My code is the following:
/*
 * Compute the number of executor processes.
 *
 * Returns the largest count e, with e <= cores, such that every executor
 * receives at least pat_size bytes of text (txt_size / e >= pat_size).
 *
 * Returns 0 when even a single executor cannot hold the pattern, i.e. when
 * txt_size < pat_size, so callers can detect "pattern longer than text".
 * A non-positive `cores` is returned unchanged.
 */
int activeCores(long long int txt_size, long long int pat_size, int cores){
    int executors = cores;

    /* Drop one executor at a time until each chunk is at least as long as
     * the pattern. The `executors > 0` guard lets the count reach 0 and
     * keeps the division safe. */
    while (executors > 0 && txt_size / executors < pat_size) {
        --executors;
    }
    return executors;
}
int main(int argc, char *argv[])
{
//SOME VARIABLES...
int rank;
int size;
//long long int patlen;
char *text;
char *pattern;
long long int occurrences = 0;
long long int total_occ = 0;
int ex_size;
int active = 0; // indicates if a core is active or not. Inititally all cores are disabled
MPI_File text_handler;
MPI_File pattern_handler;
MPI_Offset txt_size; //total dimension of the file
MPI_Offset pat_size;
char *chunk;
long long int chunk_length;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int flags[size];
clock_t begin = clock();
//reading routine
if (rank == 0)
{
MPI_File_open(MPI_COMM_SELF, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &text_handler);
MPI_File_open(MPI_COMM_SELF, argv[2], MPI_MODE_RDONLY, MPI_INFO_NULL, &pattern_handler);
MPI_File_get_size(pattern_handler, &pat_size);
MPI_File_get_size(text_handler, &txt_size);
pattern = (char *)malloc(sizeof(char)*pat_size + 1);
text = (char *)malloc(sizeof(char)*txt_size + 1);
MPI_File_read(pattern_handler, pattern, pat_size, MPI_CHAR, MPI_STATUS_IGNORE);
MPI_File_read(text_handler, text, txt_size, MPI_CHAR, MPI_STATUS_IGNORE);
text[txt_size] = '\0';
pattern[pat_size] = '\0';
MPI_File_close(&text_handler);
MPI_File_close(&pattern_handler);
ex_size = activeCores(txt_size, pat_size, size);
if (ex_size == 0)
{
printf("The length of the pattern is bigger than the dimension of the text\n");
MPI_Finalize();
exit(-1);
}
for (int i = 0; i < size; ++i)
{
if (i<ex_size)
{
flags[i] = 1;
} else flags[i] = 0;
}
chunk_length = txt_size/ex_size;
}
//send flag to all processes
MPI_Scatter(flags, 1, MPI_INT, &active, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&chunk_length, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&txt_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&ex_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
// splitting text routine
if (rank == 0 && active == 1)
{
chunk = (char *)malloc(sizeof(char) * (chunk_length +1));
chunk = strncpy(chunk, text, chunk_length);
chunk[chunk_length]='\0';
long long int start = 0;
long long int stop;
for (int i = 1; i < ex_size; ++i)
{
if (i<ex_size-1)
{
MPI_Send(&text[chunk_length*i], chunk_length, MPI_CHAR, i, 1, MPI_COMM_WORLD);
} else if (i == ex_size-1)
{
MPI_Send(&text[chunk_length*i], txt_size-i*chunk_length, MPI_CHAR, i, 2, MPI_COMM_WORLD);
}
}
}
if (rank > 0 && rank < ex_size-1)
{
chunk = (char *)malloc(sizeof(char)*chunk_length);
MPI_Recv(chunk, chunk_length, MPI_CHAR, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
chunk[chunk_length]='\0';
}
if (rank == ex_size-1)
{
chunk = (char *)malloc(sizeof(char)*(txt_size - rank*chunk_length +1));
chunk[txt_size - rank*chunk_length]='\0';
MPI_Recv(chunk, txt_size-rank*chunk_length, MPI_CHAR, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
//CLOSE THE MPI LAYER
MPI_Finalize();
//STOP COUNTING CLOCK CYCLES
clock_t end = clock();
if (rank == 0)
{
//COMPUTING THE TIME SPENT
double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
printf("Total occurrences %d\n", total_occ);
printf("time required %lf\n", time_spent);
printf("Program executed by %d cores over %d\n", ex_size, size);
}
return 0;
}
Can someone please help me? Thank you very much, guys.
I'm implementing a parallel version of the Rabin-Karp algorithm, but I'm stuck on this error and cannot proceed. I tried different functions such as MPI_Bsend, because I read that it is "safer" and often avoids this type of error, but I didn't succeed.
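`chunk[chunk_length]='\0';` is a buffer overflow: `chunk` was allocated with only `chunk_length` bytes, so index `chunk_length` is one past the end.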