1

I'm studying for a university exam on parallel programming with Open MPI. I'm trying to read a file with the master process (rank 0) and send some of the data to all the worker processes, but I get a runtime error that I'm not able to solve. The error is:

An error occurred in MPI_Recv
reported by process [724303873,11]
on communicator MPI_COMM_WORLD
MPI_ERR_BUFFER: invalid buffer pointer
MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
and MPI will try to terminate your MPI job as well)

My code is the following:

/*
 * Compute the number of executor processes.
 *
 * Starting from `cores`, the executor count is reduced until every executor
 * would receive a slice of the text that is at least as long as the pattern
 * (txt_size / executors >= pat_size).
 *
 * txt_size: length of the text in bytes.
 * pat_size: length of the pattern in bytes.
 * cores:    number of available processes.
 *
 * Returns the largest usable executor count in [1, cores], or 0 when the
 * pattern is longer than the whole text (no split, not even a single
 * executor, can hold it). A non-positive `cores` is returned unchanged.
 */
int activeCores(long long int txt_size, long long int pat_size, int cores){

    int executors = cores;

    // Test every candidate, including executors == 1, so that a pattern
    // longer than the text yields 0 instead of a bogus single executor.
    while (executors > 0 && txt_size / executors < pat_size)
    {
        --executors;
    }

    return executors;
}




int main(int argc, char *argv[])
{
    
    //SOME VARIABLES...
    int rank;
    int size;
    //long long int patlen;
    char *text;
    char *pattern;
    long long int occurrences = 0;
    long long int total_occ = 0;
    int ex_size;
    int active = 0;                         // indicates if a core is active or not. Inititally all cores are disabled
    MPI_File text_handler;
    MPI_File pattern_handler;
    MPI_Offset txt_size;    //total dimension of the file
    MPI_Offset pat_size;
    char *chunk;
    long long int chunk_length;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int flags[size];

    clock_t begin = clock();



//reading routine

    if (rank == 0)
    {
    

    MPI_File_open(MPI_COMM_SELF, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &text_handler);
    MPI_File_open(MPI_COMM_SELF, argv[2], MPI_MODE_RDONLY, MPI_INFO_NULL, &pattern_handler);
    MPI_File_get_size(pattern_handler, &pat_size);
    MPI_File_get_size(text_handler, &txt_size);

    pattern = (char *)malloc(sizeof(char)*pat_size + 1);
    text = (char *)malloc(sizeof(char)*txt_size + 1);

    MPI_File_read(pattern_handler, pattern, pat_size, MPI_CHAR, MPI_STATUS_IGNORE);
    MPI_File_read(text_handler, text, txt_size, MPI_CHAR, MPI_STATUS_IGNORE);
    
    text[txt_size] = '\0';
    pattern[pat_size] = '\0';

    MPI_File_close(&text_handler);
    MPI_File_close(&pattern_handler);

    ex_size = activeCores(txt_size, pat_size, size);

    if (ex_size == 0)
        {
            printf("The length of the pattern is bigger than the dimension of the text\n");
            MPI_Finalize();
            exit(-1);
        }

    for (int i = 0; i < size; ++i)
    {
        if (i<ex_size)
        {
            flags[i] = 1;
        } else flags[i] = 0;
    }

    chunk_length = txt_size/ex_size;

    }

    //send flag to all processes
    MPI_Scatter(flags, 1, MPI_INT, &active, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&chunk_length, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&txt_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&ex_size, 1, MPI_INT, 0, MPI_COMM_WORLD);


// splitting text routine

    if (rank == 0 && active == 1)
    {
        chunk = (char *)malloc(sizeof(char) * (chunk_length +1));
        chunk = strncpy(chunk, text, chunk_length);
        chunk[chunk_length]='\0';

        long long int start = 0;
        long long int stop;

        for (int i = 1; i < ex_size; ++i)
        {
            if (i<ex_size-1)
            {
                MPI_Send(&text[chunk_length*i], chunk_length, MPI_CHAR, i, 1, MPI_COMM_WORLD);
            } else if (i == ex_size-1)
            {
                MPI_Send(&text[chunk_length*i], txt_size-i*chunk_length, MPI_CHAR, i, 2, MPI_COMM_WORLD);
            }
        }
    }


    if (rank > 0 && rank < ex_size-1)
    {
        chunk = (char *)malloc(sizeof(char)*chunk_length);
        MPI_Recv(chunk, chunk_length, MPI_CHAR, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        chunk[chunk_length]='\0';
    }

    if (rank == ex_size-1)
    {
        chunk = (char *)malloc(sizeof(char)*(txt_size - rank*chunk_length +1));
        chunk[txt_size - rank*chunk_length]='\0';
        MPI_Recv(chunk, txt_size-rank*chunk_length, MPI_CHAR, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }


    

    //CLOSE THE MPI LAYER
    MPI_Finalize();

    //STOP COUNTING CLOCK CYCLES
    clock_t end = clock();
    
    if (rank == 0)
    {
        //COMPUTING THE TIME SPENT
        double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
        printf("Total occurrences %d\n", total_occ);
        printf("time required %lf\n", time_spent);
        printf("Program executed by %d cores over %d\n", ex_size, size);

    }

    return 0;
}

Can someone please help me? Thank you very much.

I'm implementing a parallel version of the Rabin–Karp algorithm, but I'm stuck on this error and cannot proceed. I tried using different functions such as MPI_Bsend, because I read that it is "safer" and often avoids this type of error, but I didn't succeed.

3
  • 1. Your code does not seem to have anything specific to OpenMPI. Please confirm and I'll remove that tag. 2. Does the error message tell you what call has the problem? Otherwise, start removing code from the end until you know where the problem is. Commented Feb 21, 2024 at 19:34
  • The compiler doesn’t say anything else. The problem seems to be in the MPI_Recv under the condition “rank == ex_size-1”, but I can’t understand why it throws an error. If I remove the MPI_Recv under the condition “rank == ex_size-1”, the program works. Furthermore, the malloc “chunk = (char *)malloc(sizeof(char)*(txt_size - rank*chunk_length +1));” returns NULL even though the argument of the malloc is positive. Why? Commented Feb 21, 2024 at 20:42
  • Print all parameters to confirm the size is strictly positive and reasonable. Note that in the previous block, chunk[chunk_length]='\0'; is a buffer overflow. Commented Feb 22, 2024 at 0:19

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.