2

I want to parallelize mergesort. My idea was to look at the value in the middle of the first subarray and then perform a binary search on the other array to find an upper bound whose index should act as a pivot to split the second array there.Then I sort the first half of the first array with the first part of the second array sequentially and the second half of the first array with the second half of the second array. I want to uses tasks in open mp such that one thread performs the first merge and another thread performs the second merge.

In my code the assignment cretes garbage values. I.e. out[a]=in[b]; and then when I check printf("%d%",out[a]); I get a value c which is not in[b}. When I print out the array the values out[i] sometimes differ from the value that were shown with the print function differ again, i.e. out[a]=d and d is neither c or in[b]

I have prepared the code and some output of some printstatements that show the values in[b] and in[a].

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/time.h>

#include <iostream>
#include <algorithm>

#include <cstdlib>
#include <cstdio>

#include <cmath>
#include <ctime>
#include <cstring>
#include <omp.h>
// Constants.h
#if !defined(MYLIB_CONSTANTS_H)
#define MYLIB_CONSTANTS_H 1

const int CUTOFF =11;

#endif



int binarysearchfinduperrank(int *in,int n,int value, int projection){

    int* array= in+projection;
    int L=0;
    int R=n;

      printf("\nUpperBinarysearchrankfinder [");
      for(int i=0;i<n; i++){
          printf("%d,",array[i]);

     }
     printf("]\n");
    while(R-L>1){
        int middle = (R+L)/2;
         printf("L:%d,middle:%d,R:%d,value:%d\n",L,middle,R,value);
        if(array[middle]==value){
            while(array[middle]==value&&middle<n){
                middle=middle+1;

            }
             printf("L:%d,R:%d,result:%d,index:%d\n",L,R,middle,middle+projection);
            return middle;

        }
        if(array[middle]<value){
            L=middle;

        }
        if(array[middle]>value){
            R=middle;

        }
    }

      printf("L:%d,R:%d,value:%d\n",L,R,value);

     if(n==1){
         if(array[0]> value){
              printf(" result:0\n,index:%d\n",0+projection);
             return 0;

        }
        else{
             printf(" result:1,index:%d\n",1+projection);
            return 1;

        }

    }

    if(L==0&&array[L]>value){
         printf("L:%d,R:%d,result:%d,index:%d\n",L,R,0,projection);
        return 0;

    }
    if(R==n && array[R-1]<= value){
          printf("L:%d,R:%d,result:%d,index:%d\n",L,R,n,n+projection);
        return n;

    }
    if(R==n&& array[R-1]>value){
          printf("L:%d,R:%d,result:%d,index:%d\n",L,R,R-1,((R-1)+projection));
        return R-1;

    }
    if(array[R]<=value){
          printf("L:%d,R:%d,result:%d,index:%d\n",L,R,R+1,(R+1+projection));
        return R+1;

    }
    if(array[L]<=value){
         printf("L:%d,R:%d,result:%d,index:%d\n",L,R,R,R+projection);
        return R;

    }

     printf("L:%d,R:%d,result:%d,index:%d\n",L,R,L,L+projection);
    return L;


}

void printarray(int *array,int n){
    printf("\n[");
    for(int i=0;i<n;i++){
        printf("%d,",array[i]);
    }
    printf("]\n");
}


void MsMergeSequential(int *out, int *in, int begin1, int end1, int begin2, int end2, int outBegin) {
    int left = begin1;
    int right = begin2;
    int idx = outBegin;
    printf("Inputarray1:");
    printarray(in+begin1,(end1-begin1));

    printf("Inputarray2:");
    printarray(in+begin2,(end2-begin2));


    while (left < end1 && right < end2) {
        if (in[left] <= in[right]) {
            out[idx] = in[left];
            printf("! %d \n",in[left]);
            printf(" %d \n",out[idx]);
            left++;
        } else {

            out[idx] = in[right];
            printf("!! %d \n",in[left]);
            printf(" %d \n",out[idx]);
            right++;
        }
        idx++;
        }

    while (left < end1) {

        out[idx] = in[left];
        printf("!!! %d \n",in[left]);

        printf(" %d \n",out[idx]);
        left++, idx++;
    }
    while (right < end2) {
        out[idx] = in[right];
        printf(" !!!!%d \n",in[right]);
        printf(" %d \n",out[idx]);
        right++, idx++;
    }

    printf("Outputarray:\n[");
    printarray(out+begin1,(end1-begin1)+(end2-begin2));
    printf("]\n");
}


bool myfunc (long i , long j){return (i<j);}

void MsSequential(int *array, int *tmp, bool inplace, long begin, long end) {
  if( end <= begin + CUTOFF -1){

    std::sort(array+begin,array + end, myfunc);
  }
  else  if (begin < (end - 1)) {
           long half =(begin+end) / 2;
           int halffirst= (half+begin)/2;


            #pragma omp taskgroup
         {
           #pragma omp task shared(array) untied if(end-begin >= (1<<15))

             MsSequential(array, tmp, !inplace, begin, half);

             MsSequential(array, tmp, !inplace, half, end);
              }
        if (inplace){
            printf("Originalinputarray-tmp:");
            printarray(tmp+begin,(end-begin));
            int halfsecond=binarysearchfinduperrank(tmp,(end-half),tmp[halffirst],half)+half+begin;
            printf("%d,%d\n",halffirst,halfsecond);
            printf("Halffirst:%d,Halfsecond:%d\n,",tmp[halffirst],tmp[halfsecond]);
            #pragma omp taskgroup
         {
           #pragma omp task shared(tmp) untied if(end-begin >= (1<<15))

             MsMergeSequential(array, tmp, begin, halffirst, half, halfsecond, begin);


            MsMergeSequential(array, tmp, halffirst, half,halfsecond, end, begin+(halffirst-begin)+(halfsecond-half));
              }



        }
        else {
            int halfsecond=binarysearchfinduperrank(array,(end-half),array[halffirst],half)+half;
            printf("%d,%d\n",halffirst,halfsecond);
            printf("Originalinputarray-arr:");
            printarray(array+begin,(end-begin));
            printf("Halffirst:%d,Halfsecond:%d\n,",array[halffirst],array[halfsecond]);


             #pragma omp taskgroup
         {
           #pragma omp task shared(array) untied if(end-begin >= (1<<15))

             MsMergeSequential(tmp, array, begin, halffirst, half,halfsecond, begin);

            MsMergeSequential(tmp, array, halffirst, half,halfsecond, end, begin+halffirst+(halfsecond-half));
              }




        }

    } else if (!inplace) {

        tmp[begin] = array[begin];
    }
}

void MsParallel(int *array, int *tmp, const size_t size) {

    MsSequential(array, tmp, true, 0, size);
}




bool isSorted(int ref[], int data[], const size_t size){
    std::sort(ref, ref + size);
    for (size_t idx = 0; idx < size; ++idx){
        if (ref[idx] != data[idx]) {
            printf("\nFalscher Index:%d\n",idx);
            return false;
        }
    }
    return true;
}

/**

/**
  * @brief program entry point
  */
int main(int argc, char* argv[]) {
    // variables to measure the elapsed time
    struct timeval t1, t2;
    double etime;

    // expect one command line arguments: array size
    if (argc != 2) {
        printf("Usage: MergeSort.exe <array size> \n");
        printf("\n");
        return EXIT_FAILURE;
    }
    else {
        const size_t stSize = strtol(argv[1], NULL, 10);
        int *data = (int*) malloc(stSize * sizeof(int));
        int *tmp = (int*) malloc(stSize * sizeof(int));
        int *ref = (int*) malloc(stSize * sizeof(int));
        printf("Initialization...\n");

        srand(95);

        #pragma omp parallel for num_threads(100) schedule(static)
        {
        for (size_t idx = 0; idx < stSize; ++idx){
            data[idx] = (int) (stSize * (double(rand()) / RAND_MAX));
        }
        }
        std::copy(data, data + stSize, ref);
        double dSize = (stSize * sizeof(int)) / 1024 / 1024;
        printf("Sorting %u elements of type int (%f MiB)...\n", stSize, dSize);
        gettimeofday(&t1, NULL);
        // Mergesort starts
        #pragma omp parallel num_threads(80)
        {
        #pragma omp single
        {
        MsParallel(data, tmp, stSize);
        }
        }



        gettimeofday(&t2, NULL);
        etime = (t2.tv_sec - t1.tv_sec) * 1000 + (t2.tv_usec - t1.tv_usec) / 1000;
        etime = etime / 1000;
        printf("done, took %f sec. Verification...", etime);
        if (isSorted(ref, data, stSize)) {
            printf(" successful.\n");
        }
        else {
            printf(" FAILED.\n");
        }
        free(data);
        //delete[] data;
        free(tmp);
        //delete[] tmp;
        free(ref);
        //delete[] ref;
    }
    return EXIT_SUCCESS;
}

Output:(the ! marks are used to locate where the assignment in the code tool place)

Initialization...
Sorting 40 elements of type int (0.000000 MiB)...

UpperBinarysearchrankfinder [0,8,8,15,26,29,30,38,39,39,]
L:0,middle:5,R:10,value:15
L:0,middle:2,R:5,value:15
L:2,middle:3,R:5,value:15
L:2,R:5,result:4,index:14
5,14
Originalinputarray-arr:
[0,4,6,7,11,15,17,19,29,33,0,8,8,15,26,29,30,38,39,39,]
Halffirst:15,Halfsecond:26
,Inputarray1:
[0,4,6,7,11,]
Inputarray2:
[0,8,8,15,]
! 0
 0
!! 4
 0
! 4
 4
! 6
 6
! 7
 7
!! 11
 8
!! 11
 8
! 11
 11
 !!!!15
 15
Outputarray:
[
[0,0,4,6,7,8,8,11,15,]
]
Inputarray1:
[15,17,19,29,33,]
Inputarray2:
[26,29,30,38,39,39,]
! 15
 15
! 17
 17
! 19
 19
!! 29
 26
! 29
 29
!! 33
 29
!! 33
 30
! 33
 33
 !!!!38
 38
 !!!!39
 39
 !!!!39
 39
Outputarray:
[
[8,8,11,15,15,17,19,26,29,29,30,]
]

UpperBinarysearchrankfinder [1,5,7,11,16,26,27,30,32,36,]
L:0,middle:5,R:10,value:26
L:0,R:10,result:6,index:36
25,36
Originalinputarray-arr:
[3,4,5,7,10,26,29,33,34,35,1,5,7,11,16,26,27,30,32,36,]
Halffirst:26,Halfsecond:27
,Inputarray1:
[3,4,5,7,10,]
Inputarray2:
[1,5,7,11,16,26,]
!! 3
 1
! 3
 3
! 4
 4
! 5
 5
!! 7
 5
! 7
 7
!! 10
 7
! 10
 10
 !!!!11
 11
 !!!!16
 16
 !!!!26
 26
Outputarray:
[
[1,3,4,5,5,7,7,10,11,16,26,]
]
Inputarray1:
[26,29,33,34,35,]
Inputarray2:
[27,30,32,36,]
! 26
 26
!! 29
 27
! 29
 29
!! 33
 30
!! 33
 32
! 33
 33
! 34
 34
! 35
 35
 !!!!36
 36
Outputarray:
[
[7,7,10,11,16,26,-1751738988,-1684366952,-6382180,]
]
Originalinputarray-tmp:
[0,0,4,6,7,8,8,11,15,15,17,19,26,29,29,30,33,38,39,39,1,3,4,5,5,7,7,10,11,16,26,-1751738988,-1684366952,-6382180,-1549622880,-1482250844,-1414878808,-1347506772,-1280134736,-1212762700,]

UpperBinarysearchrankfinder [1,3,4,5,5,7,7,10,11,16,26,-1751738988,-1684366952,-6382180,-1549622880,-1482250844,-1414878808,-1347506772,-1280134736,-1212762700,]
L:0,middle:10,R:20,value:17
L:0,middle:5,R:10,value:17
L:5,middle:7,R:10,value:17
L:7,middle:8,R:10,value:17
L:8,middle:9,R:10,value:17
L:9,R:10,value:17
L:9,R:10,result:10,index:30
10,30
Halffirst:17,Halfsecond:26
,Inputarray1:
[0,0,4,6,7,8,8,11,15,15,]
Inputarray2:
[1,3,4,5,5,7,7,10,11,16,]
! 0
 0
! 0
 0
!! 4
 1
!! 4
 3
! 4
 4
!! 6
 4
!! 6
 5
!! 6
 5
! 6
 6
! 7
 7
!! 8
 7
!! 8
 7
! 8
 8
! 8
 8
!! 11
 10
! 11
 11
!! 15
 11
! 15
 15
! 15
 15
 !!!!16
 16
Outputarray:
[
[0,0,1,3,4,4,5,5,6,7,7,7,8,8,10,11,11,15,15,16,]
]
Inputarray1:
[17,19,26,29,29,30,33,38,39,39,]
Inputarray2:
[26,-1751738988,-1684366952,-6382180,-1549622880,-1482250844,-1414878808,-1347506772,-1280134736,-1212762700,]
! 17
 17
! 19
 19
! 26
 26
!! 29
 26
!! 29
 -1751738988
!! 29
 -1684366952
!! 29
 -6382180
!! 29
 -1549622880
!! 29
 -1482250844
!! 29
 -1414878808
!! 29
 -1347506772
!! 29
 -1280134736
!! 29
 -1212762700
!!! 29
 29
!!! 29
 29
!!! 30
 30
!!! 33
 33
!!! 38
 38
!!! 39
 39
!!! 39
 39
Outputarray:
[
[7,7,8,8,10,11,11,15,15,16,17,19,26,26,-1751738988,-1684366952,-6382180,-1549622880,-1482250844,-1414878808,]
]
done, took 0.046000 sec. Verification...
Falscher Index:1
 FAILED.
1
  • 1
    As far as I can tell, you never assign any values to any elements of tmp array, but you are certainly reading them. Whereupon your program exhibits undefined behavior, by way of accessing uninitialized objects. Commented Jul 24, 2021 at 19:06

3 Answers 3

2

If you want to parallelize it the you should use threads to do the sorting part in parallel. I don't see any parallelization in your code.

The normal Mergesort is:

mergesort(arr, l, r):
1. middle = (l + r)/2
2. mergesort(arr, l, middle)
3. mergesort(arr, middle + 1, r)
4. merge(arr, l, middle, r) //merges the subarray

If you wanna parallelize, you can create a thread to do 2 and another thread to do 3 and then join them, once they're done, you do the merge.

mergesort(arr, l, r):
1. middle = (l + r)/2
2. create thread to execute mergesort(arr, l, middle)
3. create thread to execute mergesort(arr, middle + 1, r)
4. wait until both threads are done
5. merge(arr, l, middle, r) //merges the subarray

The time complexity is T(n) = T(n/2) + O(n) which if you solve will be n + n/2 + n/4 + ... 1 = O(n).

Sign up to request clarification or add additional context in comments.

3 Comments

The time complexity is reduced to linear if the system can run n threads in parallel, which is highly unlikely.
That's true. In theory, theory and practice are the same but in practice they're not :)
For merge sort, typically the initial call is mergesort(arr, 0, n), where n is number of elements in arr[], in which case the recursive calls would be mergesort(arr, l, middle) | mergesort(arr, middle, r). {not middle + 1} .
1

I created a simplified sequential example of a top down merge sort using binary search for the merge. I verified that the code is a stable sort using a modified version. MergeSortAtoA and MergeSortAtoB are mutually recursive functions that call each other, instead of passing an inplace flag. The main point of this are the functions BinSrchLo, BinSrchHi, and Merge. BinSrchxx(a, ll, rr, v) returns the index where v would be inserted into a[ll,rr] (as if a[] is expanded by 1 element to make room for the inserted element). For example, if v < a[ll], it returns ll or if v > a[rr-1], it returns rr. Note that merge assigns b[i+j-rr] = a[i] in both for loops. Check for differences between your code and this working example.

void MergeSortAtoA(int a[], int b[], size_t ll, size_t ee);
void MergeSortAtoB(int a[], int b[], size_t ll, size_t ee);
void Merge(int a[], int b[], size_t ll, size_t rr, size_t ee);

void MergeSort(int a[], int b[], size_t n)
{
    MergeSortAtoA(a, b, 0, n);
}

void MergeSortAtoA(int a[], int b[], size_t ll, size_t ee)
{
    if (ee-ll > 1) {
        size_t rr = (ll + ee)>>1;       /* midpoint, start of right half */
        MergeSortAtoB(a, b, ll, rr);
        MergeSortAtoB(a, b, rr, ee);
        Merge(b, a, ll, rr, ee);        /* merge b to a */
    }
}

void MergeSortAtoB(int a[], int b[], size_t ll, size_t ee)
{
    if (ee-ll > 1) {
        size_t rr = (ll+ee)/2;          /* midpoint, start of right half */
        MergeSortAtoA(a, b, ll, rr);
        MergeSortAtoA(a, b, rr, ee);
        Merge(a, b, ll, rr, ee);        /* merge a to b */
    } else if (ee-ll == 1) {
        b[ll] = a[ll];
    }
}

size_t BinSrchLo(int a[], size_t ll, size_t rr, int v)
{
    while(ll < rr){
        size_t i = (ll+rr)/2;
        if(a[i] < v)
            ll = i+1;
        else
            rr = i;
    }
    return ll;
}

size_t BinSrchHi(int a[], size_t ll, size_t rr, int v)
{
    while(ll < rr){
        size_t i = (ll+rr)/2;
        if(a[i] <= v)
            ll = i+1;
        else
            rr = i;
    }
    return ll;
}

void Merge(uint64_t a[], uint64_t b[], size_t ll, size_t rr, size_t ee)
{
    for(size_t i = ll; i < rr; i++){
        size_t j = BinSrchLo(a, rr, ee, a[i]);
        b[i+j-rr] = a[i];
    }
    for(size_t i = rr; i < ee; i++){
        size_t j = BinSrchHi(a, ll, rr, a[i]);
        b[i+j-rr] = a[i];
    }
}

Comments

0

There were some errors in the code so that the variable halffirst was not assigned to the index it was supposed to. And also the leaf arrays at the bottom were filled with garbagevalues. The fixed code is here. The runtime for an array of the size 10^7 is 0.2 seconds, and for an array of the size 10^8 2.4 seconds, assumed that the computer has 256 threads

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/time.h>

#include <iostream>
#include <algorithm>

#include <cstdlib>
#include <cstdio>

#include <cmath>
#include <ctime>
#include <cstring>
#include <omp.h>
// Constants.h
#if !defined(MYLIB_CONSTANTS_H)
#define MYLIB_CONSTANTS_H 1

const int CUTOFF =11;

#endif



int binarysearchfinduperrank(int *in,int n,int value, int projection){

    int* array= in+projection;
    int L=0;
    int R=n;

      /*printf*/("\nUpperBinarysearchrankfinder [");
//       for(int i=0;i<n; i++){
//           printf("%d,",array[i]);

//      }
//      printf("]\n");
    while(R-L>1){
        int middle = (R+L)/2;
//          printf("L:%d,middle:%d,R:%d,value:%d\n",L,middle,R,value);
        if(array[middle]==value){
            while(array[middle]==value&&middle<n){
                middle=middle+1;

            }
//              printf("L:%d,R:%d,result:%d,index:%d\n",L,R,middle,middle+projection);
            return middle;

        }
        if(array[middle]<value){
            L=middle;

        }
        if(array[middle]>value){
            R=middle;

        }
    }

//       printf("L:%d,R:%d,value:%d\n",L,R,value);

     if(n==1){
         if(array[0]> value){
//               printf(" result:0\n,index:%d\n",0+projection);
             return 0;

        }
        else{
//              printf(" result:1,index:%d\n",1+projection);
            return 1;

        }

    }

    if(L==0&&array[L]>value){
//          printf("L:%d,R:%d,result:%d,index:%d\n",L,R,0,projection);
        return 0;

    }
    if(R==n && array[R-1]<= value){
//           printf("L:%d,R:%d,result:%d,index:%d\n",L,R,n,n+projection);
        return n;

    }
    if(R==n&& array[R-1]>value){
//           printf("L:%d,R:%d,result:%d,index:%d\n",L,R,R-1,((R-1)+projection));
        return R-1;

    }
    if(array[R]<=value){
//           printf("L:%d,R:%d,result:%d,index:%d\n",L,R,R+1,(R+1+projection));
        return R+1;

    }
    if(array[L]<=value){
//          printf("L:%d,R:%d,result:%d,index:%d\n",L,R,R,R+projection);
        return R;

    }

//      printf("L:%d,R:%d,result:%d,index:%d\n",L,R,L,L+projection);
    return L;


}

void printarray(int *array,int n){
    printf("\n[");
    for(int i=0;i<n;i++){
        printf("%d,",array[i]);
    }
    printf("]\n");
}


void MsMergeSequential(int *out, int *in, int begin1, int end1, int begin2, int end2, int outBegin) {
    int left = begin1;
    int right = begin2;
    int idx = outBegin;
//     printf("Inputarray1:");
//     printarray(in+begin1,(end1-begin1));

//     printf("Inputarray2:");
//     printarray(in+begin2,(end2-begin2));


    while (left < end1 && right < end2) {
        if (in[left] <= in[right]) {
            out[idx] = in[left];
//             printf("! %d \n",in[left]);
//             printf(" %d \n",out[idx]);
            left++;
        } else {

            out[idx] = in[right];
//             printf("!! %d \n",in[left]);
//             printf(" %d \n",out[idx]);
            right++;
        }
        idx++;
        }

    while (left < end1) {

        out[idx] = in[left];
//         printf("!!! %d \n",in[left]);

//         printf(" %d \n",out[idx]);
        left++, idx++;
    }
    while (right < end2) {
        out[idx] = in[right];
//         printf(" !!!!%d \n",in[right]);
//         printf(" %d \n",out[idx]);
        right++, idx++;
    }

//  printf("Outputarray:\n[");
//     printarray(out+outBegin,end2-outBegin);
//     printf("]\n");
}


bool myfunc (long i , long j){return (i<j);}

void MsSequential(int *array, int *tmp, bool inplace, long begin, long end) {
  if( end <= begin + CUTOFF -1){

    std::sort(array+begin,array + end, myfunc);
    std::copy(array+begin,array+end,tmp+begin);
  }
  else  if (begin < (end - 1)) {
           long half =(begin+end) / 2;
           int halffirst= (half+begin)/2;


            #pragma omp taskgroup
         {
           #pragma omp task shared(array) untied if(end-begin >= (1<<15))

             MsSequential(array, tmp, !inplace, begin, half);

             MsSequential(array, tmp, !inplace, half, end);
              }
        if (inplace){
//             printf("Originalinputarray-tmp:");
//             printarray(tmp+begin,(end-begin));
            int halfsecond=binarysearchfinduperrank(tmp,(end-half),tmp[halffirst],half)+half;
//             printf("%d,%d\n",halffirst,halfsecond);
//             printf("Halffirst:%d,Halfsecond:%d\n,",tmp[halffirst],tmp[halfsecond]);
            #pragma omp taskgroup
         {
           #pragma omp task shared(tmp) untied if(end-begin >= (1<<15))

             MsMergeSequential(array, tmp, begin, halffirst, half, halfsecond, begin);


            MsMergeSequential(array, tmp, halffirst, half,halfsecond, end, begin+(halffirst-begin)+(halfsecond-half));
              }



        }
        else {
            int halfsecond=binarysearchfinduperrank(array,(end-half),array[halffirst],half)+half;
//             printf("%d,%d\n",halffirst,halfsecond);
//             printf("Originalinputarray-arr:");
//             printarray(array+begin,(end-begin));
//             printf("Halffirst:%d,Halfsecond:%d\n,",array[halffirst],array[halfsecond]);


             #pragma omp taskgroup
         {
           #pragma omp task shared(array) untied if(end-begin >= (1<<15))

             MsMergeSequential(tmp, array, begin, halffirst, half,halfsecond, begin);

            MsMergeSequential(tmp, array, halffirst, half,halfsecond, end, begin+(halffirst-begin)+(halfsecond-half));
              }




        }

    } else if (!inplace) {

        tmp[begin] = array[begin];
    }
}

void MsParallel(int *array, int *tmp, const size_t size) {

    MsSequential(array, tmp, true, 0, size);
}




bool isSorted(int ref[], int data[], const size_t size){
    std::sort(ref, ref + size);
    for (size_t idx = 0; idx < size; ++idx){
        if (ref[idx] != data[idx]) {
//             printf("\nFalscher Index:%d\n",idx);
            return false;
        }
    }
    return true;
}

/**

/**
  * @brief program entry point
  */
int main(int argc, char* argv[]) {
    // variables to measure the elapsed time
    struct timeval t1, t2;
    double etime;

    // expect one command line arguments: array size
    if (argc != 2) {
//         printf("Usage: MergeSort.exe <array size> \n");
//         printf("\n");
        return EXIT_FAILURE;
    }
    else {
        const size_t stSize = strtol(argv[1], NULL, 10);
        int *data = (int*) malloc(stSize * sizeof(int));
        int *tmp = (int*) malloc(stSize * sizeof(int));
        int *ref = (int*) malloc(stSize * sizeof(int));
        printf("Initialization...\n");

        srand(95);

        #pragma omp parallel for num_threads(100) schedule(static)
        {
        for (size_t idx = 0; idx < stSize; ++idx){
            data[idx] = (int) (stSize * (double(rand()) / RAND_MAX));
        }
        }
        std::copy(data, data + stSize, ref);
        double dSize = (stSize * sizeof(int)) / 1024 / 1024;
        printf("Sorting %u elements of type int (%f MiB)...\n", stSize, dSize);
        gettimeofday(&t1, NULL);
        // Mergesort starts
        #pragma omp parallel num_threads(80)
        {
        #pragma omp single
        {
        MsParallel(data, tmp, stSize);
        }
        }



        gettimeofday(&t2, NULL);
        etime = (t2.tv_sec - t1.tv_sec) * 1000 + (t2.tv_usec - t1.tv_usec) / 1000;
        etime = etime / 1000;
        printf("done, took %f sec. Verification...", etime);
        if (isSorted(ref, data, stSize)) {
            printf(" successful.\n");
        }
        else {
            printf(" FAILED.\n");
        }
        free(data);
        //delete[] data;
        free(tmp);
        //delete[] tmp;
        free(ref);
        //delete[] ref;
    }
    return EXIT_SUCCESS;
}

1 Comment

Using 8 threads on a laptop 4 core Intel Core i7-10510U, sorting 10^8 pseudo random integers took 1.9 seconds. This was without the binary search. After the 8 parts were sorted, then 4 threads were used to merge 8 runs to 4 runs, 2 threads to merge 4 runs to 2 runs, 1 thread to merge 2 runs to 1 run. Due to cache and memory conflicts with 8 threads on 4 cores with common L3 cache and main memory, the 8 thread version is only ~23.5% faster than a 4 thread version.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.