I'm trying to parallelize the following Radix Sort algorithm C code using OpenMP but I have some doubts about using the OpenMP clauses. In particular, there are some loops where I doubt that they can be parallelized at all.
Here is the code I'm working on:
unsigned getMax(size_t n, unsigned arr[n]) {
unsigned mx = arr[0];
unsigned i;
#pragma omp parallel for reduction(max:mx) private(i)
for (i = 1; i < n; i++)
if (arr[i] > mx)
mx = arr[i];
return mx;
}
void countSort(size_t n, unsigned arr[n], unsigned exp) {
unsigned output[n]; // output array
int i, count[10] = { 0 };
// Store count of occurrences in count[]
#pragma omp parallel for private(i)
for (i = 0; i < n; i++) {
#pragma omp atomic
count[(arr[i] / exp) % 10]++; }
for (i = 1; i < 10; i++)
count[i] += count[i - 1];
// Build the output array
#pragma omp parallel for private(i)
for (i = (int) n - 1; i >= 0; i--) {
#pragma omp atomic write
output[count[(arr[i] / exp) % 10] - 1] = arr[i];
count[(arr[i] / exp) % 10]--;
}
#pragma omp parallel for private(i)
for (i = 0; i < n; i++)
arr[i] = output[i];
}
// The main function to that sorts arr[] of size n using Radix Sort
void radixsort(size_t n, unsigned arr[n], int threads) {
omp_set_num_threads(threads);
unsigned m = getMax(n, arr);
unsigned exp;
for (exp = 1; m / exp > 0; exp *= 10)
countSort(n, arr, exp);
}
In particular, I'm not sure if for loops like the following can be parallelized or not:
for (i = 1; i < 10; i++)
count[i] += count[i - 1];
#pragma omp parallel for private(i)
for (i = (int) n - 1; i >= 0; i--) {
#pragma omp atomic write
output[count[(arr[i] / exp) % 10] - 1] = arr[i];
count[(arr[i] / exp) % 10]--;
}
I'm asking for help on the specific OMP clauses I should use; other comments on the code shown are also welcome.