I implemented the \$O(N)\$ solution linked by Jerry Coffinsolution linked by Jerry Coffin to see whether it was measurably better than my solution. Here is the code for it:
#include <stdio.h>
#include <stdlib.h>
static void inPlaceShuffle(int *array, int len);
static inline void cyclePermute(int *array, int start, int len);
static inline void reverseArray(int *array, int start, int end);
void printArray(int *array, int len)
{
for (int i=0; i < len; ++i)
printf("%d ", array[i]);
printf("\n");
}
// Takes an array, and interleaves its first half with the reverse of the
// second half:
//
// 0 1 2 3 4 5 6 7 (First half = 0 1 2 3) (Rev 2nd half = 7 6 5 4)
// 0 7 1 6 2 5 3 4
void weave(int *array, int start, int len)
{
int halfLen = len >> 1;
int mid = start + halfLen;
int end = len - 1;
// First step: reverse the second half.
while (mid < end) {
int tmp = array[mid];
array[mid++] = array[end];
array[end--] = tmp;
}
// Second step, swap the first and second half, because the shuffler
// makes the second half go first.
for (int i=0, mid=start + halfLen; i < halfLen; i++) {
int tmp = array[start];
array[start++] = array[mid];
array[mid++] = tmp;
}
// Next step, weave the second half with the second half. If the length
// of the array is odd, only weave len-1 entries because the last entry
// is in the correct place already.
inPlaceShuffle(array, len & ~1);
}
// Takes an array, and interleaves its second half with its first half:
//
// 0 1 2 3 4 5 6 7 (First half = 0 1 2 3) (Second half = 4 5 6 7)
// 4 0 5 1 6 2 7 3
//
// Note: len must always be an even number.
//
// Solution taken from this article: http://arxiv.org/pdf/0805.1598v1.pdf
static void inPlaceShuffle(int *array, int len)
{
int k = 0;
int m = 0;
int n = len >> 1;
if (len < 2)
return;
if (len == 2) {
int tmp = array[0];
array[0] = array[1];
array[1] = tmp;
return;
}
// Step 1: Find k such that 3^k <= len < 3^(k+1)
// Then set m = (3^k - 1) / 2
for (m = 1; m <= len; k++, m *= 3);
k--;
m = ((m / 3) - 1) >> 1;
// Step 2: Do a cyclic right shift of A[m, ..., n+m-1] by a distance m
// where n is len/2.
reverseArray(array, m, n+m-1);
reverseArray(array, m, m+m-1);
reverseArray(array, m+m, n+m-1);
// Step 3: For i = 0 .. k-1, do a cyclic permutation starting at 3^i-1
for (int i = 0, start = 1, mlen = m+m; i < k; i++, start *= 3) {
cyclePermute(array, start-1, mlen);
}
// Step 4: Recurse on A[2m, ..., 2n-1]
inPlaceShuffle(array + m + m, len - m - m);
}
// Reverse array from index start to index end inclusive.
static inline void reverseArray(int *array, int start, int end)
{
while (start < end) {
int tmp = array[start];
array[start++] = array[end];
array[end--] = tmp;
}
}
// Starting from index start, permutes the elements in a cycle, where the
// new permutation is the shuffled permutation.
static inline void cyclePermute(int *array, int start, int len)
{
int halfLen = len >> 1;
int i = start;
int hold = array[i];
int next = halfLen + (i >> 1);
while (next != start) {
array[i] = array[next];
i = next;
// array[i] in the final array is:
//
// array[halfLen + i/2] if i is even
// array[(i-1)/2] if i is odd
if (i & 1) {
next = (i >> 1);
} else {
next = halfLen + (i >> 1);
}
} while (next != start);
array[i] = hold;
}
int main(int argc, char *argv[])
{
int size;
int *array;
if (argc < 2)
size = 10;
else
size = atoi(argv[1]);
array = calloc(size, sizeof(*array));
for (int i=0; i < size; ++i)
array[i] = i;
//printArray(array, size);
weave(array, 0, size);
//printArray(array, size);
}
On 200000 elements:
OP code: 7.58 sec
Mine : 0.01 sec
Linear : 0.01 sec
On 200000000 elements:
Mine : 1.84 sec
Linear : 3.74 sec
On 200000 elements:
OP code: 7.58 sec
Mine : 0.01 sec
Linear : 0.01 sec
On 200000000 elements:
Mine : 1.84 sec
Linear : 3.74 sec
My solution swaps blocks at a time, but does so in a linear fashion. Therefore, the memory accesses are completely predictablevery cache friendly and shouldn't cause nearly as many cache misses. So even though my version is theoretically slower, it is practically better.