So I'm trying to copy a jagged array from host to device. First of all here is my current understanding of cudaMalloc and cudaMemcpy:
cudaMalloc takes a pointer to the pointer to the memory block.
cudaMemcpy takes a pointer to the memory block to copy to or from.
Correct me if I'm wrong please.
Now this is my code that doesn't work (compiles fine but no output):
__global__ void kernel(int** arr)
{
for (int i=0; i<3; i++)
printf("%d\n", arr[i][0]);
}
int main()
{
int arr[][3] = {{1},{2},{3}}; // 3 arrays, 1 element each
int **d_arr;
cudaMalloc((void**)(&d_arr), sizeof(int*)*3); // allocate for 3 int pointers
for (int i=0; i<3; i++)
{
cudaMalloc( (void**) &(d_arr[i]), sizeof(int) * 1 ); // allocate for 1 int in each int pointer
cudaMemcpy(d_arr[i], arr[i], sizeof(int) * 1, cudaMemcpyHostToDevice); // copy data
}
kernel<<<1,1>>>(d_arr);
cudaDeviceSynchronize();
cudaDeviceReset();
}
So what am I doing wrong here? Cheers