Dynamic 2D array using double pointer in CUDA [duplicate]

Ask Question

Asked 8 years, 8 months ago

Modified 8 years, 8 months ago

Viewed 154 times

I'm trying to write a simple array sum between two dynamic arrays using double pointers, both for host and device arrays. The following code can be compiled normally, but at the end the array h_c, containing the result of the sum, is full of zeros. I think that the device arrays are bad allocated and the host arrays aren't transferred properly. Can someone fix the problem? Thanks.

#include <stdio.h>
#include <cuda_runtime.h>
#define N 16
#define BLOCK_DIM 4

__global__ void matrixAdd (int **a, int **b, int **c) {

int col = blockIdx.x * blockDim.x + threadIdx.x;
int row = blockIdx.y * blockDim.y + threadIdx.y;

if (col < N && row < N) 
c[row][col] = a[row][col] + b[row][col];}

int** create(int row, int col){
  int i,j;
  int** temp;

  temp=(int**) malloc(row*sizeof(int*));
  for(i=0;i<row;i++)
    temp[i]=(int*) malloc(row*sizeof(int));

  for(i=0;i<row;i++)
    for(j=0;j<col;j++)
    temp[i][j]=0;

 return(temp);}

void destroy(int **temp,int rows){
  int i;

  for(i=0;i<rows;i++)
    free(temp[i]);
  free(temp);
 }

int main() {
int i,j;
int** h_a=create(N,N);
int** h_b=create(N,N);
int** h_c=create(N,N);
int **dev_a, **dev_b, **dev_c;
int size = N * N * sizeof(int);

cudaMalloc((void**)&dev_a, size);
cudaMalloc((void**)&dev_b, size);
cudaMalloc((void**)&dev_c, size);

for(i=0;i<N;i++)  
   for(j=0;j<N;j++)
    h_a[i][j]=5;

for(i=0;i<N;i++)  
   for(j=0;j<N;j++)
    h_b[i][j]=15;  


cudaMemcpy(dev_a, h_a, size, cudaMemcpyHostToDevice);
cudaMemcpy(dev_b, h_b, size, cudaMemcpyHostToDevice);

dim3 dimBlock(BLOCK_DIM, BLOCK_DIM);
dim3 dimGrid((int)ceil(N/dimBlock.x),(int)ceil(N/dimBlock.y));

matrixAdd<<<dimGrid,dimBlock>>>(dev_a,dev_b,dev_c);
cudaMemcpy(h_c,dev_c, size, cudaMemcpyDeviceToHost);

for(i=0;i<N;i++)
{  
   for(j=0;j<N;j++)
    printf("%d ",h_c[i][j]);
   printf("\n");
}

cudaFree(dev_a); cudaFree(dev_b); cudaFree(dev_c);

destroy(h_a,N); destroy(h_b,N); destroy(h_c,N);

return 0;}

edited Mar 10, 2017 at 11:56

talonmies

72.7k35 gold badges204 silver badges296 bronze badges

asked Mar 9, 2017 at 10:00

horus

911 silver badge9 bronze badges

1

hover your mouse over the cuda tag. When the pop up appears, click on "info". In the page that opens up, click on "Using Arrays of pointers in CUDA". Start reading. Using dynamically allocated 2D arrays is non-trivial, and you have two errors. 1. You must properly allocate the device array so that the pointers can be chased. 2. Your host array cannot be conveniently allocated with a bunch of malloc operations like you are doing. You need a guaranteed contiguous array on the host, if you dont want to have to use a loop to transfer data from host to device and back.

Robert Crovella
– Robert Crovella

2017-03-09 14:52:36 +00:00
Commented Mar 9, 2017 at 14:52
Thank you talonmies. Yes, my question is similar to the one you cited. The following code works! But I'm not able to understand why it is necessary to define h_a and a casting to (void***) for d&a. Can someone explain me it? Thanks.

horus
– horus

2017-03-15 14:06:11 +00:00
Commented Mar 15, 2017 at 14:06
int** aa=create(2,2); aa[0][0]=1; aa[0][1]=2;aa[1][0]=3;aa[1][1]=4; int** h_a = (int**)malloc(2 * sizeof(int*)); for(int i=0; i<2;i++){ cudaMalloc((void**)&h_a[i], 2*sizeof(int)); cudaMemcpy(h_a[i], &aa[i][0], 2*sizeof(int), cudaMemcpyHostToDevice); } int** d_a; cudaMalloc((void ***)&d_a, 2 * sizeof(int*)); cudaMemcpy(d_a, h_a, 2*sizeof(int *), cudaMemcpyHostToDevice);

horus
– horus

2017-03-15 14:12:48 +00:00
Commented Mar 15, 2017 at 14:12

Add a comment |

0

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.

Collectives™ on Stack Overflow

Dynamic 2D array using double pointer in CUDA [duplicate]

0

Linked

Hot Network Questions