How to copy cudaArray (Cuda) to GpuMat (OpenCV)?

Question

I tried both cudaMemcpy2DFromArray and cudaMemcpy2D, but neither of them works correctly. By not working correctly, I mean that the GpuMat did copy something from the cudaArray, but the horizontal scale was wrong.

The code snippet is the following:

// Map the registered graphics resource and fetch the cudaArray behind it
// (sub-resource index 0, mip level 0).
cudaArray *colorArr;
checkCudaErrors( cudaGraphicsMapResources( 1, &cudaResourceColor, 0 ) );
checkCudaErrors( cudaGraphicsSubResourceGetMappedArray( &colorArr, cudaResourceColor, 0, 0 ) );

// Destination image on the device: w x h pixels, three 32-bit floats per pixel.
cv::gpu::GpuMat gpuColorMat(Size(w,h), CV_32FC3);   

// Tried method 1: the following didn't work correctly
// NOTE(review): the copy width is computed as cols*sizeof(float3), i.e. it assumes
// each element of colorArr is float3-sized. Confirm this against the format the
// GL texture was created with.
checkCudaErrors( cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step, colorArr, 
    0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice ) );

// Tried method 2: also didn't work correctly. Same error as the first method.
// Stages the data through a linear buffer (cuarr) before copying into the GpuMat.
// NOTE(review): the first copy writes cuarr with a destination pitch of w*h, while
// the second copy reads cuarr with a source pitch of w*sizeof(float3); the two
// pitches disagree.
// NOTE(review): cuarr is allocated here but never freed in this snippet.
float3 *cuarr; 
checkCudaErrors( cudaMalloc( (void**)&cuarr, w*h*sizeof(float3) ) );
checkCudaErrors( cudaMemcpy2DFromArray( cuarr, w*h, colorArr, 0, 0, w*sizeof(float3), h, cudaMemcpyDeviceToDevice ) );
checkCudaErrors( cudaMemcpy2D( (float*)gpuColorMat.data, gpuColorMat.step, cuarr, w*sizeof(float3), w*sizeof(float3), h, cudaMemcpyDeviceToDevice ) );

// unmap buffer objects
checkCudaErrors( cudaGraphicsUnmapResources( 1, &cudaResourceColor, 0 ) );

Can anyone help me with this?

Feng · Accepted Answer · 2013-11-03 16:16:04Z

I finally made it work. I'm sharing my code below:

.cu file: do the device array copy. The rendered image is bound to the texture inTex, and it is copied to the destination float3 *dst.

// 2D texture reference with float4 texels, read without type conversion.
// It must be bound to a cudaArray (see BindToTexture) before the kernel runs.
texture<float4, 2, cudaReadModeElementType> inTex;

/**
 * Copies the RGB channels of the texture bound to inTex into a pitched
 * float3 image; the fourth (w) channel of each texel is dropped.
 *
 * Launch layout: one thread per pixel on a 2D grid. The grid may overshoot
 * the image in either dimension; surplus threads exit without touching memory.
 *
 * @param dst      base pointer of the destination image
 * @param dstStep  distance between consecutive destination rows, in BYTES
 * @param width    image width in pixels
 * @param height   image height in pixels
 */
__global__ void CuDeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height )
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Valid pixels are [0, width) x [0, height). The comparison has to be >=,
    // otherwise threads with x == width (or y == height) write one element
    // past the end of a row (or one row past the end of the image).
    if ( x >= width || y >= height ) return;

    const float4 res = tex2D(inTex, x, y);

    // Rows are dstStep bytes apart, so step through the image as bytes and
    // only then reinterpret the row start as float3. The offset is computed
    // in size_t so that y * dstStep cannot overflow int on large images.
    float3* row_y = (float3*)((char*)dst + (size_t)y * dstStep);
    row_y[x] = make_float3(res.x, res.y, res.z);
}

// Integer division n/m rounded up instead of truncated
// (e.g. 10/3 -> 4, 9/3 -> 3). Used to size launch grids.
inline int iDivUp(int n, int m)
{
    const int biased = n + m - 1;
    return biased / m;
}

/**
 * Host-side launcher: copies the texture currently bound to inTex into a
 * pitched float3 device image by running CuDeviceArrayCopyFromTexture with
 * one thread per pixel.
 *
 * @param dst      device pointer to the destination image
 * @param dstStep  distance between consecutive destination rows, in bytes
 * @param width    image width in pixels
 * @param height   image height in pixels
 */
void DeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height )
{
    // Nothing to copy; a zero-sized grid would also be an invalid launch configuration.
    if ( width <= 0 || height <= 0 ) return;

    dim3 threads( 64, 1 );
    // Round up in BOTH dimensions so the grid always covers the whole image,
    // even if the block shape is changed later; the kernel discards surplus threads.
    dim3 grid( iDivUp( width, threads.x ), iDivUp( height, threads.y ) );
    CuDeviceArrayCopyFromTexture <<< grid, threads >>> ( dst, dstStep, width, height );

    // A kernel launch returns no status itself; configuration errors only
    // surface through cudaGetLastError().
    checkCudaErrors( cudaGetLastError() );
}

// Binds the given cudaArray to the file-scope texture reference inTex, so that
// code sampling inTex reads from that array. The status returned by
// cudaBindTextureToArray is passed to checkCudaErrors.
void BindToTexture( cudaArray *cuArr )
{
     checkCudaErrors( cudaBindTextureToArray( inTex, cuArr ) );
}

.cpp file: set up gl render texture, bind to cuda texture and call device array copy method.

// Create the colour texture that the scene is rendered into.
glActiveTexture(GL_TEXTURE0);
glGenTextures(1, &fboColorTex);
glBindTexture(GL_TEXTURE_2D, fboColorTex);
// GL_RGB16F and GL_RGB32F both failed here, so a four-component format is used:
// with GL_RGBA16F the texture can be mapped to a cudaArray with float4 elements.
// No initial pixel data is uploaded (NULL); only the storage is allocated.
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Register the texture with CUDA as read-only. cudaGraphicsGLRegisterImage takes
// cudaGraphicsRegisterFlags* values; the cudaGraphicsMapFlags* enum belongs to
// cudaGraphicsResourceSetMapFlags.
checkCudaErrors( cudaGraphicsGLRegisterImage( &cudaResourceColor, fboColorTex, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsReadOnly ) );

// Entry points implemented in the .cu file.
extern void BindToTexture( cudaArray *cuArr );
extern void DeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height );

// Destination image: w x h pixels, three 32-bit floats per pixel.
static GpuMat gpuMat( Size(w,h), CV_32FC3 );
cudaArray *cuArr;

// Copy color buffer
// Map the registered GL texture and fetch the cudaArray behind it
// (sub-resource index 0, mip level 0).
checkCudaErrors( cudaGraphicsMapResources( 1, &cudaResourceColor, 0 ) );
checkCudaErrors( cudaGraphicsSubResourceGetMappedArray( &cuArr, cudaResourceColor, 0, 0 ) );

// Bind the mapped array to the texture, then copy it into the GpuMat.
// gpuMat.step is passed as dstStep, which the kernel applies as a per-row
// offset in bytes.
BindToTexture( cuArr );
DeviceArrayCopyFromTexture( (float3*)gpuMat.data, gpuMat.step, gpuMat.cols, gpuMat.rows  );

// Unmap the resource once the copy has been issued.
checkCudaErrors( cudaGraphicsUnmapResources( 1, &cudaResourceColor, 0 ) );

References:

http://answers.opencv.org/question/12958/read-rendered-images-using-gpumat-and-cuda/
CUDA Samples\v5.5\3_Imaging\postProcessGL

Since you are doing a copy operation anyway, wouldn't it be easier (that is, no need for writing a custom CUDA kernel) if you used cudaMemcpy2DFromArray() to convert the opaque CUDA memory block (represented by cudaArray) to a flat CUDA memory block (what the function returns) and then simply assigned that copy to the GpuMat? I am also tackling a similar case currently, although initially I wanted to avoid a dependency on OpenCV. However, doing image manipulation on a raw texture is a nightmare and literally reinventing the wheel. :D

kangshiyin · Accepted Answer · 2013-11-01 05:30:04Z

1

GpuMat::step is in number of elements while pitch is in bytes, so try to change from

cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step,                  colorArr, 0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice );

to

cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step * sizeof(float3), colorArr, 0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice );

answered Nov 1, 2013 at 5:30

kangshiyin

9,8291 gold badge19 silver badges30 bronze badges

1 Comment

Feng Over a year ago

I tried your suggested method. It didn't work either. Thank you all the same!

Collectives™ on Stack Overflow

How to copy cudaArray (Cuda) to GpuMat (OpenCV)?

2 Answers 2

1 Comment

1 Comment

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

2 Answers 2

1 Comment

1 Comment

Your Answer

Sign up or log in

Post as a guest

Related