0

I am trying to understand how to use the texture memory by binding it to a linear device array (not a cudaArray). My code is simple (below). I have a float* array of 8 numbers which I am trying to bind to a 1D texture and then in my kernel function I try to read out of the texture and put the values into an output array. But when I run this test, all values in my output array are zero:

Input = 0.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000
Output = 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

What am I missing here?

texture<float, 1, cudaReadModeElementType> texInput;

__global__ void copyKernel(float*output, int n) {
for (int i = 0; i < n; i++) {
    output[i] = tex1D(texInput, (float)i);
}
}

int main(int argc, char*argv[]) {

const int WIDTH = 8;

float* hInput = (float*)malloc(sizeof(float) * WIDTH);
float*hOutput = (float*)malloc(sizeof(float) * WIDTH);

for (int i = 0; i < WIDTH; i++) {
    hInput[i] = (float)i;
}

float* dInput = NULL, *dOutput = NULL;

size_t offset = 0;

texInput.addressMode[0] = cudaAddressModeBorder;
texInput.addressMode[1] = cudaAddressModeBorder;
texInput.filterMode = cudaFilterModePoint;
texInput.normalized = false;

checkCudaErrors(cudaMalloc((void**)&dInput, sizeof(float)*WIDTH));
checkCudaErrors(cudaMalloc((void**)&dOutput, sizeof(float)*WIDTH));

cudaMemcpy(dInput, hInput, sizeof(float)*WIDTH, cudaMemcpyHostToDevice);

cudaBindTexture(&offset, texInput, dInput, sizeof(float)*WIDTH);


copyKernel<<<1,1>>>(dOutput, WIDTH);

cudaMemcpy(hOutput, dOutput, sizeof(float)*WIDTH, cudaMemcpyDeviceToHost);
printf("\nInput = ");

for (int i = 0; i < WIDTH; i++) {
        printf("%f\t",hInput[i]);
    }
printf("\nOutput = ");
for (int i = 0; i < WIDTH; i++) {
    printf("%f\t",hOutput[i]);
}

return 0;
}

1 Answer 1

4

According to the documentation, tex1D() is used when the underlying allocation is a CUDA Array. For linear-memory bound textures, the correct texturing function is tex1Dfetch().

That modification (only) to your code makes it work for me:

$ cat t1139.cu
#include <stdio.h>
#include <helper_cuda.h>

texture<float, 1, cudaReadModeElementType> texInput;

__global__ void copyKernel(float*output, int n) {
for (int i = 0; i < n; i++) {
    output[i] = tex1Dfetch(texInput, i);
}
}

int main(int argc, char*argv[]) {

const int WIDTH = 8;

float* hInput = (float*)malloc(sizeof(float) * WIDTH);
float*hOutput = (float*)malloc(sizeof(float) * WIDTH);

for (int i = 0; i < WIDTH; i++) {
    hInput[i] = (float)i;
}

float* dInput = NULL, *dOutput = NULL;

size_t offset = 0;

texInput.addressMode[0] = cudaAddressModeBorder;
texInput.addressMode[1] = cudaAddressModeBorder;
texInput.filterMode = cudaFilterModePoint;
texInput.normalized = false;

checkCudaErrors(cudaMalloc((void**)&dInput, sizeof(float)*WIDTH));
checkCudaErrors(cudaMalloc((void**)&dOutput, sizeof(float)*WIDTH));

cudaMemcpy(dInput, hInput, sizeof(float)*WIDTH, cudaMemcpyHostToDevice);

cudaBindTexture(&offset, texInput, dInput, sizeof(float)*WIDTH);


copyKernel<<<1,1>>>(dOutput, WIDTH);

cudaMemcpy(hOutput, dOutput, sizeof(float)*WIDTH, cudaMemcpyDeviceToHost);
printf("\nInput = ");

for (int i = 0; i < WIDTH; i++) {
        printf("%f\t",hInput[i]);
    }
printf("\nOutput = ");
for (int i = 0; i < WIDTH; i++) {
    printf("%f\t",hOutput[i]);
}

return 0;
}
$ nvcc -I/usr/local/cuda/samples/common/inc t1139.cu -o t1139
$ cuda-memcheck ./t1139
========= CUDA-MEMCHECK

Input = 0.000000        1.000000        2.000000        3.000000        4.0000005.000000        6.000000        7.000000
Output = 0.000000       1.000000        2.000000        3.000000        4.0000005.000000        6.000000        7.000000        ========= ERROR SUMMARY: 0 errors
$
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.