Cannot read out Values from Texture Memory

Question

Hi I'm writing a simple Program for practicing to work with texture memory. I Just want to write my data into Texture Memory and write it back into Global Memory. But i cannont read out the Values. Here is the code.

#include <stdio.h>
#include <iostream>
#include "cuda.h"
#include <stdlib.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "HelloWorld.h"

#include "linearInterpolation_kernel4.cu"

using namespace std;
using std::cout;

const int blocksize = 16; 

__global__ 
void hello(char *a, int *b) {
    a[threadIdx.x] += b[threadIdx.x];
}



////////////////////////////////////////////////////////////////////////////////
// These are CUDA Helper functions

// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)

inline void __checkCudaErrors( cudaError err, const char *file, const int line )
{
    if( cudaSuccess != err) {
        printf("%s(%i) : CUDA Runtime API error %d: %s.\n",file, line, (int)err, cudaGetErrorString( err ) );

    }
}

// This will output the proper error string when calling cudaGetLastError
#define getLastCudaError(msg)      __getLastCudaError (msg, __FILE__, __LINE__)

inline void __getLastCudaError( const char *errorMessage, const char *file, const int line )
{
    cudaError_t err = cudaGetLastError();
    if( cudaSuccess != err) {
        printf("%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n", file, line, errorMessage, (int)err, cudaGetErrorString( err ) );

    }
}

int main()
{
    int N = 40; 
    float *A; 
    A = (float *) malloc(N*sizeof(float));
    float *B;
    B = (float *) malloc(N*sizeof(float));
    float *result;
    result = (float *) malloc(N*sizeof(float));
    float angle = 0.8f; 

    for(int i = 0; i < N; i++){
        A[i] = i; //(float)rand();
        B[i] = i+1; //(float)rand();
    }
    ipLinearTexture2(A,B,result,angle,N);

    float result2;

    result2 = (angle)*A[4] + (1-angle)*B[4]; 

    printf(" A %f B %f Result %f\n", A[4], B[4], result[4]);
    cout << result2 << endl;

    return 1;
}

void ipLinearTexture2(float *A, float* B, float* result, float angle, int N)
{
    float cuTime;

    int N2 = N * 2;
    float *dev_result;

    float **AB;

    AB = (float **) malloc( N * sizeof(float *));

    if(AB)
    {
        for(int i = 0; i < N; i++)
        {
            AB[i] = (float *) malloc( 2 * sizeof(float *));
        }
    }

    for (int i = 0; i < N; i = i++)
    {
        AB[i][0] = A[i];
        AB[i][1] = B[i];
    }

    cudaMalloc(&dev_result, N * sizeof(float));
    unsigned int size = N2 * sizeof(float);

    //cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
    cudaArray* cu_array;

    checkCudaErrors(cudaMallocArray( &cu_array, &channelDesc,N,2)); 
    cudaMemcpy2DToArray(cu_array,0,0,AB,N * sizeof(float), N * sizeof(float), 2, cudaMemcpyHostToDevice);

    // set texture parameters
    tex2.normalized = true;  
    tex2.filterMode = cudaFilterModeLinear;
    tex2.addressMode[0] = cudaAddressModeWrap; //cudaAddressModeWrap;
    tex2.addressMode[1] = cudaAddressModeWrap; //cudaAddressModeClamp;

    checkCudaErrors(cudaBindTextureToArray( tex2, cu_array, channelDesc));

    dim3 dimBlock(10, 1, 1);
    dim3 dimGrid((int)ceil((double)N*2/dimBlock.x), 1, 1);

    transformKernel4<<< 256, 256, 0 >>>( dev_result, N, 2, angle);

    checkCudaErrors(cudaMemcpy(result, dev_result, N * sizeof(float), cudaMemcpyDeviceToHost));
    cout << "==================================================" << endl;

    for (int i = 0 ; i < N ;i++)
    {
        cout << result[i] << " on " << i << endl;   
    }

    cout << "==================================================" << endl;
    checkCudaErrors(cudaUnbindTexture(tex));
    checkCudaErrors(cudaFree(dev_result));
    checkCudaErrors(cudaFreeArray(cu_array));
}

and here is the kernel code

#ifndef _SIMPLETEXTURE_KERNEL5_H_
#define _SIMPLETEXTURE_KERNEL5_H_

// Texture references

texture<float, 2, cudaReadModeElementType> tex2;

__global__ void
transformKernel4(float* g_odata, int width, int height, float theta) 
{
    unsigned int xid = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int yid = blockIdx.y * blockDim.y + threadIdx.y;

    if (xid >= width || yid >= height) return; 

    float dx = 1.0f / (float)width;
    float dy = 1.0f / (float)height;

    float x = ((float)xid + 0.5f) * dx;
    float y = ((float)yid + 0.5f) * dy;


        float value = tex2D(tex2, x , y);
        printf("wert %f xid %i yid %i \n",value, xid, yid);
g_odata[yid * width + xid] = value;

    }
#endif // #ifndef _SIMPLETEXTURE_KERNEL_H_

Can somebody tell what i am doing wrong? I have edited it to remove the first 2 logical mistake. Put why am I need able to print out my data?

The point is you are reading data from 2D texture and trying to save it in 1D array. You should use linear index for saving the data, eg. something like: ofs = xid + yid * width; g_odata[ofs] = value; but I may not be careful enough with index arithmetic. — user1545642
– user1545642, Commented Sep 4, 2012 at 13:30

Silve2611 · Accepted Answer · 2012-09-06 08:59:24Z

1

It was the wrong binding of the Arrays. You can not use multidimensional Arrays in C that can be copied. You have to use a onedimensional array that respresents a multidimensional.

answered Sep 6, 2012 at 8:59

Silve2611

2,2782 gold badges38 silver badges58 bronze badges

Sign up to request clarification or add additional context in comments.

Comments

sgarizvi · Accepted Answer · 2012-09-05 07:18:52Z

0

I can see 2 logical errors here.

The first one is the one pointed out by @asm. The output should be stored by calculating linear index from 2D x and y indices.

outputIndex = yid * width + xid;

The second one is that the memory allocation for the cudaArray structure is internally aligned. You should consider using cudaMemcpy2DToArray function to avoid erroneous data copying.

cudaMemcpy2DToArray(cu_array,0,0,AB,N * sizeof(float), N * sizeof(float), 2, cudaMemcpyHostToDevice);

answered Sep 5, 2012 at 7:18

sgarizvi

16.9k9 gold badges67 silver badges105 bronze badges

1 Comment

Silve2611 Over a year ago

ok but even if i do a printf("Show me the value %f and the xid %i and yid %i", value, xid, yid); i don't have the any value. So i understand the saving part. Changing the memcpy to 2DToArray also didn't help. Am i accesing the values wrong?

Collectives™ on Stack Overflow

Cannot read out Values from Texture Memory

2 Answers 2

Comments

1 Comment

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

2 Answers 2

Comments

1 Comment

Your Answer

Sign up or log in

Post as a guest

Related