0

Hi I'm writing a simple Program for practicing to work with texture memory. I Just want to write my data into Texture Memory and write it back into Global Memory. But i cannont read out the Values. Here is the code.

#include <stdio.h>
#include <iostream>
#include "cuda.h"
#include <stdlib.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "HelloWorld.h"

#include "linearInterpolation_kernel4.cu"

using namespace std;
using std::cout;

const int blocksize = 16; 

__global__ 
void hello(char *a, int *b) {
    a[threadIdx.x] += b[threadIdx.x];
}



////////////////////////////////////////////////////////////////////////////////
// These are CUDA Helper functions

// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)

inline void __checkCudaErrors( cudaError err, const char *file, const int line )
{
    if( cudaSuccess != err) {
        printf("%s(%i) : CUDA Runtime API error %d: %s.\n",file, line, (int)err, cudaGetErrorString( err ) );

    }
}

// This will output the proper error string when calling cudaGetLastError
#define getLastCudaError(msg)      __getLastCudaError (msg, __FILE__, __LINE__)

inline void __getLastCudaError( const char *errorMessage, const char *file, const int line )
{
    cudaError_t err = cudaGetLastError();
    if( cudaSuccess != err) {
        printf("%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n", file, line, errorMessage, (int)err, cudaGetErrorString( err ) );

    }
}

int main()
{
    int N = 40; 
    float *A; 
    A = (float *) malloc(N*sizeof(float));
    float *B;
    B = (float *) malloc(N*sizeof(float));
    float *result;
    result = (float *) malloc(N*sizeof(float));
    float angle = 0.8f; 

    for(int i = 0; i < N; i++){
        A[i] = i; //(float)rand();
        B[i] = i+1; //(float)rand();
    }
    ipLinearTexture2(A,B,result,angle,N);

    float result2;

    result2 = (angle)*A[4] + (1-angle)*B[4]; 

    printf(" A %f B %f Result %f\n", A[4], B[4], result[4]);
    cout << result2 << endl;

    return 1;
}

void ipLinearTexture2(float *A, float* B, float* result, float angle, int N)
{
    float cuTime;

    int N2 = N * 2;
    float *dev_result;

    float **AB;

    AB = (float **) malloc( N * sizeof(float *));

    if(AB)
    {
        for(int i = 0; i < N; i++)
        {
            AB[i] = (float *) malloc( 2 * sizeof(float *));
        }
    }

    for (int i = 0; i < N; i = i++)
    {
        AB[i][0] = A[i];
        AB[i][1] = B[i];
    }

    cudaMalloc(&dev_result, N * sizeof(float));
    unsigned int size = N2 * sizeof(float);

    //cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
    cudaArray* cu_array;

    checkCudaErrors(cudaMallocArray( &cu_array, &channelDesc,N,2)); 
    cudaMemcpy2DToArray(cu_array,0,0,AB,N * sizeof(float), N * sizeof(float), 2, cudaMemcpyHostToDevice);

    // set texture parameters
    tex2.normalized = true;  
    tex2.filterMode = cudaFilterModeLinear;
    tex2.addressMode[0] = cudaAddressModeWrap; //cudaAddressModeWrap;
    tex2.addressMode[1] = cudaAddressModeWrap; //cudaAddressModeClamp;

    checkCudaErrors(cudaBindTextureToArray( tex2, cu_array, channelDesc));

    dim3 dimBlock(10, 1, 1);
    dim3 dimGrid((int)ceil((double)N*2/dimBlock.x), 1, 1);

    transformKernel4<<< 256, 256, 0 >>>( dev_result, N, 2, angle);

    checkCudaErrors(cudaMemcpy(result, dev_result, N * sizeof(float), cudaMemcpyDeviceToHost));
    cout << "==================================================" << endl;

    for (int i = 0 ; i < N ;i++)
    {
        cout << result[i] << " on " << i << endl;   
    }

    cout << "==================================================" << endl;
    checkCudaErrors(cudaUnbindTexture(tex));
    checkCudaErrors(cudaFree(dev_result));
    checkCudaErrors(cudaFreeArray(cu_array));
}

and here is the kernel code

#ifndef _SIMPLETEXTURE_KERNEL5_H_
#define _SIMPLETEXTURE_KERNEL5_H_

// Texture references

texture<float, 2, cudaReadModeElementType> tex2;

__global__ void
transformKernel4(float* g_odata, int width, int height, float theta) 
{
    unsigned int xid = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int yid = blockIdx.y * blockDim.y + threadIdx.y;

    if (xid >= width || yid >= height) return; 

    float dx = 1.0f / (float)width;
    float dy = 1.0f / (float)height;

    float x = ((float)xid + 0.5f) * dx;
    float y = ((float)yid + 0.5f) * dy;


        float value = tex2D(tex2, x , y);
        printf("wert %f xid %i yid %i \n",value, xid, yid);
g_odata[yid * width + xid] = value;

    }
#endif // #ifndef _SIMPLETEXTURE_KERNEL_H_

Can somebody tell what i am doing wrong? I have edited it to remove the first 2 logical mistake. Put why am I need able to print out my data?

1
  • The point is you are reading data from 2D texture and trying to save it in 1D array. You should use linear index for saving the data, eg. something like: ofs = xid + yid * width; g_odata[ofs] = value; but I may not be careful enough with index arithmetic. Commented Sep 4, 2012 at 13:30

2 Answers 2

1

It was the wrong binding of the Arrays. You can not use multidimensional Arrays in C that can be copied. You have to use a onedimensional array that respresents a multidimensional.

Sign up to request clarification or add additional context in comments.

Comments

0

I can see 2 logical errors here.

The first one is the one pointed out by @asm. The output should be stored by calculating linear index from 2D x and y indices.

outputIndex = yid * width + xid;

The second one is that the memory allocation for the cudaArray structure is internally aligned. You should consider using cudaMemcpy2DToArray function to avoid erroneous data copying.

cudaMemcpy2DToArray(cu_array,0,0,AB,N * sizeof(float), N * sizeof(float), 2, cudaMemcpyHostToDevice);

1 Comment

ok but even if i do a printf("Show me the value %f and the xid %i and yid %i", value, xid, yid); i don't have the any value. So i understand the saving part. Changing the memcpy to 2DToArray also didn't help. Am i accesing the values wrong?

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.