2

The program is as follows:

#include <cuda.h> 
#include <cutil_math>  
#include <cuda_runtime.h>  
#include <iostream>

// Small record with one member of each of three scalar types. main() fills one
// on the host and copies it to and from the device.
struct testtype
{
    float x;  // floating-point member
    int   y;  // integer member; this is the field the `test` kernel writes
    char  z;  // single-character member
};

 // Pointer variable declared with __device__; the `test` kernel dereferences it.
 // NOTE(review): main() takes &gpu_config in host code and hands it to cudaMalloc.
 // The CUDA documentation describes cudaMemcpyToSymbol / cudaGetSymbolAddress as
 // the way to reach a __device__ variable from the host -- confirm that the
 // device-side copy of this pointer is ever actually set.
 __device__   testtype* gpu_config;

/*
 * Kernel: stores 3 into the `y` member of the struct that the device-side
 * `gpu_config` pointer refers to. Takes no arguments; every launched thread
 * performs the same store, so `gpu_config` must already hold a valid device
 * address when the kernel runs.
 */
__global__ void test()
{
    gpu_config->y = 3;  // y is an int, so an integer literal is used
}

// Host entry point of the reproducer: fills a testtype on the host, allocates
// device storage for it, copies it over, launches `test`, copies the struct
// back into `val` and prints val.y. Returns -1 if the cudaMalloc call fails.
int main(void)  
{  
testtype cpu_config;

cpu_config.x = 1;
cpu_config.y = 2.0f;  // float literal converted to int, so y == 2
cpu_config.z = 'A';

testtype val ;  // not initialized here; only written by the device-to-host copy below

// NOTE(review): gpu_config is a __device__ variable, but its address is taken
// here in host code. Per the CUDA documentation that is not the location the
// kernel reads, so the device-side pointer presumably stays unset and the
// kernel dereferences an invalid address -- which would explain why val.y never
// becomes 3. The documented route is to cudaMalloc into an ordinary host
// pointer and publish it with cudaMemcpyToSymbol. TODO confirm.
if (cudaMalloc((void**) &gpu_config, sizeof(testtype)) != cudaSuccess)
{
    return -1;
}

// The return value of this copy is not checked.
cudaMemcpy(gpu_config, &cpu_config, sizeof(testtype), cudaMemcpyHostToDevice);

// One block of one thread, zero bytes of dynamic shared memory.
// No cudaGetLastError() / synchronize check follows, so a failed launch or a
// fault inside the kernel goes unnoticed at this point.
test<<<1,1,0>>>();

// The return value of this copy is not checked either; if it fails, `val`
// is printed without ever having been written.
cudaMemcpy(&val, gpu_config, sizeof(testtype), cudaMemcpyDeviceToHost);

std::cout << val.y << std::endl;
}   

When I delete `test<<<1,1,0>>>();`, `val` ends up with the same contents that were copied into `gpu_config`. But when `test<<<1,1,0>>>();` is present, `val.y` is not equal to 3, which means the `__global__` function `test` did not change the value. I want to know how to change the value of a `__device__` variable through a `__global__` function.

1 Answer 1

1
#include <stdio.h>
#include <cuda.h>
#include <cutil_math.h>
#include <cuda_runtime.h>

// Check the result of a CUDA runtime API call.
// Evaluates `call` exactly once. If the result is not cudaSuccess, prints
// "<file>(<line>) : <error string>." to stderr and terminates the process
// with EXIT_FAILURE.
// The body is wrapped in do { ... } while (0) so that `cudaSafeCall(x);` is a
// single statement and stays correct inside an unbraced if/else. The local is
// given a macro-specific name so it cannot shadow an identifier used in `call`.
#define cudaSafeCall(call) do {                                   \
  cudaError_t cudaSafeCall_err_ = (call);                         \
  if (cudaSuccess != cudaSafeCall_err_) {                         \
    fprintf(stderr, "%s(%i) : %s.\n", __FILE__, __LINE__,         \
            cudaGetErrorString(cudaSafeCall_err_));               \
    exit(EXIT_FAILURE);                                           \
  }                                                               \
} while (0)

// Check for a pending CUDA error, typically right after a kernel launch.
// Calls cudaGetLastError(). If the result is not cudaSuccess, prints
// "<file>(<line>) : <errorMessage> : <error string>." to stderr and terminates
// the process with EXIT_FAILURE. `errorMessage` is printed with %s, so it must
// be a C string.
// The body is wrapped in do { ... } while (0) so that `cudaCheckErr(msg);` is a
// single statement and stays correct inside an unbraced if/else.
#define cudaCheckErr(errorMessage) do {                                  \
  cudaError_t cudaCheckErr_err_ = cudaGetLastError();                    \
  if (cudaSuccess != cudaCheckErr_err_) {                                \
    fprintf(stderr, "%s(%i) : %s : %s.\n", __FILE__, __LINE__,           \
            (errorMessage), cudaGetErrorString(cudaCheckErr_err_));      \
    exit(EXIT_FAILURE);                                                  \
  }                                                                      \
} while (0)

// Element type of the array reached through the __device__ pointer `d`.
struct g
{
  int m;  // the field `kernel` sets and main() prints
};
// Pointer variable declared with __device__. `kernel` indexes through it, and
// main() stores a cudaMalloc'd address into it with cudaMemcpyToSymbol.
__device__ struct g *d; // device (global)

// Kernel: each thread computes its flat global index from the block and thread
// indices and writes 10 into the `m` field of the element of `d` at that index.
// There is no bounds check, so the launch must not create more threads than
// there are elements behind `d`.
__global__ void kernel()
{
  const int idx = threadIdx.x + blockDim.x * blockIdx.x;
  d[idx].m = 10;
}

// Host driver: allocates one `struct g` on the device, publishes its address
// through the __device__ pointer `d`, runs `kernel` with a single thread, then
// copies the element back to the host and prints its `m` field.
// Returns 0 on success. Any CUDA or malloc failure terminates the process with
// EXIT_FAILURE (through the checking macros or the explicit exit below).
int main()
{
  const size_t size  = 1 * sizeof(struct g);  // payload bytes: one element
  const size_t sizep = sizeof(struct g*);     // bytes of the pointer stored in `d`

  // Allocate through an ordinary host-side pointer variable ...
  struct g *ld = NULL; // device address, held in a host variable
  cudaSafeCall(cudaMalloc(&ld, size));
  // ... then copy the pointer value itself into the __device__ symbol `d`,
  // so the kernel sees the allocation.
  cudaSafeCall(cudaMemcpyToSymbol(d, &ld, sizep));

  kernel<<<1,1>>>();
  cudaCheckErr("kernel error");            // launch errors, checked right after the launch
  cudaSafeCall(cudaDeviceSynchronize());   // errors raised while the kernel ran

  struct g *h = (struct g*)malloc(size);
  if(h==NULL){
    fprintf(stderr, "%s(%i) : malloc error.\n", __FILE__, __LINE__);
    exit(EXIT_FAILURE);
  }
  // Reading `d` back with cudaMemcpyFromSymbol is not necessary: `ld` already
  // holds the same device address.
  cudaSafeCall(cudaMemcpy(h, ld, size, cudaMemcpyDeviceToHost));
  printf("Result: %d\n",h[0].m);

  // Release both allocations instead of leaking them until process exit.
  free(h);
  cudaSafeCall(cudaFree(ld));
  return 0;
}
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.