I am trying to populate a number of linked lists on the device and then return those lists back to the hosts.
From my understanding I need to allocate memory for my struct Element, but I don't know how to go about it since I will have many linked lists, each with an unknown number of elements. I've tried a couple of different things but it still didn't work. So I'm back to the starting point. Here is my code:
//NODE CLASS
class Node{
public:
int x,y;
Node *parent;
__device__ __host__ Node(){}
__device__ __host__ Node(int cX, int cY){x = cX; y = cY;}
__device__ __host__ int get_row() { return x; }
__device__ __host__ int get_col() { return y; }
};
//LINKED LIST
class LinkedList{
public:
__device__ __host__ struct Element{
Node n1;
Element *next;
};
__device__ __host__ LinkedList(){
head = NULL;
}
__device__ __host__ void addNode(Node n){
Element *el = new Element();
el->n1 = n;
el->next = head;
head = el;
}
__device__ __host__ Node popFirstNode(){
Element *cur = head;
Node n;
if(cur != NULL){
n = cur -> n1;
head = head -> next;
}
delete cur;
return n;
}
__device__ __host__ bool isEmpty(){
Element *cur = head;
if(cur == NULL){
return true;
}
return false;
}
Element *head;
};
//LISTS
__global__ void listsKernel(LinkedList* d_Results, int numLists){
int idx = blockIdx.x * blockDim.x + threadIdx.x;
Node n(1,1);
if(idx < numLists){
d_Results[idx].addNode(n);
d_Results[idx].addNode(n);
d_Results[idx].addNode(n);
d_Results[idx].addNode(n);
}
}
int main(){
int numLists = 10;
size_t size = numLists * sizeof(LinkedList);
LinkedList curList;
LinkedList* h_Results = (LinkedList*)malloc(size);
LinkedList* d_Results;
cudaMalloc((void**)&d_Results, size);
listsKernel<<<256,256>>>(d_Results, numLists);
cudaMemcpy(h_Results, d_Results, sizeof(LinkedList)*numLists, cudaMemcpyDeviceToHost);
for(int i = 0; i < numLists; i++){
curList = h_Results[i];
while(curList.isEmpty() == false){
Node n = curList.popFirstNode();
std::cout << "x: " << n.get_row() << " y: " << n.get_col();
}
}
}
As you can see I'm trying to populate 10 linked lists on the device and then return them back to the host, but the code above results in unhandled exception - Access violation reading location. I am assuming it is not coping the pointers from the device.
Any help would be great.
listsKernel? Where does the host access violation occur?newcannot take part in a device->hostcudaMemcpyoperation, although that is just one of several issues with your approach. This blog may be of interest.