0

I'm having a problem with some openCL code I'm writing.

I've written a collection of utility functions to remove some boilerplate code from where I'm using it. The test method runs at the beginning and works absolutely fine, the code being below:

/*
 * Smoke test for the OpenCL utility functions.
 *
 * Builds the "hello" kernel from src/hello.cl, runs it as a single task with
 * one MEM_SIZE-byte device buffer as its only argument, reads the buffer back
 * and prints it. Every object created here is released before returning.
 *
 * arg_program, arg_device: decimal strings; each is converted with atoi() and
 *                          forwarded to getDeviceId().
 *
 * Any failure while setting up or running the kernel prints a message to
 * stderr and terminates the process with exit(1). Failures while releasing
 * objects are reported on stderr but do not terminate the process.
 */
void openCLtest(char *arg_program, char *arg_device)
{
    cl_int ret;

    cl_device_id device_id = getDeviceId(atoi(arg_program), atoi(arg_device));
    cl_context context = get_cl_context(&device_id);
    cl_command_queue queue = get_cl_command_queue(&context, &device_id);
    cl_kernel kernel = compileCLkernel(&context, &device_id, "src/hello.cl", "hello");
    cl_mem memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(char), NULL, &ret);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to Allocate Buffer\n");
        exit(1);
    }
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to set kernel Arg\n");
        exit(1);
    }
    ret = clEnqueueTask(queue, kernel, 0, NULL, NULL);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to Enqueue Task\n");
        exit(1);
    }

    /* Enqueue errors are asynchronous: a faulting kernel only shows up here. */
    ret = clFinish(queue);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to wait for finish\n");
        exit(1);
    }

    char string[MEM_SIZE];
    ret = clEnqueueReadBuffer(queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(char), string, 0, NULL, NULL);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to read buffer\n");
        exit(1);
    }

    /*
     * The buffer content comes from the device and is not guaranteed to be
     * NUL-terminated, so bound the print to the size of the host array.
     */
    printf("CL Produced: %.*s\n", (int)sizeof string, string);

    ret = clFlush(queue);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to Wait for test queue to finish\n");
        exit(1);
    }
    ret = clFinish(queue);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to Wait for test queue to finish\n");
        exit(1);
    }

    /* Cleanup is best-effort: report a failed release but keep going. */
    ret = clReleaseKernel(kernel);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to release test kernel %d\n", ret);
    }
    ret = clReleaseMemObject(memobj);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to release test memory object %d\n", ret);
    }
    ret = clReleaseCommandQueue(queue);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to release test command queue %d\n", ret);
    }
    ret = clReleaseContext(context);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to release test context %d\n", ret);
    }
}

This code works fine, and I then extracted the code into more functions which can be used for the real openCL I'm writing.

The same principle has been applied in the rest of the code, but this time, it doesn't work.

main:

// Smoke test first; openCLtest creates and releases its own context and queue.
openCLtest(argv[2], argv[3]); //This is the code above and works great

// Separate device id, context and queue for the real work, built from the
// same two command-line arguments that were handed to openCLtest.
cl_device_id device_id = getDeviceId(atoi(argv[2]), atoi(argv[3]));
cl_context context = get_cl_context(&device_id);
cl_command_queue queue = get_cl_command_queue(&context, &device_id);

....

// The OpenCL handles are passed by address; cl_extrude_coords returns a
// malloc'd array of nodes * 3 * layers doubles.
double *coords_3D = cl_extrude_coords(&device_id, &context, &queue, coords_2D, nodes, LAYERS, LAYER_HEIGHT);

cl_extrude_coords:

double *cl_extrude_coords(cl_device_id* device_id, cl_context* context, cl_command_queue* queue, double *coords, int nodes, int layers, double layer_height)
{

    cl_int ret;

    cl_kernel extrude_coords = compileCLkernel(context, device_id, "src/OpenCL_Kernels/extrude_coords.cl", "extrude_coords");

    cl_mem coords_2d = clCreateBuffer(*context, CL_MEM_READ_ONLY, sizeof(coords) / sizeof(coords[0]), NULL, &ret);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to Create coords_2d CL Buffer %d\n", ret);
        exit(1);
    }
    cl_mem result = clCreateBuffer(*context, CL_MEM_WRITE_ONLY, sizeof(double) * nodes * 3 * layers, NULL, &ret);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to Create result CL Buffer %d\n", ret);
        exit(1);
    }

    ret = clEnqueueWriteBuffer(*queue, coords_2d, CL_TRUE, 0, sizeof(coords) / sizeof(coords[0]), (const void *)&coords, 0, NULL, NULL);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed enqueue coords_2d write to buffer %d\n", ret);
        exit(1);
    }

    ret = clSetKernelArg(extrude_coords, 0, sizeof(cl_mem), (void *)&coords_2d);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to Set kernel argument coords_2d %d\n", ret);
        exit(1);
    }
    ret = clSetKernelArg(extrude_coords, 1, sizeof(cl_mem), (void *)&result);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to Set kernel argument result CL Buffer %d\n", ret);
        exit(1);
    }
    ret = clSetKernelArg(extrude_coords, 2, sizeof(double), (void *)&layer_height);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to Set kernel argument layers %d\n", ret);
        exit(1);
    }

    size_t gWorkSize[]  = {nodes, layers};

    cl_event clEvent;
    ret = clEnqueueNDRangeKernel(*queue, extrude_coords, 2, NULL, (const size_t *)&gWorkSize, NULL, 0, NULL, &clEvent);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Enqueue Extrude Coordinates Kernel\n");
        exit(1);
    }

    double *res = (double *)malloc(sizeof(double) * nodes * 3 * layers);

    ret = clFinish(*queue);
        if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to wait for queue to finish in extrude_coords %d\n", ret);
        exit(1);
    }

    ret = clEnqueueReadBuffer(*queue, result, CL_TRUE, 0, sizeof(double) * nodes * 3 * layers, (void *)res, 1, &clEvent, NULL);
        if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to Enqueue the extrude_coords result buffer read %d\n", ret);
        exit(1);
    }

    ret = clReleaseKernel(extrude_coords);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to release kernel\n");
        exit(1);
    }
    ret = clReleaseMemObject(coords_2d);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to release result memory object\n");
        exit(1);
    }
    ret = clReleaseMemObject(result);
    if (ret != CL_SUCCESS)
    {
        fprintf(stderr, "Failed to release result memory object\n");
        exit(1);
    }

    return res;

}

cl kernel:

#pragma OPENCL EXTENSION cl_khr_fp64: enable

/*
 * Expand per-node 2D data into one 3-component entry per (node, layer) pair.
 *
 * Must be launched as a 2D NDRange: dimension 0 indexes nodes, dimension 1
 * indexes layers.
 *
 * coords:       two doubles per node (presumably x and y), read at
 *               coords[2*i] and coords[2*i + 1].
 * res:          three doubles per (node, layer) pair, stored node-major:
 *               entry (i, j) starts at 3 * (i * layers + j). Needs room for
 *               3 * global_size(0) * global_size(1) doubles.
 * layer_height: the third component of entry (i, j) is layer_height * j.
 */
__kernel void extrude_coords(__global const double * coords, __global double * res, const double layer_height){

    size_t i = get_global_id(0);
    size_t j = get_global_id(1);
    /*
     * The row stride is the number of layers, i.e. the extent of dimension 1.
     * Using get_global_size(0) here would take the node count instead and
     * index far outside res.
     */
    size_t layers = get_global_size(1);

    size_t base = 3 * (i * layers + j);

    res[base] = coords[2*i];
    res[base + 1] = coords[2*i + 1];
    res[base + 2] = layer_height * j;

}

This function however, does not work, throwing the error below when clFinish(queue) is called.

Failed to wait for queue to finish in extrude_coords -36

Looking this up, I can see -36 is CL_INVALID_COMMAND_QUEUE. If I don't exit here, I then get an error thrown at the buffer read, error code -5, CL_OUT_OF_RESOURCES.

I'm not sure what is going wrong. The values of nodes and layers when this code is being tested are 151731 and 101 respectively. I'm not sure if that has something to do with it.

Does anyone have any ideas on what could be the issue and how to fix it, or even any suggestions on whether this structure for the code is a good idea. The plan was by passing the queue, context and device ID, each function can produce and execute its own kernel(s) to do something with the queue etc being released at the end of the program when they're no longer needed.

Any help would be appreciated, I've been stumped on this for several hours now.

EDIT:

I have since tried changing the way clEnqueueNDRangeKernel is called in extrude_coords to

ret = clEnqueueNDRangeKernel(*queue, extrude_coords, 2, NULL, (const size_t *)&gWorkSize[0], NULL, 0, NULL, &clEvent);

as suggested in an answer but this does not work. Testing with printf("%d\n", &gWorkSize == &gWorkSize[0]); shows that the two pointers are functionally the same, so this is not the issue.

I then went on to modify the test OpenCL code to use clEnqueueNDRangeKernel instead of clEnqueueTask as follows:

size_t gWorkSize[]  = {1, 1};
// ret = clEnqueueTask(queue, kernel, 0, NULL, NULL);
ret = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, (const size_t *)&gWorkSize, NULL, 0, NULL, NULL);

This still all works correctly, so something else is clearly wrong... I'm still not sure what...

3
  • Can you verify that sizeof(coords) / sizeof(coords[0]) is computing the correct value? Commented Mar 2, 2014 at 20:36
  • That's the problem! It was returning one as both sizeof(coords) and sizeof(coords[0]) were returning 4, the size of a double, giving one. The CL Kernel was therefore seg faulting. I just hadn't realised that due to the asynchronous enqueue of the kernel, the clEnqueueNDRangeKernel() was returning CL_SUCCESS with the error being shown by the GPU invalidating the queue and context! If you write an actual answer saying as much, I shall mark you down as correct! Commented Mar 2, 2014 at 21:16
  • Sure, answer is up. Glad the issue is resolved! Commented Mar 3, 2014 at 4:52

2 Answers 2

1

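The expression sizeof(coords) / sizeof(coords[0]) does not give the array size in C/C++ when coords is a pointer (as it is for a function parameter): sizeof(coords) is then the size of the pointer, not of the array it points to. It is best to pass the element count in as elementsInCoords and use sizeof(coords[0]) * elementsInCoords as the size in bytes. Alternatively, in C++, make coords a std::vector<> and pass that around, since you can get both a data pointer and the size out of it.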

Sign up to request clarification or add additional context in comments.

Comments

0

Look at code:

size_t gWorkSize[]  = {nodes, layers};

cl_event clEvent;
ret = clEnqueueNDRangeKernel(*queue, extrude_coords, 2, NULL, (const size_t *)&gWorkSize, NULL, 0, NULL, &clEvent);

&gWorkSize is of type size_t (*)[2], while argument must be of type const size_t*

Try this:

ret = clEnqueueNDRangeKernel(*queue, extrude_coords, 2, NULL, &gWorkSize[0], NULL, 0, NULL, &clEvent);

2 Comments

Sorry, that's not it. Tried changing the code to that suggested above and it doesn't work. Furthermore, I added printf("%d\n", &gWorkSize == &gWorkSize[0]); to the code and it's showing that the code is functionally the same with or without [0]
Look at the kernel execution time in the Nvidia Visual Profiler. If it is much larger or smaller than expected, that usually means there is an error in the kernel algorithm and/or that the OpenCL objects the kernel works with are invalid.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.