Skip to content

clEnqueueReadBuffer fails on IGPUs for mapped host-buffer destinations #866

@FreddieWitherden

Description

@FreddieWitherden

Consider the following snippet which I believe to be a valid use of the OpenCL API:

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * CHECK(err, msg): terminate the program if an OpenCL status is not CL_SUCCESS.
 *
 *   err - expression yielding a cl_int status. It is evaluated exactly once,
 *         so an API call may be passed directly, e.g.
 *         CHECK(clFinish(queue), "clFinish"), without the call being repeated
 *         on the failure path.
 *   msg - C string naming the operation; used in the diagnostic.
 *
 * On failure prints "<msg> failed (<code>)" to stderr and calls exit(1).
 * The do { ... } while (0) wrapper makes the expansion a single statement so
 * the macro is safe inside an unbraced if/else.
 */
#define CHECK(err, msg) \
do { \
    const cl_int check_status_ = (err); \
    if (check_status_ != CL_SUCCESS) { \
        fprintf(stderr, "%s failed (%d)\n", (msg), (int)check_status_); \
        exit(1); \
    } \
} while (0)

/*
 * Minimal reproducer: fill a buffer, map a second CL_MEM_ALLOC_HOST_PTR
 * buffer, then issue a non-blocking clEnqueueReadBuffer from the first buffer
 * with the mapped pointer as the destination.
 *
 * Every status is routed through CHECK, which prints a message and exits with
 * status 1 on the first failure; returns 0 if all checked calls succeed.
 * The order and arguments of the OpenCL calls are the point of the test and
 * should not be rearranged.
 */
int main(void) {
    cl_int err;

    // Request at most one platform and one device of the default type.
    cl_platform_id platform;
    CHECK(clGetPlatformIDs(1, &platform, NULL), "clGetPlatformIDs");

    cl_device_id device;
    CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL),
          "clGetDeviceIDs");

    // Single-device context with no properties, callback, or user data.
    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    CHECK(err, "clCreateContext");

    // Queue with no properties list (0 is passed as the null properties pointer).
    // NOTE(review): the message below names clCreateCommandQueue, not the
    // ...WithProperties variant that is actually called.
    cl_command_queue queue = clCreateCommandQueueWithProperties(context, device, 0, &err);
    CHECK(err, "clCreateCommandQueue");

    // Source buffer: 16 floats, created without a host pointer.
    const size_t N = 16;
    const size_t bytes = N * sizeof(float);
    cl_mem dev_buf = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
    CHECK(err, "clCreateBuffer dev_buf");

    // Enqueue a fill of the whole source buffer with a one-float pattern.
    float pattern = 42.0f; // fill value
    CHECK(clEnqueueFillBuffer(queue, dev_buf, &pattern, sizeof(float),
                              0, bytes, 0, NULL, NULL),
          "clEnqueueFillBuffer");

    // Destination buffer of the same size, created with CL_MEM_ALLOC_HOST_PTR.
    cl_mem host_buf = clCreateBuffer(context,
                                     CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
                                     bytes, NULL, &err);
    CHECK(err, "clCreateBuffer host_buf");

    // Blocking map (CL_TRUE) of the full range for writing; host_ptr is the
    // mapped pointer used as the read destination below.
    void *host_ptr = clEnqueueMapBuffer(queue, host_buf, CL_TRUE,
                                        CL_MAP_WRITE, 0, bytes,
                                        0, NULL, NULL, &err);
    CHECK(err, "clEnqueueMapBuffer");

    // The call under test: non-blocking (CL_FALSE) read of dev_buf into the
    // mapped pointer. This is the call the report says fails with -5.
    err = clEnqueueReadBuffer(queue, dev_buf, CL_FALSE, 0, bytes, host_ptr,
                              0, NULL, NULL);
    CHECK(err, "clEnqueueReadBuffer (non-blocking)");

    // Wait for the queued fill and read to complete before unmapping.
    CHECK(clFinish(queue), "clFinish");

    CHECK(clEnqueueUnmapMemObject(queue, host_buf, host_ptr, 0, NULL, NULL),
          "clEnqueueUnmapMemObject");
    // NOTE(review): unlike the calls above, the status of this clFinish and of
    // the release calls below is not checked.
    clFinish(queue);

    clReleaseMemObject(dev_buf);
    clReleaseMemObject(host_buf);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    return 0;
}

Running this on an NVIDIA GPU or an Intel A770m works as expected. However, on my IGPU (TigerLake-H GT1) it fails with:

clEnqueueReadBuffer (non-blocking) failed (-5)

Changing the read destination to an ordinary buffer (from malloc) appears to work, as does switching to a blocking read. My runtime version is 25.40.35563.4 and I am on a 6.17.5 kernel with the i915 module.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions