summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- CMakeLists.txt 11
-rw-r--r-- src/CMakeLists.txt 5
-rw-r--r-- src/cl_api.c 10
-rw-r--r-- src/cl_driver.h 3
-rw-r--r-- src/cl_driver_defs.c 1
-rw-r--r-- src/cl_enqueue.c 19
-rw-r--r-- src/cl_mem.c 37
-rw-r--r-- src/cl_mem.h 2
-rw-r--r-- src/cl_mem_gl.c 2
-rw-r--r-- src/intel/intel_driver.c 15
10 files changed, 87 insertions, 18 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 40cb74cc..15386f9b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -108,7 +108,7 @@ ENDIF(X11_FOUND)
# DRM
pkg_check_modules(DRM REQUIRED libdrm)
IF(DRM_FOUND)
- MESSAGE(STATUS "Looking for DRM - found at ${DRM_PREFIX}")
+ MESSAGE(STATUS "Looking for DRM - found at ${DRM_PREFIX} ${DRM_VERSION}")
INCLUDE_DIRECTORIES(${DRM_INCLUDE_DIRS})
ELSE(DRM_FOUND)
MESSAGE(STATUS "Looking for DRM - not found")
@@ -118,7 +118,14 @@ ENDIF(DRM_FOUND)
pkg_check_modules(DRM_INTEL libdrm_intel>=2.4.52)
IF(DRM_INTEL_FOUND)
INCLUDE_DIRECTORIES(${DRM_INTEL_INCLUDE_DIRS})
- MESSAGE(STATUS "Looking for DRM Intel - found at ${DRM_INTEL_PREFIX}")
+ MESSAGE(STATUS "Looking for DRM Intel - found at ${DRM_INTEL_PREFIX} ${DRM_INTEL_VERSION}")
+ #userptr support starts from 2.4.57, but 2.4.58 is the actual stable release
+ IF(DRM_INTEL_VERSION VERSION_GREATER 2.4.57)
+ MESSAGE(STATUS "Enable userptr support")
+ SET(DRM_INTEL_USERPTR "enable")
+ ELSE(DRM_INTEL_VERSION VERSION_GREATER 2.4.57)
+ MESSAGE(STATUS "Disable userptr support")
+ ENDIF(DRM_INTEL_VERSION VERSION_GREATER 2.4.57)
ELSE(DRM_INTEL_FOUND)
MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found")
ENDIF(DRM_INTEL_FOUND)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fc5de89e..7182bada 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -109,6 +109,11 @@ SET(CMAKE_CXX_FLAGS "-DHAS_OCLIcd ${CMAKE_CXX_FLAGS}")
SET(CMAKE_C_FLAGS "-DHAS_OCLIcd ${CMAKE_C_FLAGS}")
endif (OCLIcd_FOUND)
+if (DRM_INTEL_USERPTR)
+SET(CMAKE_CXX_FLAGS "-DHAS_USERPTR ${CMAKE_CXX_FLAGS}")
+SET(CMAKE_C_FLAGS "-DHAS_USERPTR ${CMAKE_C_FLAGS}")
+endif (DRM_INTEL_USERPTR)
+
set(GIT_SHA1 "git_sha1.h")
add_custom_target(${GIT_SHA1} ALL
COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh
diff --git a/src/cl_api.c b/src/cl_api.c
index 05d30933..1f246386 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2665,9 +2665,13 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
ptr = data->ptr;
if(event) cl_event_set_status(*event, CL_COMPLETE);
} else {
- if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
- err = CL_MAP_FAILURE;
- goto error;
+ if (buffer->is_userptr)
+ ptr = buffer->host_ptr;
+ else {
+ if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
+ err = CL_MAP_FAILURE;
+ goto error;
+ }
}
}
err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size, NULL, NULL);
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 638b791c..8697ff2c 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -285,6 +285,9 @@ extern cl_gpgpu_walker_cb *cl_gpgpu_walker;
typedef cl_buffer (cl_buffer_alloc_cb)(cl_buffer_mgr, const char*, size_t, size_t);
extern cl_buffer_alloc_cb *cl_buffer_alloc;
+typedef cl_buffer (cl_buffer_alloc_userptr_cb)(cl_buffer_mgr, const char*, void *, size_t, unsigned long);
+extern cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr;
+
/* Set a buffer's tiling mode */
typedef cl_buffer (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t stride);
extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index c31b6fc4..1335c20f 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -29,6 +29,7 @@ LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;
/* Buffer */
LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL;
+LOCAL cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr = NULL;
LOCAL cl_buffer_set_tiling_cb *cl_buffer_set_tiling = NULL;
LOCAL cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture = NULL;
LOCAL cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture = NULL;
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index db0bce74..5bdb7cd7 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -234,11 +234,15 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
mem->type == CL_MEM_SUBBUFFER_TYPE);
struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
- if(data->unsync_map == 1)
- //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
- ptr = cl_mem_map_gtt(mem);
- else
- ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
+ if (mem->is_userptr)
+ ptr = mem->host_ptr;
+ else {
+ if(data->unsync_map == 1)
+ //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
+ ptr = cl_mem_map_gtt(mem);
+ else
+ ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
+ }
if (ptr == NULL) {
err = CL_MAP_FAILURE;
@@ -246,7 +250,7 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
}
data->ptr = ptr;
- if(mem->flags & CL_MEM_USE_HOST_PTR) {
+ if((mem->flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) {
assert(mem->host_ptr);
ptr = (char*)ptr + data->offset + buffer->sub_offset;
memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data->size);
@@ -331,7 +335,8 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
assert(mapped_ptr >= memobj->host_ptr &&
mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
/* Sync the data. */
- memcpy(v_ptr, mapped_ptr, mapped_size);
+ if (!memobj->is_userptr)
+ memcpy(v_ptr, mapped_ptr, mapped_size);
} else {
CHECK_IMAGE(memobj, image);
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 16bd6135..d3199668 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -33,6 +33,7 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
+#include <unistd.h>
#define FIELD_SIZE(CASE,TYPE) \
case JOIN(CL_,CASE): \
@@ -223,6 +224,7 @@ cl_mem_allocate(enum cl_mem_type type,
cl_mem_flags flags,
size_t sz,
cl_int is_tiled,
+ void *host_ptr,
cl_int *errcode)
{
cl_buffer_mgr bufmgr = NULL;
@@ -251,6 +253,7 @@ cl_mem_allocate(enum cl_mem_type type,
mem->ref_n = 1;
mem->magic = CL_MAGIC_MEM_HEADER;
mem->flags = flags;
+ mem->is_userptr = 0;
if (sz != 0) {
/* Pinning will require stricter alignment rules */
@@ -260,7 +263,28 @@ cl_mem_allocate(enum cl_mem_type type,
/* Allocate space in memory */
bufmgr = cl_context_get_bufmgr(ctx);
assert(bufmgr);
+
+#ifdef HAS_USERPTR
+ if (ctx->device->host_unified_memory) {
+ /* currently only cl buf is supported, will add cl image support later */
+ if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) {
+ /* userptr does not support tiling */
+ if (!is_tiled) {
+ int page_size = getpagesize();
+ if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) { /* take the userptr path only when address and size are both multiples of the page size */
+ mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
+ mem->is_userptr = (mem->bo != NULL); /* flag only a successful userptr allocation, so a failure falls through to cl_buffer_alloc below */
+ }
+ }
+ }
+ }
+
+ if (!mem->is_userptr) /* no userptr BO was created: allocate a regular BO */
+ mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
+#else
mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
+#endif
+
if (UNLIKELY(mem->bo == NULL)) {
err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
goto error;
@@ -387,12 +411,15 @@ cl_mem_new_buffer(cl_context ctx,
sz = ALIGN(sz, 4);
/* Create the buffer in video memory */
- mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, &err);
+ mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, data, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
/* Copy the data if required */
- if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
+ if (flags & CL_MEM_COPY_HOST_PTR)
+ cl_buffer_subdata(mem->bo, 0, sz, data);
+
+ if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr)
cl_buffer_subdata(mem->bo, 0, sz, data);
if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR)
@@ -762,7 +789,7 @@ _cl_mem_new_image(cl_context ctx,
sz = aligned_pitch * aligned_h * depth;
}
- mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, &err);
+ mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
@@ -1834,7 +1861,7 @@ LOCAL cl_mem cl_mem_new_libva_buffer(cl_context ctx,
cl_int err = CL_SUCCESS;
cl_mem mem = NULL;
- mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, &err);
+ mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, NULL, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
@@ -1875,7 +1902,7 @@ LOCAL cl_mem cl_mem_new_libva_image(cl_context ctx,
goto error;
}
- mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, &err);
+ mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, NULL, &err);
if (mem == NULL || err != CL_SUCCESS) {
err = CL_OUT_OF_HOST_MEMORY;
goto error;
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 95c5f056..2e9dd5ad 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -92,6 +92,7 @@ typedef struct _cl_mem {
int map_ref; /* The mapped count. */
uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */
cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
+ uint8_t is_userptr; /* Set when the bo is allocated over host_ptr through userptr; map/unmap then use host_ptr directly and skip the copy. */
} _cl_mem;
struct _cl_mem_image {
@@ -262,6 +263,7 @@ cl_mem_allocate(enum cl_mem_type type,
cl_mem_flags flags,
size_t sz,
cl_int is_tiled,
+ void *host_ptr,
cl_int *errcode);
void
diff --git a/src/cl_mem_gl.c b/src/cl_mem_gl.c
index 28d2ac65..36409089 100644
--- a/src/cl_mem_gl.c
+++ b/src/cl_mem_gl.c
@@ -63,7 +63,7 @@ cl_mem_new_gl_texture(cl_context ctx,
goto error;
}
- mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, &err);
+ mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, NULL, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index bb97220c..fc037cc8 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -690,6 +690,20 @@ cl_buffer intel_share_image_from_libva(cl_context ctx,
return (cl_buffer)intel_bo;
}
+static cl_buffer intel_buffer_alloc_userptr(cl_buffer_mgr bufmgr, const char* name, void *data,size_t size, unsigned long flags)
+{ /* Ask drm_intel_bo_alloc_userptr() for a BO over the caller's memory `data` (`size` bytes), with I915_TILING_NONE; returns the BO cast to cl_buffer, or NULL. */
+#ifdef HAS_USERPTR
+ drm_intel_bo *bo;
+ bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags);
+ /* Fallback to unsynchronized userptr allocation if kernel has no MMU notifier enabled (the retry runs on any NULL result from the first attempt). */
+ if (bo == NULL)
+ bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags | I915_USERPTR_UNSYNCHRONIZED);
+ return (cl_buffer)bo; /* still NULL here if the retry failed as well */
+#else
+ return NULL; /* built without HAS_USERPTR: no userptr allocation is attempted */
+#endif
+}
+
static int32_t get_intel_tiling(cl_int tiling, uint32_t *intel_tiling)
{
switch (tiling) {
@@ -734,6 +748,7 @@ intel_setup_callbacks(void)
cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
+ cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr;
cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling;
#if defined(HAS_EGL)
cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture;