summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGuo Yejun <yejun.guo@intel.com>2014-11-07 16:18:54 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-11-07 15:50:41 +0800
commit075390db926de7bfd2ac853404ab1bcfc8b9c650 (patch)
tree9f70873dae000f4374025cfd27b3714562242b4d
parent54594b626c31a68956af97f69dc29132dc545f7c (diff)
downloadbeignet-075390db926de7bfd2ac853404ab1bcfc8b9c650.tar.gz
support CL_MEM_USE_HOST_PTR with userptr for cl buffer
userptr is used to wrap a memory pointer (page aligned) supplied by user space into a buffer object accessed by GPU, and so no extra copy is needed. It is supported starting from linux kernel 3.16 and libdrm 2.4.58. This patch is originally finished by Zhenyu Wang <zhenyuw@linux.intel.com>, I did a little change and some code clean. No regression issue found on IVB+Ubuntu14.10 with libdrm upgraded with tests: beignet/utests, piglit, OpenCV/test&perf, conformance/basic&mem_host_flags&buffers V2: add page align limit for data size, add comments for kernel without MMU_NOTIFIER V3: add runtime check with host_unified_memory, return CL_MEM_OBJECT_ALLOCATION_FAILURE if failed Signed-off-by: Guo Yejun <yejun.guo@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com> Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
-rw-r--r--CMakeLists.txt11
-rw-r--r--src/CMakeLists.txt5
-rw-r--r--src/cl_api.c10
-rw-r--r--src/cl_driver.h3
-rw-r--r--src/cl_driver_defs.c1
-rw-r--r--src/cl_enqueue.c19
-rw-r--r--src/cl_mem.c37
-rw-r--r--src/cl_mem.h2
-rw-r--r--src/cl_mem_gl.c2
-rw-r--r--src/intel/intel_driver.c15
10 files changed, 87 insertions, 18 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 40cb74cc..15386f9b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -108,7 +108,7 @@ ENDIF(X11_FOUND)
# DRM
pkg_check_modules(DRM REQUIRED libdrm)
IF(DRM_FOUND)
- MESSAGE(STATUS "Looking for DRM - found at ${DRM_PREFIX}")
+ MESSAGE(STATUS "Looking for DRM - found at ${DRM_PREFIX} ${DRM_VERSION}")
INCLUDE_DIRECTORIES(${DRM_INCLUDE_DIRS})
ELSE(DRM_FOUND)
MESSAGE(STATUS "Looking for DRM - not found")
@@ -118,7 +118,14 @@ ENDIF(DRM_FOUND)
pkg_check_modules(DRM_INTEL libdrm_intel>=2.4.52)
IF(DRM_INTEL_FOUND)
INCLUDE_DIRECTORIES(${DRM_INTEL_INCLUDE_DIRS})
- MESSAGE(STATUS "Looking for DRM Intel - found at ${DRM_INTEL_PREFIX}")
+ MESSAGE(STATUS "Looking for DRM Intel - found at ${DRM_INTEL_PREFIX} ${DRM_INTEL_VERSION}")
+ #userptr support starts from 2.4.57, but 2.4.58 is the actual stable release
+ IF(DRM_INTEL_VERSION VERSION_GREATER 2.4.57)
+ MESSAGE(STATUS "Enable userptr support")
+ SET(DRM_INTEL_USERPTR "enable")
+ ELSE(DRM_INTEL_VERSION VERSION_GREATER 2.4.57)
+ MESSAGE(STATUS "Disable userptr support")
+ ENDIF(DRM_INTEL_VERSION VERSION_GREATER 2.4.57)
ELSE(DRM_INTEL_FOUND)
MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found")
ENDIF(DRM_INTEL_FOUND)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fc5de89e..7182bada 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -109,6 +109,11 @@ SET(CMAKE_CXX_FLAGS "-DHAS_OCLIcd ${CMAKE_CXX_FLAGS}")
SET(CMAKE_C_FLAGS "-DHAS_OCLIcd ${CMAKE_C_FLAGS}")
endif (OCLIcd_FOUND)
+if (DRM_INTEL_USERPTR)
+SET(CMAKE_CXX_FLAGS "-DHAS_USERPTR ${CMAKE_CXX_FLAGS}")
+SET(CMAKE_C_FLAGS "-DHAS_USERPTR ${CMAKE_C_FLAGS}")
+endif (DRM_INTEL_USERPTR)
+
set(GIT_SHA1 "git_sha1.h")
add_custom_target(${GIT_SHA1} ALL
COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh
diff --git a/src/cl_api.c b/src/cl_api.c
index 05d30933..1f246386 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2665,9 +2665,13 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
ptr = data->ptr;
if(event) cl_event_set_status(*event, CL_COMPLETE);
} else {
- if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
- err = CL_MAP_FAILURE;
- goto error;
+ if (buffer->is_userptr)
+ ptr = buffer->host_ptr;
+ else {
+ if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
+ err = CL_MAP_FAILURE;
+ goto error;
+ }
}
}
err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size, NULL, NULL);
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 638b791c..8697ff2c 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -285,6 +285,9 @@ extern cl_gpgpu_walker_cb *cl_gpgpu_walker;
typedef cl_buffer (cl_buffer_alloc_cb)(cl_buffer_mgr, const char*, size_t, size_t);
extern cl_buffer_alloc_cb *cl_buffer_alloc;
+typedef cl_buffer (cl_buffer_alloc_userptr_cb)(cl_buffer_mgr, const char*, void *, size_t, unsigned long);
+extern cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr;
+
/* Set a buffer's tiling mode */
typedef cl_buffer (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t stride);
extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index c31b6fc4..1335c20f 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -29,6 +29,7 @@ LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;
/* Buffer */
LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL;
+LOCAL cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr = NULL;
LOCAL cl_buffer_set_tiling_cb *cl_buffer_set_tiling = NULL;
LOCAL cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture = NULL;
LOCAL cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture = NULL;
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index db0bce74..5bdb7cd7 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -234,11 +234,15 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
mem->type == CL_MEM_SUBBUFFER_TYPE);
struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
- if(data->unsync_map == 1)
- //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
- ptr = cl_mem_map_gtt(mem);
- else
- ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
+ if (mem->is_userptr)
+ ptr = mem->host_ptr;
+ else {
+ if(data->unsync_map == 1)
+ //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
+ ptr = cl_mem_map_gtt(mem);
+ else
+ ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
+ }
if (ptr == NULL) {
err = CL_MAP_FAILURE;
@@ -246,7 +250,7 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
}
data->ptr = ptr;
- if(mem->flags & CL_MEM_USE_HOST_PTR) {
+ if((mem->flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) {
assert(mem->host_ptr);
ptr = (char*)ptr + data->offset + buffer->sub_offset;
memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data->size);
@@ -331,7 +335,8 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
assert(mapped_ptr >= memobj->host_ptr &&
mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
/* Sync the data. */
- memcpy(v_ptr, mapped_ptr, mapped_size);
+ if (!memobj->is_userptr)
+ memcpy(v_ptr, mapped_ptr, mapped_size);
} else {
CHECK_IMAGE(memobj, image);
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 16bd6135..d3199668 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -33,6 +33,7 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
+#include <unistd.h>
#define FIELD_SIZE(CASE,TYPE) \
case JOIN(CL_,CASE): \
@@ -223,6 +224,7 @@ cl_mem_allocate(enum cl_mem_type type,
cl_mem_flags flags,
size_t sz,
cl_int is_tiled,
+ void *host_ptr,
cl_int *errcode)
{
cl_buffer_mgr bufmgr = NULL;
@@ -251,6 +253,7 @@ cl_mem_allocate(enum cl_mem_type type,
mem->ref_n = 1;
mem->magic = CL_MAGIC_MEM_HEADER;
mem->flags = flags;
+ mem->is_userptr = 0;
if (sz != 0) {
/* Pinning will require stricter alignment rules */
@@ -260,7 +263,28 @@ cl_mem_allocate(enum cl_mem_type type,
/* Allocate space in memory */
bufmgr = cl_context_get_bufmgr(ctx);
assert(bufmgr);
+
+#ifdef HAS_USERPTR
+ if (ctx->device->host_unified_memory) {
+ /* currently only cl buf is supported, will add cl image support later */
+ if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) {
+ /* userptr not support tiling */
+ if (!is_tiled) {
+ int page_size = getpagesize();
+ if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
+ mem->is_userptr = 1;
+ mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
+ }
+ }
+ }
+ }
+
+ if (!mem->is_userptr)
+ mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
+#else
mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
+#endif
+
if (UNLIKELY(mem->bo == NULL)) {
err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
goto error;
@@ -387,12 +411,15 @@ cl_mem_new_buffer(cl_context ctx,
sz = ALIGN(sz, 4);
/* Create the buffer in video memory */
- mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, &err);
+ mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, data, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
/* Copy the data if required */
- if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
+ if (flags & CL_MEM_COPY_HOST_PTR)
+ cl_buffer_subdata(mem->bo, 0, sz, data);
+
+ if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr)
cl_buffer_subdata(mem->bo, 0, sz, data);
if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR)
@@ -762,7 +789,7 @@ _cl_mem_new_image(cl_context ctx,
sz = aligned_pitch * aligned_h * depth;
}
- mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, &err);
+ mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
@@ -1834,7 +1861,7 @@ LOCAL cl_mem cl_mem_new_libva_buffer(cl_context ctx,
cl_int err = CL_SUCCESS;
cl_mem mem = NULL;
- mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, &err);
+ mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, NULL, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
@@ -1875,7 +1902,7 @@ LOCAL cl_mem cl_mem_new_libva_image(cl_context ctx,
goto error;
}
- mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, &err);
+ mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, NULL, &err);
if (mem == NULL || err != CL_SUCCESS) {
err = CL_OUT_OF_HOST_MEMORY;
goto error;
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 95c5f056..2e9dd5ad 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -92,6 +92,7 @@ typedef struct _cl_mem {
int map_ref; /* The mapped count. */
uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */
cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
+ uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/
} _cl_mem;
struct _cl_mem_image {
@@ -262,6 +263,7 @@ cl_mem_allocate(enum cl_mem_type type,
cl_mem_flags flags,
size_t sz,
cl_int is_tiled,
+ void *host_ptr,
cl_int *errcode);
void
diff --git a/src/cl_mem_gl.c b/src/cl_mem_gl.c
index 28d2ac65..36409089 100644
--- a/src/cl_mem_gl.c
+++ b/src/cl_mem_gl.c
@@ -63,7 +63,7 @@ cl_mem_new_gl_texture(cl_context ctx,
goto error;
}
- mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, &err);
+ mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, NULL, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index bb97220c..fc037cc8 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -690,6 +690,20 @@ cl_buffer intel_share_image_from_libva(cl_context ctx,
return (cl_buffer)intel_bo;
}
+static cl_buffer intel_buffer_alloc_userptr(cl_buffer_mgr bufmgr, const char* name, void *data,size_t size, unsigned long flags)
+{
+#ifdef HAS_USERPTR
+ drm_intel_bo *bo;
+ bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags);
+ /* Fallback to unsynchronized userptr allocation if kernel has no MMU notifier enabled. */
+ if (bo == NULL)
+ bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags | I915_USERPTR_UNSYNCHRONIZED);
+ return (cl_buffer)bo;
+#else
+ return NULL;
+#endif
+}
+
static int32_t get_intel_tiling(cl_int tiling, uint32_t *intel_tiling)
{
switch (tiling) {
@@ -734,6 +748,7 @@ intel_setup_callbacks(void)
cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
+ cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr;
cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling;
#if defined(HAS_EGL)
cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture;