author     Yang Rong <rong.r.yang@intel.com>      2014-05-20 10:46:19 +0800
committer  Zhigang Gong <zhigang.gong@intel.com>  2014-05-22 13:06:45 +0800
commit     de996fa90c26997a9d2ecd22192c146a661161f3 (patch)
tree       0df219ea9a2e668e68d346f934932b3539273705
parent     a13372ca0116861983a64868d9165dd7b7119e51 (diff)
download   beignet-de996fa90c26997a9d2ecd22192c146a661161f3.tar.gz
Fix GTT map failure when the memory object is too large.

After the maximum allocation size was raised to 256MB, mapping a large memory object through the GTT can fail on some systems. So when an image is larger than 128MB, disable tiling and use a normal map instead. clEnqueueMapBuffer/Image may still fail in that path, though, because it uses an unsynchronized GTT map.

Signed-off-by: Yang Rong <rong.r.yang@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
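The core of the change, made at image-creation time, is a size check: once the allocation would exceed 128MB, the image falls back to a linear (untiled) layout so that it can later be mapped with a normal CPU map instead of a GTT map. The sketch below is a minimal, standalone illustration of that decision only; the function and parameter names (choose_tiling, aligned_pitch, aligned_h) are made up for this example, while the real change lives in _cl_mem_new_image in src/cl_mem.c, shown in the diff further down.

#include <stddef.h>
#include <stdio.h>

#define MB               (1024 * 1024)
#define MAX_TILING_SIZE  (128 * MB)     /* same threshold as the patch */

/* Returns 1 if the image may stay tiled, 0 if tiling must be disabled.
 * w/h/depth/bpp are the raw image dimensions; aligned_pitch/aligned_h
 * are the tiling-aligned values. *pitch, *height and *size receive the
 * final layout. (Illustrative helper, not Beignet's actual API.) */
static int choose_tiling(size_t w, size_t h, size_t depth, size_t bpp,
                         size_t aligned_pitch, size_t aligned_h,
                         size_t *pitch, size_t *height, size_t *size)
{
  *pitch  = aligned_pitch;
  *height = aligned_h;
  *size   = aligned_pitch * aligned_h * depth;

  if (*size > MAX_TILING_SIZE) {
    /* A GTT map of such a large tiled object can fail on some systems,
     * so fall back to a linear layout and a normal CPU map. */
    *pitch  = w * bpp;
    *height = h;
    *size   = *pitch * *height * depth;
    return 0;
  }
  return 1;
}

int main(void)
{
  size_t pitch, height, size;
  /* An 8192x8192 RGBA image (~256MB): tiling gets disabled. */
  int tiled = choose_tiling(8192, 8192, 1, 4, 8192 * 4, 8192,
                            &pitch, &height, &size);
  printf("tiled=%d size=%zu MB\n", tiled, size / MB);
  return 0;
}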
-rw-r--r--  src/cl_api.c     | 55
-rw-r--r--  src/cl_enqueue.c | 27
-rw-r--r--  src/cl_enqueue.h |  1
-rw-r--r--  src/cl_mem.c     | 18
-rw-r--r--  src/cl_mem.h     |  1
5 files changed, 68 insertions(+), 34 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 03a1cda7..0800c091 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2161,7 +2161,7 @@ error:
return err;
}
-static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset, size_t size)
+static cl_int _cl_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset, size_t size)
{
cl_int slot = -1;
int err = CL_SUCCESS;
@@ -2172,17 +2172,13 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
sub_offset = buffer->sub_offset;
}
- if (!(*ptr = cl_mem_map_gtt_unsync(mem))) {
- err = CL_MAP_FAILURE;
- goto error;
- }
- *ptr = (char*)(*ptr) + offset + sub_offset;
+ ptr = (char*)ptr + offset + sub_offset;
if(mem->flags & CL_MEM_USE_HOST_PTR) {
assert(mem->host_ptr);
//only calc ptr here, will do memcpy in enqueue
*mem_ptr = mem->host_ptr + offset + sub_offset;
} else {
- *mem_ptr = *ptr;
+ *mem_ptr = ptr;
}
/* Record the mapped address. */
if (!mem->mapped_ptr_sz) {
@@ -2190,7 +2186,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
mem->mapped_ptr = (cl_mapped_ptr *)malloc(
sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz);
if (!mem->mapped_ptr) {
- cl_mem_unmap_gtt(mem);
+ cl_mem_unmap_auto(mem);
err = CL_OUT_OF_HOST_MEMORY;
goto error;
}
@@ -2208,7 +2204,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2);
if (!new_ptr) {
- cl_mem_unmap_gtt (mem);
+ cl_mem_unmap_auto(mem);
err = CL_OUT_OF_HOST_MEMORY;
goto error;
}
@@ -2223,7 +2219,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
}
assert(slot != -1);
mem->mapped_ptr[slot].ptr = *mem_ptr;
- mem->mapped_ptr[slot].v_ptr = *ptr;
+ mem->mapped_ptr[slot].v_ptr = ptr;
mem->mapped_ptr[slot].size = size;
mem->map_ref++;
error:
@@ -2270,10 +2266,6 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
goto error;
}
- err = _cl_map_mem(buffer, &ptr, &mem_ptr, offset, size);
- if (err != CL_SUCCESS)
- goto error;
-
TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
data = &no_wait_data;
@@ -2282,12 +2274,25 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
data->offset = offset;
data->size = size;
data->ptr = ptr;
+ data->unsync_map = 1;
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
event, data, CL_COMMAND_MAP_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
+ data->unsync_map = 0;
err = cl_enqueue_handle(event ? *event : NULL, data);
+ if (err != CL_SUCCESS)
+ goto error;
+ ptr = data->ptr;
if(event) cl_event_set_status(*event, CL_COMPLETE);
+ } else {
+ if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
+ err = CL_MAP_FAILURE;
+ goto error;
+ }
}
+ err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size);
+ if (err != CL_SUCCESS)
+ goto error;
error:
if (errcode_ret)
@@ -2344,11 +2349,6 @@ clEnqueueMapImage(cl_command_queue command_queue,
goto error;
}
- if (!(ptr = cl_mem_map_gtt_unsync(mem))) {
- err = CL_MAP_FAILURE;
- goto error;
- }
-
size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
size_t size;
if(region[2] == 1) {
@@ -2362,10 +2362,6 @@ clEnqueueMapImage(cl_command_queue command_queue,
size += image->bpp * (origin[0] + region[0]);
}
- err = _cl_map_mem(mem, &ptr, &mem_ptr, offset, size);
- if (err != CL_SUCCESS)
- goto error;
-
TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx);
data = &no_wait_data;
@@ -2378,12 +2374,25 @@ clEnqueueMapImage(cl_command_queue command_queue,
data->slice_pitch = *image_slice_pitch;
data->ptr = ptr;
data->offset = offset;
+ data->unsync_map = 1;
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
event, data, CL_COMMAND_MAP_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
+ data->unsync_map = 0;
err = cl_enqueue_handle(event ? *event : NULL, data);
+ if (err != CL_SUCCESS)
+ goto error;
+ ptr = data->ptr;
if(event) cl_event_set_status(*event, CL_COMPLETE);
+ } else {
+ if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) {
+ err = CL_MAP_FAILURE;
+ goto error;
+ }
}
+ err = _cl_map_mem(mem, ptr, &mem_ptr, offset, size);
+ if (err != CL_SUCCESS)
+ goto error;
error:
if (errcode_ret)
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 330d2301..800668d8 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -246,17 +246,21 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
mem->type == CL_MEM_SUBBUFFER_TYPE);
struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
- //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
- if (!(ptr = cl_mem_map_gtt(mem))) {
+ if(data->unsync_map == 1)
+ //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
+ ptr = cl_mem_map_gtt(mem);
+ else
+ ptr = cl_mem_map_auto(mem);
+
+ if (ptr == NULL) {
err = CL_MAP_FAILURE;
goto error;
}
-
- ptr = (char*)ptr + data->offset + buffer->sub_offset;
- assert(data->ptr == ptr);
+ data->ptr = ptr;
if(mem->flags & CL_MEM_USE_HOST_PTR) {
assert(mem->host_ptr);
+ ptr = (char*)ptr + data->offset + buffer->sub_offset;
memcpy(mem->host_ptr + data->offset, ptr, data->size);
}
@@ -271,12 +275,17 @@ cl_int cl_enqueue_map_image(enqueue_data *data)
void *ptr = NULL;
CHECK_IMAGE(mem, image);
- if (!(ptr = cl_mem_map_gtt(mem))) {
+ if(data->unsync_map == 1)
+ //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
+ ptr = cl_mem_map_gtt(mem);
+ else
+ ptr = cl_mem_map_auto(mem);
+
+ if (ptr == NULL) {
err = CL_MAP_FAILURE;
goto error;
}
-
- assert(data->ptr == (char*)ptr + data->offset);
+ data->ptr = ptr;
if(mem->flags & CL_MEM_USE_HOST_PTR) {
assert(mem->host_ptr);
@@ -323,7 +332,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
assert(v_ptr == mapped_ptr);
}
- cl_mem_unmap_gtt(memobj);
+ cl_mem_unmap_auto(memobj);
/* shrink the mapped slot. */
if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
index 1d3ae5f9..c7e33da1 100644
--- a/src/cl_enqueue.h
+++ b/src/cl_enqueue.h
@@ -60,6 +60,7 @@ typedef struct _enqueue_data {
const void * const_ptr; /* Const ptr for memory read */
void * ptr; /* Ptr for write and return value */
const cl_mem* mem_list; /* mem_list of clEnqueueNativeKernel */
+ uint8_t unsync_map; /* Indicates that clEnqueueMapBuffer/Image used an unsynchronized map */
void (*user_func)(void *); /* pointer to a host-callable user function */
} enqueue_data;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 6b8ca7cd..7092385a 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -48,6 +48,8 @@
#define CL_MEM_OBJECT_IMAGE2D 0x10F1
#define CL_MEM_OBJECT_IMAGE3D 0x10F2
+#define MAX_TILING_SIZE 128 * MB
+
static cl_mem_object_type
cl_get_mem_object_type(cl_mem mem)
{
@@ -622,6 +624,15 @@ _cl_mem_new_image(cl_context ctx,
sz = aligned_pitch * aligned_h * depth;
+ /* If sz is larger than 128MB, a GTT map may fail on some systems.
+    Because there is no obvious performance drop, disable tiling. */
+ if(tiling != CL_NO_TILE && sz > MAX_TILING_SIZE) {
+ tiling = CL_NO_TILE;
+ aligned_pitch = w * bpp;
+ aligned_h = h;
+ sz = aligned_pitch * aligned_h * depth;
+ }
+
mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
@@ -714,7 +725,7 @@ cl_mem_delete(cl_mem mem)
for(i=0; i<mem->mapped_ptr_sz; i++) {
if(mem->mapped_ptr[i].ptr != NULL) {
mem->map_ref--;
- cl_mem_unmap_gtt(mem);
+ cl_mem_unmap_auto(mem);
}
}
assert(mem->map_ref == 0);
@@ -1326,6 +1337,7 @@ cl_mem_map_gtt(cl_mem mem)
{
cl_buffer_map_gtt(mem->bo);
assert(cl_buffer_get_virtual(mem->bo));
+ mem->mapped_gtt = 1;
return cl_buffer_get_virtual(mem->bo);
}
@@ -1356,8 +1368,10 @@ cl_mem_map_auto(cl_mem mem)
LOCAL cl_int
cl_mem_unmap_auto(cl_mem mem)
{
- if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE)
+ if (mem->mapped_gtt == 1) {
cl_buffer_unmap_gtt(mem->bo);
+ mem->mapped_gtt = 0;
+ }
else
cl_buffer_unmap(mem->bo);
return CL_SUCCESS;
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 47a30dc1..5719c60d 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -87,6 +87,7 @@ typedef struct _cl_mem {
cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
int mapped_ptr_sz; /* The array size of mapped_ptr. */
int map_ref; /* The mapped count. */
+ uint8_t mapped_gtt; /* Set when the object is GTT mapped; checked on unmap. */
cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
} _cl_mem;