runtime: fix some subtle event bugs.

This patch fix the following two bugs in event handling. 1. When it's time to call a event's user call back function, we need to set the executed to true before the call. As that call back function may call into clReleaseEvent(), and if we don't set the executed status to true, it will enter infinite recursive loop. 2. After the user call clEnqueueNDRangeKernel to get a valid event, the user set a call back function to that event, and in that call back function, it will release that event. This scenario is totally correct. But our current event handling doesn't have a deadicated timer thread to update those on-the-fly events' status. Thus those events will not have a chance to get updated, and those call back function will not executed forever. To introduce a complete timer style thread to maintain this type of events is too heavy for this fix release. This patch choose an easy way to work around it. It will make sure the last gpgpu event to be finished before current task to be enqueued. After this patch, most of the OpenCV 3.0 cases could run smoothly without any serious issue. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
author: Zhigang Gong <zhigang.gong@intel.com> 2014-07-10 18:57:53 +0800
committer: Zhigang Gong <zhigang.gong@intel.com> 2014-07-11 17:01:52 +0800
commit: 7b846f6a276af32b091b1cdc889781aabcf24837 (patch)
tree: e5cae6818a13c6a3c83ba5fb501a78edf00a48ae
parent: e14647491dceb3d9f3d3facb529a96abfb457a0b (diff)
download: beignet-7b846f6a276af32b091b1cdc889781aabcf24837.tar.gz
4 files changed, 19 insertions, 8 deletions
diff --git a/src/cl_api.c b/src/cl_api.c
index 87590275..177a7e85 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1365,7 +1365,7 @@ clGetEventInfo(cl_event      event,
   } else if (param_name == CL_EVENT_COMMAND_TYPE) {
     FILL_GETINFO_RET (cl_command_type, 1, &event->type, CL_SUCCESS);
   } else if (param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) {
-    cl_event_update_status(event);
+    cl_event_update_status(event, 0);
     FILL_GETINFO_RET (cl_int, 1, &event->status, CL_SUCCESS);
   } else if (param_name == CL_EVENT_REFERENCE_COUNT) {
     cl_uint ref = event->ref_n;
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index d9718bfc..d45e92f1 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -75,6 +75,10 @@ cl_command_queue_delete(cl_command_queue queue)
   assert(queue);
   if (atomic_dec(&queue->ref_n) != 1) return;
 
+  // If there is a valid last event, we need to give it a chance to
+  // call the call-back function.
+  if (queue->last_event && queue->last_event->user_cb)
+    cl_event_update_status(queue->last_event, 1);
   /* Remove it from the list */
   assert(queue->ctx);
   pthread_mutex_lock(&queue->ctx->queue_lock);
@@ -454,6 +458,13 @@ cl_command_queue_flush(cl_command_queue queue)
 {
   GET_QUEUE_THREAD_GPGPU(queue);
   cl_command_queue_flush_gpgpu(queue, gpgpu);
+  // As we don't have a deadicate timer thread to take care the possible
+  // event which has a call back function registerred and the event will
+  // be released at the call back function, no other function will access
+  // the event any more. If we don't do this here, we will leak that event
+  // and all the corresponding buffers which is really bad.
+  if (queue->last_event && queue->last_event->user_cb)
+    cl_event_update_status(queue->last_event, 1);
   if (queue->current_event)
     cl_event_flush(queue->current_event);
   cl_invalid_thread_gpgpu(queue);
diff --git a/src/cl_event.c b/src/cl_event.c
index d40881a8..99e60eb5 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -55,6 +55,7 @@ void cl_event_flush(cl_event event)
     event->gpgpu = NULL;
   }
   cl_gpgpu_event_flush(event->gpgpu_event);
+  event->queue->last_event = event;
 }
 
 cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, cl_bool emplict)
@@ -95,8 +96,6 @@ cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type ty
   event->enqueue_cb = NULL;
   event->waits_head = NULL;
   event->emplict = emplict;
-  if(queue && event->gpgpu_event)
-    queue->last_event = event;
 
 exit:
   return event;
@@ -111,7 +110,7 @@ void cl_event_delete(cl_event event)
   if (UNLIKELY(event == NULL))
     return;
 
-  cl_event_update_status(event);
+  cl_event_update_status(event, 0);
 
   if (atomic_dec(&event->ref_n) > 1)
     return;
@@ -124,6 +123,7 @@ void cl_event_delete(cl_event event)
   while(event->user_cb) {
     cb = event->user_cb;
     if(cb->executed == CL_FALSE) {
+      cb->executed = CL_TRUE;
       cb->pfn_notify(event, event->status, cb->user_data);
     }
     event->user_cb = cb->next;
@@ -443,8 +443,8 @@ void cl_event_set_status(cl_event event, cl_int status)
   user_cb = event->user_cb;
   while(user_cb) {
     if(user_cb->status >= status) {
-      user_cb->pfn_notify(event, event->status, user_cb->user_data);
       user_cb->executed = CL_TRUE;
+      user_cb->pfn_notify(event, event->status, user_cb->user_data);
     }
     user_cb = user_cb->next;
   }
@@ -492,12 +492,12 @@ void cl_event_set_status(cl_event event, cl_int status)
   event->waits_head = NULL;
 }
 
-void cl_event_update_status(cl_event event)
+void cl_event_update_status(cl_event event, int wait)
 {
   if(event->status <= CL_COMPLETE)
     return;
   if((event->gpgpu_event) &&
-     (cl_gpgpu_event_update_status(event->gpgpu_event, 0) == command_complete))
+     (cl_gpgpu_event_update_status(event->gpgpu_event, wait) == command_complete))
     cl_event_set_status(event, CL_COMPLETE);
 }
 
diff --git a/src/cl_event.h b/src/cl_event.h
index 3c23d742..cfe5ddd6 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -89,7 +89,7 @@ void cl_event_new_enqueue_callback(cl_event, enqueue_data *, cl_uint, const cl_e
 /* Set the event status and call all callbacks */
 void cl_event_set_status(cl_event, cl_int);
 /* Check and update event status */
-void cl_event_update_status(cl_event);
+void cl_event_update_status(cl_event, cl_int);
 /* Create the marker event */
 cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *,  cl_event*);
 /* Create the barrier event */
author	Zhigang Gong <zhigang.gong@intel.com>	2014-07-10 18:57:53 +0800
committer	Zhigang Gong <zhigang.gong@intel.com>	2014-07-11 17:01:52 +0800
commit	7b846f6a276af32b091b1cdc889781aabcf24837 (patch)
tree	e5cae6818a13c6a3c83ba5fb501a78edf00a48ae
parent	e14647491dceb3d9f3d3facb529a96abfb457a0b (diff)
download	beignet-7b846f6a276af32b091b1cdc889781aabcf24837.tar.gz