summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunyan He <junyan.he@linux.intel.com>2014-06-10 12:53:12 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-06-11 11:04:09 +0800
commit97dec3e93f14d2431352b1501b57fe21cee3b719 (patch)
tree17b3a7d62aac51351719c94ff012c3785e7771be
parent64f2ad234111a6f16293955c1943bf0474d84006 (diff)
downloadbeignet-97dec3e93f14d2431352b1501b57fe21cee3b719.tar.gz
Add the printf logic into the run time.
Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r--backend/src/gbe_bin_interpreter.cpp6
-rw-r--r--src/cl_command_queue.c14
-rw-r--r--src/cl_command_queue_gen7.c24
-rw-r--r--src/cl_driver.h28
-rw-r--r--src/cl_driver_defs.c7
-rw-r--r--src/cl_gbe_loader.cpp25
-rw-r--r--src/intel/intel_gpgpu.c106
7 files changed, 210 insertions, 0 deletions
diff --git a/backend/src/gbe_bin_interpreter.cpp b/backend/src/gbe_bin_interpreter.cpp
index 64bf5c4b..1945b5b9 100644
--- a/backend/src/gbe_bin_interpreter.cpp
+++ b/backend/src/gbe_bin_interpreter.cpp
@@ -21,6 +21,7 @@
#include "sys/assert.cpp"
#include "sys/platform.cpp"
#include "ir/constant.cpp"
+#include "ir/printf.cpp"
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
@@ -62,6 +63,11 @@ struct BinInterpCallBackInitializer
gbe_kernel_get_image_data = gbe::kernelGetImageData;
gbe_get_image_base_index = gbe::getImageBaseIndex;
gbe_set_image_base_index = gbe::setImageBaseIndex;
+ gbe_get_printf_num = gbe::kernelGetPrintfNum;
+ gbe_dup_printfset = gbe::kernelDupPrintfSet;
+ gbe_get_printf_sizeof_size = gbe::kernelGetPrintfSizeOfSize;
+ gbe_release_printf_info = gbe::kernelReleasePrintfSet;
+ gbe_output_printf = gbe::kernelOutputPrintf;
}
~BinInterpCallBackInitializer() {
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index e6553ecd..618be65a 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -420,9 +420,23 @@ LOCAL cl_int
cl_command_queue_flush(cl_command_queue queue)
{
  GET_QUEUE_THREAD_GPGPU(queue);
+  /* Flush the thread's gpgpu batch, then emit any pending kernel printf
+   * output and drop the printf set recorded for the last enqueued kernel.
+   * Always returns CL_SUCCESS. */
+  size_t global_wk_sz[3];
+  /* Fetch the printf set (and the global work size stored with it) before
+   * flushing; it was attached to the gpgpu via cl_gpgpu_set_printf_info. */
+  void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz);
  cl_gpgpu_flush(gpgpu);
+
+  /* The printf buffers are only mapped when the set reports at least one
+   * printf statement. */
+  if (printf_info && gbe_get_printf_num(printf_info)) {
+    void *index_addr = cl_gpgpu_map_printf_buffer(gpgpu, 0);
+    void *buf_addr = cl_gpgpu_map_printf_buffer(gpgpu, 1);
+    gbe_output_printf(printf_info, index_addr, buf_addr, global_wk_sz[0],
+                      global_wk_sz[1], global_wk_sz[2]);
+    cl_gpgpu_unmap_printf_buffer(gpgpu, 0);
+    cl_gpgpu_unmap_printf_buffer(gpgpu, 1);
+  }
+
+  /* Release the set even when it contains no printf statements; otherwise
+   * the copy obtained from gbe_dup_printfset is never handed to
+   * gbe_release_printf_info and remains attached to the gpgpu. */
+  if (printf_info) {
+    gbe_release_printf_info(printf_info);
+    global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0;
+    cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz);
+  }
+
  cl_invalid_thread_gpgpu(queue);
  return CL_SUCCESS;
}
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 3401baa3..9680535b 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -247,6 +247,19 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cl_gpgpu_get_cache_ctrl());
}
+/* Create and bind the two printf buffers for a kernel launch.
+ * Buffer 0 (index buffer) is sized as one int per printf statement per
+ * work item; buffer 1 (output buffer) is sized as the per-work-item size
+ * reported by gbe_get_printf_sizeof_size times the number of work items.
+ * Each buffer is bound at the curbe offset the kernel reports for it. */
+static void
+cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, size_t global_sz) {
+  int32_t curbe_offset;
+  size_t bytes;
+
+  /* Index buffer. */
+  curbe_offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PRINTF_INDEX_POINTER, 0);
+  bytes = global_sz * sizeof(int) * printf_num;
+  cl_gpgpu_set_printf_buffer(gpgpu, 0, bytes, curbe_offset);
+
+  /* Output buffer. */
+  curbe_offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PRINTF_BUF_POINTER, 0);
+  bytes = gbe_get_printf_sizeof_size(printf_info) * global_sz;
+  cl_gpgpu_set_printf_buffer(gpgpu, 1, bytes, curbe_offset);
+}
+
+
LOCAL cl_int
cl_command_queue_ND_range_gen7(cl_command_queue queue,
cl_kernel ker,
@@ -264,7 +277,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
int32_t scratch_sz = gbe_kernel_get_scratch_size(ker->opaque);
size_t thread_n = 0u;
+ int printf_num = 0;
cl_int err = CL_SUCCESS;
+ size_t global_size = global_wk_sz[0] * global_wk_sz[1] * global_wk_sz[2];
+ void* printf_info = NULL;
/* Setup kernel */
kernel.name = "KERNEL";
@@ -298,12 +314,20 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
}
}
+ printf_info = gbe_dup_printfset(ker->opaque);
+ cl_gpgpu_set_printf_info(gpgpu, printf_info, (size_t *)global_wk_sz);
+
/* Setup the kernel */
if (queue->props & CL_QUEUE_PROFILING_ENABLE)
cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32, 1);
else
cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32, 0);
+ printf_num = gbe_get_printf_num(printf_info);
+ if (printf_num) {
+ cl_bind_printf(gpgpu, ker, printf_info, printf_num, global_size);
+ }
+
/* Bind user buffers */
cl_command_queue_bind_surface(queue, ker);
/* Bind user images */
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 2bca443c..421027a5 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -225,6 +225,34 @@ extern cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf;
typedef void (cl_gpgpu_unref_batch_buf_cb)(void*);
extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf;
+/* Set the printf buffer */
+typedef void (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t);
+extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer;
+
+/* Get the printf buffer offset in the aperture */
+typedef unsigned long (cl_gpgpu_reloc_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t);
+extern cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer;
+
+/* map the printf buffer */
+typedef void* (cl_gpgpu_map_printf_buffer_cb)(cl_gpgpu, uint32_t);
+extern cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer;
+
+/* unmap the printf buffer */
+typedef void (cl_gpgpu_unmap_printf_buffer_cb)(cl_gpgpu, uint32_t);
+extern cl_gpgpu_unmap_printf_buffer_cb *cl_gpgpu_unmap_printf_buffer;
+
+/* release the printf buffer */
+typedef unsigned long (cl_gpgpu_release_printf_buffer_cb)(cl_gpgpu, uint32_t);
+extern cl_gpgpu_release_printf_buffer_cb *cl_gpgpu_release_printf_buffer;
+
+/* Set the last printfset pointer */
+typedef void (cl_gpgpu_set_printf_info_cb)(cl_gpgpu, void *, size_t*);
+extern cl_gpgpu_set_printf_info_cb *cl_gpgpu_set_printf_info;
+
+/* Get the last printfset pointer */
+typedef void* (cl_gpgpu_get_printf_info_cb)(cl_gpgpu, size_t*);
+extern cl_gpgpu_get_printf_info_cb *cl_gpgpu_get_printf_info;
+
/* Will spawn all threads */
typedef void (cl_gpgpu_walker_cb)(cl_gpgpu,
uint32_t simd_sz,
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index ab3af498..3a9b9ed6 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -86,4 +86,11 @@ LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp =
LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL;
LOCAL cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf = NULL;
LOCAL cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf = NULL;
+LOCAL cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer = NULL;
+LOCAL cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer = NULL;
+LOCAL cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer = NULL;
+LOCAL cl_gpgpu_unmap_printf_buffer_cb *cl_gpgpu_unmap_printf_buffer = NULL;
+LOCAL cl_gpgpu_set_printf_info_cb *cl_gpgpu_set_printf_info = NULL;
+LOCAL cl_gpgpu_get_printf_info_cb *cl_gpgpu_get_printf_info = NULL;
+LOCAL cl_gpgpu_release_printf_buffer_cb *cl_gpgpu_release_printf_buffer = NULL;
diff --git a/src/cl_gbe_loader.cpp b/src/cl_gbe_loader.cpp
index b1b75d6d..38f9ab62 100644
--- a/src/cl_gbe_loader.cpp
+++ b/src/cl_gbe_loader.cpp
@@ -59,6 +59,11 @@ gbe_kernel_get_image_size_cb *gbe_kernel_get_image_size = NULL;
gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data = NULL;
gbe_set_image_base_index_cb *gbe_set_image_base_index_interp = NULL;
gbe_get_image_base_index_cb *gbe_get_image_base_index = NULL;
+gbe_get_printf_num_cb* gbe_get_printf_num = NULL;
+gbe_dup_printfset_cb* gbe_dup_printfset = NULL;
+gbe_get_printf_sizeof_size_cb* gbe_get_printf_sizeof_size = NULL;
+gbe_release_printf_info_cb* gbe_release_printf_info = NULL;
+gbe_output_printf_cb* gbe_output_printf = NULL;
struct GbeLoaderInitializer
{
@@ -200,6 +205,26 @@ struct GbeLoaderInitializer
if (gbe_get_image_base_index == NULL)
return false;
+ gbe_get_printf_num = *(gbe_get_printf_num_cb**)dlsym(dlhInterp, "gbe_get_printf_num");
+ if (gbe_get_printf_num == NULL)
+ return false;
+
+ gbe_dup_printfset = *(gbe_dup_printfset_cb**)dlsym(dlhInterp, "gbe_dup_printfset");
+ if (gbe_dup_printfset == NULL)
+ return false;
+
+ gbe_get_printf_sizeof_size = *(gbe_get_printf_sizeof_size_cb**)dlsym(dlhInterp, "gbe_get_printf_sizeof_size");
+ if (gbe_get_printf_sizeof_size == NULL)
+ return false;
+
+ gbe_release_printf_info = *(gbe_release_printf_info_cb**)dlsym(dlhInterp, "gbe_release_printf_info");
+ if (gbe_release_printf_info == NULL)
+ return false;
+
+ gbe_output_printf = *(gbe_output_printf_cb**)dlsym(dlhInterp, "gbe_output_printf");
+ if (gbe_output_printf == NULL)
+ return false;
+
return true;
}
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index a1bd6729..3ec03154 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -78,6 +78,9 @@ enum {max_sampler_n = 16 };
/* Handle GPGPU state */
struct intel_gpgpu
{
+ void* ker_opaque;
+ size_t global_wk_sz[3];
+ void* printf_info;
intel_driver_t *drv;
intel_batchbuffer_t *batch;
cl_gpgpu_kernel *ker;
@@ -97,6 +100,8 @@ struct intel_gpgpu
struct { drm_intel_bo *bo; } scratch_b;
struct { drm_intel_bo *bo; } constant_b;
struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */
+ struct { drm_intel_bo *bo;
+ drm_intel_bo *ibo;} printf_b; /* the printf buf and index buf*/
struct { drm_intel_bo *bo; } aux_buf;
struct {
@@ -155,6 +160,10 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
return;
if(gpgpu->time_stamp_b.bo)
drm_intel_bo_unreference(gpgpu->time_stamp_b.bo);
+ if(gpgpu->printf_b.bo)
+ drm_intel_bo_unreference(gpgpu->printf_b.bo);
+ if(gpgpu->printf_b.ibo)
+ drm_intel_bo_unreference(gpgpu->printf_b.ibo);
if (gpgpu->aux_buf.bo)
drm_intel_bo_unreference(gpgpu->aux_buf.bo);
if (gpgpu->perf_b.bo)
@@ -567,6 +576,13 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
gpgpu->urb.size_cs_entry = size_cs_entry;
gpgpu->max_threads = max_threads;
+ if (gpgpu->printf_b.ibo)
+ dri_bo_unreference(gpgpu->printf_b.ibo);
+ gpgpu->printf_b.ibo = NULL;
+ if (gpgpu->printf_b.bo)
+ dri_bo_unreference(gpgpu->printf_b.bo);
+ gpgpu->printf_b.bo = NULL;
+
/* Set the profile buffer*/
if(gpgpu->time_stamp_b.bo)
dri_bo_unreference(gpgpu->time_stamp_b.bo);
@@ -1209,6 +1225,90 @@ intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, intel_event_t *event,
drm_intel_gem_bo_unmap_gtt(event->ts_buf);
}
+/* (Re)allocate one of the two printf buffers, zero it, and hand it to
+ * intel_gpgpu_bind_buf together with the given offset.
+ * i == 0 selects the index buffer (printf_b.ibo), i == 1 the output
+ * buffer (printf_b.bo); any other value trips the assert.
+ * NOTE(review): the results of dri_bo_alloc and drm_intel_bo_map are not
+ * checked here; the void return type leaves no way to report a failure. */
+static void
+intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset)
+{
+  drm_intel_bo **slot = NULL;
+  const char *label = NULL;
+  drm_intel_bo *target = NULL;
+
+  switch (i) {
+  case 0: /* the index buffer. */
+    slot = &gpgpu->printf_b.ibo;
+    label = "Printf index buffer";
+    break;
+  case 1:
+    slot = &gpgpu->printf_b.bo;
+    label = "Printf output buffer";
+    break;
+  default:
+    assert(0);
+  }
+
+  if (slot != NULL) {
+    /* Drop the buffer left over from a previous launch before replacing it. */
+    if (*slot)
+      dri_bo_unreference(*slot);
+    *slot = dri_bo_alloc(gpgpu->drv->bufmgr, label, size, 4096);
+    target = *slot;
+  }
+
+  /* Clear the fresh buffer through a temporary CPU mapping. */
+  drm_intel_bo_map(target, 1);
+  memset(target->virtual, 0, size);
+  drm_intel_bo_unmap(target);
+
+  intel_gpgpu_bind_buf(gpgpu, target, offset, 0, 0);
+}
+
+
+/* Map one of the printf buffers for CPU access and return its address.
+ * i == 0 selects the index buffer (printf_b.ibo), i == 1 the output
+ * buffer (printf_b.bo); any other value trips the assert.
+ * Returns NULL when the selected buffer does not exist or when
+ * drm_intel_bo_map reports a failure, instead of dereferencing a NULL
+ * buffer object. */
+static void*
+intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i)
+{
+  drm_intel_bo *bo = NULL;
+  if (i == 0) {
+    bo = gpgpu->printf_b.ibo;
+  } else if (i == 1) {
+    bo = gpgpu->printf_b.bo;
+  } else
+    assert(0);
+
+  /* With NDEBUG the assert above vanishes, so an invalid index also lands
+   * here with bo == NULL. */
+  if (bo == NULL || drm_intel_bo_map(bo, 1) != 0)
+    return NULL;
+  return bo->virtual;
+}
+
+
+/* Unmap a printf buffer previously mapped for CPU access.
+ * i == 0 selects the index buffer (printf_b.ibo), i == 1 the output
+ * buffer (printf_b.bo); any other value trips the assert. */
+static void
+intel_gpgpu_unmap_printf_buf_addr(intel_gpgpu_t *gpgpu, uint32_t i)
+{
+  drm_intel_bo *target = NULL;
+
+  switch (i) {
+  case 0:
+    target = gpgpu->printf_b.ibo;
+    break;
+  case 1:
+    target = gpgpu->printf_b.bo;
+    break;
+  default:
+    assert(0);
+  }
+
+  drm_intel_bo_unmap(target);
+}
+
+
+/* Drop the reference held on one of the printf buffers and clear the slot.
+ * i == 0 selects the index buffer (printf_b.ibo), i == 1 the output
+ * buffer (printf_b.bo); any other value trips the assert. */
+static void
+intel_gpgpu_release_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i)
+{
+  switch (i) {
+  case 0:
+    drm_intel_bo_unreference(gpgpu->printf_b.ibo);
+    gpgpu->printf_b.ibo = NULL;
+    break;
+  case 1:
+    drm_intel_bo_unreference(gpgpu->printf_b.bo);
+    gpgpu->printf_b.bo = NULL;
+    break;
+  default:
+    assert(0);
+  }
+}
+
+
+/* Remember the printf set pointer and the three global work sizes on the
+ * gpgpu so they can be read back later with intel_gpgpu_get_printf_info.
+ * global_sz must point to at least three elements. */
+static void
+intel_gpgpu_set_printf_info(intel_gpgpu_t *gpgpu, void* printf_info, size_t * global_sz)
+{
+  int dim;
+
+  gpgpu->printf_info = printf_info;
+  for (dim = 0; dim < 3; dim++)
+    gpgpu->global_wk_sz[dim] = global_sz[dim];
+}
+
+
+/* Copy the three stored global work sizes into global_sz (which must have
+ * room for three elements) and return the stored printf set pointer. */
+static void*
+intel_gpgpu_get_printf_info(intel_gpgpu_t *gpgpu, size_t * global_sz)
+{
+  int dim;
+
+  for (dim = 0; dim < 3; dim++)
+    global_sz[dim] = gpgpu->global_wk_sz[dim];
+  return gpgpu->printf_info;
+}
+
+
LOCAL void
intel_set_gpgpu_callbacks(int device_id)
{
@@ -1239,6 +1339,12 @@ intel_set_gpgpu_callbacks(int device_id)
cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
+ cl_gpgpu_set_printf_buffer = (cl_gpgpu_set_printf_buffer_cb *)intel_gpgpu_set_printf_buf;
+ cl_gpgpu_map_printf_buffer = (cl_gpgpu_map_printf_buffer_cb *)intel_gpgpu_map_printf_buf;
+ cl_gpgpu_unmap_printf_buffer = (cl_gpgpu_unmap_printf_buffer_cb *)intel_gpgpu_unmap_printf_buf_addr;
+ cl_gpgpu_release_printf_buffer = (cl_gpgpu_release_printf_buffer_cb *)intel_gpgpu_release_printf_buf;
+ cl_gpgpu_set_printf_info = (cl_gpgpu_set_printf_info_cb *)intel_gpgpu_set_printf_info;
+ cl_gpgpu_get_printf_info = (cl_gpgpu_get_printf_info_cb *)intel_gpgpu_get_printf_info;
if (IS_HASWELL(device_id)) {
cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;