summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunyan He <junyan.he@intel.com>2017-06-23 18:18:29 +0800
committerYang Rong <rong.r.yang@intel.com>2017-08-02 17:16:30 +0800
commit87f7a98225991f99928b3a933c229aed420af61b (patch)
tree689455825b699d5a5b2ce7945763c332e5f65892
parentde4044d0278c8487405d073eab4114c37eb9e5a9 (diff)
downloadbeignet-newRT.tar.gz
Implement printf for new runtime.newRT
We store each kernel's printf statements in the ELF file and output their logs when the ND_Range has finished. Signed-off-by: Junyan He <junyan.he@intel.com>
-rw-r--r--backend/src/backend/gen_program_elf.cpp47
-rw-r--r--backend/src/backend/program.hpp6
-rw-r--r--backend/src/ir/printf.hpp37
-rw-r--r--backend/src/llvm/llvm_printf_parser.cpp16
-rw-r--r--runtime/gen/CMakeLists.txt1
-rw-r--r--runtime/gen/cl_command_queue_gen.c85
-rw-r--r--runtime/gen/cl_gen.h6
-rw-r--r--runtime/gen/cl_kernel_gen.c131
-rw-r--r--runtime/gen/cl_printf_gen.c633
9 files changed, 907 insertions, 55 deletions
diff --git a/backend/src/backend/gen_program_elf.cpp b/backend/src/backend/gen_program_elf.cpp
index 69294281..453bdcb1 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -214,14 +214,30 @@ using namespace ELFIO;
/* The format for Compiler info is:
-------------------------------
| GEN_NOTE_TYPE_COMPILER_INFO |
- ----------------------------------------
+ --------------------------------------
| Compiler name (GBE_Compiler e.g.) |
- ----------------------------------------
+ --------------------------------------
| LLVM version major:4 |
------------------------
| LLVM version minor:4 |
------------------------ */
+/* The format for printf is:
+ ---------------------------
+ | GEN_NOTE_TYPE_CL_PRINTF |
+ ---------------------------
+ | The Kernel name |
+ -------------------------------
+ | CL printf bti:4 |
+ ----------------------
+ | CL printf number:4 |
+ -------------------------------------------
+ | CL printf id for one printf statement:4 |
+ -------------------------------------------
+ | printf format string |
+ ------------------------
+ */
+
class GenProgramElfContext
{
public:
@@ -232,6 +248,7 @@ public:
GEN_NOTE_TYPE_CL_INFO = 4,
GEN_NOTE_TYPE_CL_DEVICE_ENQUEUE_INFO = 5,
GEN_NOTE_TYPE_COMPILER_INFO = 6,
+ GEN_NOTE_TYPE_CL_PRINTF = 7,
};
struct KernelInfoHelper {
@@ -394,6 +411,32 @@ void GenProgramElfContext::emitOneKernelCLInfo(GenKernel &kernel)
uint32_t wg_sz_size = 0;
uint32_t arg_info_size = 0;
+ /* Add printf info for this kernel */
+ if (kernel.getPrintfNum() != 0) {
+ std::map<uint32_t, std::string> all_printf;
+ uint32_t printf_n = kernel.collectPrintfStr(all_printf);
+ assert(printf_n == kernel.getPrintfNum());
+ std::ostringstream oss;
+ size_t sz = 0;
+
+ uint32_t bti = kernel.getPrintfBufBTI();
+ oss.write((char *)(&bti), sizeof(uint32_t));
+ sz += sizeof(uint32_t);
+ oss.write((char *)(&printf_n), sizeof(uint32_t));
+ sz += sizeof(uint32_t);
+
+ for (auto iter = all_printf.begin(); iter != all_printf.end(); iter++) {
+ uint32_t id = iter->first;
+ oss.write((char *)(&id), sizeof(uint32_t));
+ sz += sizeof(uint32_t);
+ oss.write(iter->second.c_str(), strlen(iter->second.c_str()) + 1);
+ sz += strlen(iter->second.c_str()) + 1;
+ }
+
+ this->cl_note_writer->add_note(GenProgramElfContext::GEN_NOTE_TYPE_CL_PRINTF,
+ kernel.getName(), oss.str().c_str(), sz);
+ }
+
if ((kernel.getFunctionAttributes())[0] != 0)
attr_size = ::strlen(kernel.getFunctionAttributes()) + 1;
all_str_len = ALIGN(attr_size, 4);
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index 85411b58..62fecf13 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -168,7 +168,11 @@ namespace gbe {
uint32_t getPrintfNum() const {
return printfSet ? printfSet->getPrintfNum() : 0;
}
-
+    /*! Forward to the printf set's collectPrintfStr, filling all_printf.
+        Returns what the printf set returns, or 0 when there is no printf set. */
+    uint32_t collectPrintfStr(std::map<uint32_t, std::string>& all_printf) const {
+      return printfSet ? printfSet->collectPrintfStr(all_printf) : 0;
+    }
void * dupPrintfSet() const {
void* ptr = printfSet ? (void *)(new ir::PrintfSet(*printfSet)) : NULL;
return ptr;
diff --git a/backend/src/ir/printf.hpp b/backend/src/ir/printf.hpp
index 728aa683..28944c71 100644
--- a/backend/src/ir/printf.hpp
+++ b/backend/src/ir/printf.hpp
@@ -123,7 +123,7 @@ namespace gbe
type = PRINTF_SLOT_TYPE_STRING;
}
- PrintfSlot(PrintfState& st) {
+ PrintfSlot(PrintfState& st, std::string& s) : str(s) {
type = PRINTF_SLOT_TYPE_STATE;
state = st;
}
@@ -135,6 +135,7 @@ namespace gbe
} else if (other.type == PRINTF_SLOT_TYPE_STATE) {
type = PRINTF_SLOT_TYPE_STATE;
state = other.state;
+ str = other.str;
} else {
type = PRINTF_SLOT_TYPE_NONE;
}
@@ -245,6 +246,40 @@ namespace gbe
void outputPrintf(void* buf_addr);
+    /*! Rebuild one flat format string per entry of fmts and insert it into
+        all_printf under the entry's key. A %s slot is replaced by its constant
+        string, truncated to a positive precision and space-padded to a
+        positive minimum width; every other slot contributes its text as is.
+        Returns the number of entries visited. */
+    uint32_t collectPrintfStr(std::map<uint32_t, std::string>& all_printf) const {
+      uint32_t count = 0;
+
+      for (const auto &entry : fmts) {
+        const PrintfFmt& slots = entry.second;
+        std::string flat;
+
+        for (const auto &slot : slots) {
+          const bool is_string_conv = slot.type == PRINTF_SLOT_TYPE_STATE &&
+                                      slot.state.conversion_specifier == PRINTF_CONVERSION_S;
+          if (!is_string_conv) {
+            flat += slot.str;
+            continue;
+          }
+
+          std::string text = slot.state.str;
+
+          /* Apply the precision: keep at most that many characters. */
+          if (slot.state.precision > 0 &&
+              text.size() > static_cast<size_t>(slot.state.precision))
+            text.erase(static_cast<size_t>(slot.state.precision));
+
+          /* Apply the minimum width: pad with spaces on the chosen side. */
+          if (slot.state.min_width > 0 &&
+              text.size() < static_cast<size_t>(slot.state.min_width)) {
+            const size_t pad = static_cast<size_t>(slot.state.min_width) - text.size();
+            if (slot.state.left_justified)
+              text.append(pad, ' ');
+            else
+              text.insert(0, pad, ' ');
+          }
+
+          flat += text;
+        }
+
+        all_printf.insert(std::pair<uint32_t, std::string>(entry.first, flat));
+        count++;
+      }
+
+      return count;
+    }
+
+
private:
std::map<uint32_t, PrintfFmt> fmts;
friend struct LockOutput;
diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
index 6bb7c52a..b8c6114e 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -245,16 +245,18 @@ again:
/* Now parse the % start conversion_specifier. */
ret_char = __parse_printf_state(p, end, &rend, &state);
- if (ret_char < 0)
+ if (ret_char < 0) {
goto error;
+ } else {
+ std::string s(p, size_t(rend - p));
+ printf_fmt->push_back(PrintfSlot(state, s));
+ num++;
- printf_fmt->push_back(state);
- num++;
-
- if (rend == end)
- break;
+ if (rend == end)
+ break;
- begin = rend;
+ begin = rend;
+ }
}
#if 0
diff --git a/runtime/gen/CMakeLists.txt b/runtime/gen/CMakeLists.txt
index 83ed0c6a..142f1b3a 100644
--- a/runtime/gen/CMakeLists.txt
+++ b/runtime/gen/CMakeLists.txt
@@ -78,6 +78,7 @@ set(OPENCL_GEN_SRC
cl_compiler_gen.c
cl_event_gen.c
cl_sampler_gen.c
+ cl_printf_gen.c
)
if (X11_FOUND)
diff --git a/runtime/gen/cl_command_queue_gen.c b/runtime/gen/cl_command_queue_gen.c
index 46348b69..6215761d 100644
--- a/runtime/gen/cl_command_queue_gen.c
+++ b/runtime/gen/cl_command_queue_gen.c
@@ -82,6 +82,14 @@ typedef struct gen_gpgpu {
} mem;
struct {
+ uint32_t printf_buf_size;
+ drm_intel_bo *printf_bo; /* Printf buffer */
+ uint32_t printf_num;
+ cl_uint *printf_ids;
+ char **printf_strings;
+ } printf;
+
+ struct {
uint64_t sampler_bitmap; /* sampler usage bitmap. */
} sampler;
@@ -460,6 +468,59 @@ gen_gpgpu_setup_scratch(gen_gpgpu *gpu)
}
static cl_int
+gen_gpgpu_setup_printf_buffer(gen_gpgpu *gpu, cl_kernel_gen kernel_gen, const size_t *global_wk_sz_use)
+{
+  /*
+   * Allocate and initialise the printf output buffer for one ND range and
+   * snapshot the kernel's printf ids / format strings into gpu->printf.
+   *
+   * Returns CL_SUCCESS (also when the kernel has no printf), CL_OUT_OF_HOST_MEMORY
+   * when a host allocation fails, or CL_OUT_OF_RESOURCES when the buffer object
+   * cannot be allocated or mapped. On failure nothing is published in
+   * gpu->printf and every temporary allocation is released.
+   */
+  const uint64_t min_size = 1 * 1024 * 1024;
+  const uint64_t max_size = 16 * 1024 * 1024;
+  drm_intel_bufmgr *bufmgr = gpu->bufmgr;
+  drm_intel_bo *bo = NULL;
+  cl_uint *ids = NULL;
+  char **strings = NULL;
+  uint64_t wanted;
+  uint32_t buf_size;
+  size_t len;
+  cl_uint i;
+  cl_int ret = CL_OUT_OF_HOST_MEMORY;
+
+  if (kernel_gen->printf_num == 0)
+    return CL_SUCCESS;
+
+  /* A guessed size. Compute it in 64 bits and clamp before narrowing, so a
+     large global size cannot wrap around to a tiny buffer. */
+  wanted = (uint64_t)global_wk_sz_use[0] * global_wk_sz_use[1] * global_wk_sz_use[2] *
+           sizeof(int) * 16 * kernel_gen->printf_num;
+  if (wanted > max_size) // at most.
+    wanted = max_size;
+  if (wanted < min_size) // at least.
+    wanted = min_size;
+  buf_size = (uint32_t)wanted;
+
+  ids = CL_CALLOC(kernel_gen->printf_num, sizeof(cl_uint));
+  strings = CL_CALLOC(kernel_gen->printf_num, sizeof(char *));
+  if (ids == NULL || strings == NULL)
+    goto error;
+
+  for (i = 0; i < kernel_gen->printf_num; i++) {
+    ids[i] = kernel_gen->printf_ids[i];
+    len = strlen(kernel_gen->printf_strings[i]) + 1;
+    strings[i] = CL_MALLOC(len);
+    if (strings[i] == NULL)
+      goto error;
+
+    memcpy(strings[i], kernel_gen->printf_strings[i], len);
+  }
+
+  bo = drm_intel_bo_alloc(bufmgr, "PRINTF_BO", buf_size, 4096);
+  if (bo == NULL) {
+    ret = CL_OUT_OF_RESOURCES;
+    goto error;
+  }
+
+  if (drm_intel_bo_map(bo, 1) != 0) {
+    ret = CL_OUT_OF_RESOURCES;
+    goto error;
+  }
+  memset(bo->virtual, 0, buf_size);
+  *(uint32_t *)(bo->virtual) = 4; // first four is for the length.
+  drm_intel_bo_unmap(bo);
+
+  /* Publish only once everything succeeded: the finish and delete paths key
+     off printf_num / printf_bo and must never see a half-built state. */
+  gpu->printf.printf_ids = ids;
+  gpu->printf.printf_strings = strings;
+  gpu->printf.printf_buf_size = buf_size;
+  gpu->printf.printf_num = kernel_gen->printf_num;
+  gpu->printf.printf_bo = bo;
+
+  if (gpu->mem.max_bti < kernel_gen->printf_bti)
+    gpu->mem.max_bti = kernel_gen->printf_bti;
+
+  gen_gpgpu_setup_bti(gpu, gpu->printf.printf_bo, 0, buf_size,
+                      kernel_gen->printf_bti, I965_SURFACEFORMAT_RAW);
+  return CL_SUCCESS;
+
+error:
+  if (bo)
+    drm_intel_bo_unreference(bo);
+  if (strings) {
+    /* strings was zero-allocated, so entries never filled are still NULL. */
+    for (i = 0; i < kernel_gen->printf_num; i++) {
+      if (strings[i])
+        CL_FREE(strings[i]);
+    }
+    CL_FREE(strings);
+  }
+  if (ids)
+    CL_FREE(ids);
+  return ret;
+}
+
+static cl_int
gen_setup_constant_buffer_for_20(cl_kernel kernel, cl_kernel_gen kernel_gen,
cl_program_gen prog_gen, gen_gpgpu *gpu)
{
@@ -821,6 +882,19 @@ cl_command_queue_delete_gpgpu(void *gpgpu)
gpu->mem.scratch_bo = NULL;
}
+ if (gpu->printf.printf_bo) {
+ cl_uint i;
+ assert(gpu->printf.printf_num > 0);
+ for (i = 0; i < gpu->printf.printf_num; i++) {
+ CL_FREE(gpu->printf.printf_strings[i]);
+ }
+ CL_FREE(gpu->printf.printf_strings);
+ CL_FREE(gpu->printf.printf_ids);
+
+ drm_intel_bo_unreference(gpu->printf.printf_bo);
+ gpu->printf.printf_bo = NULL;
+ }
+
if (gpu->mem.stack_bo) {
drm_intel_bo_unreference(gpu->mem.stack_bo);
gpu->mem.stack_bo = NULL;
@@ -988,6 +1062,7 @@ cl_command_queue_ND_range_gen_once(cl_command_queue queue, cl_kernel kernel, cl_
if (ret != CL_SUCCESS)
break;
+ gen_gpgpu_setup_printf_buffer(gpu, kernel_gen, global_wk_sz_use);
gen_gpgpu_setup_kernel_exec_svm_mem(kernel, kernel_gen, gpu);
/* also setup the device enqueue helper bo if exist */
@@ -1502,7 +1577,7 @@ cl_command_queue_gen_handle_device_enqueue(cl_command_queue queue, cl_kernel ker
fixed_global_off[i] = ndrange_info->global_work_offset[i];
}
-// int *slm_sizes = (int *)ptr;
+ // int *slm_sizes = (int *)ptr;
int slm_size = block->descriptor->slm_size;
ptr += slm_size;
@@ -1570,6 +1645,14 @@ cl_command_queue_finish_gpgpu(void *gpgpu)
return CL_INVALID_VALUE;
intel_batchbuffer_finish(gpu->batch);
+
+ if (gpu->printf.printf_num > 0) {
+ drm_intel_bo_map(gpu->printf.printf_bo, 0);
+ cl_gen_output_printf(gpu->printf.printf_bo->virtual, gpu->printf.printf_buf_size,
+ gpu->printf.printf_ids, gpu->printf.printf_strings,
+ gpu->printf.printf_num);
+ drm_intel_bo_unmap(gpu->printf.printf_bo);
+ }
}
return CL_SUCCESS;
diff --git a/runtime/gen/cl_gen.h b/runtime/gen/cl_gen.h
index 85d8f63a..2fcfddda 100644
--- a/runtime/gen/cl_gen.h
+++ b/runtime/gen/cl_gen.h
@@ -173,6 +173,10 @@ typedef struct _cl_kernel_gen {
cl_gen_image_info_offset image_info;
cl_uint virt_reg_phy_offset_num; // The mapping between virtual reg and phy offset
cl_gen_virt_phy_offset virt_reg_phy_offset;
+ cl_uint printf_num;
+ cl_int printf_bti;
+ cl_uint *printf_ids;
+ char **printf_strings;
} _cl_kernel_gen;
typedef _cl_kernel_gen *cl_kernel_gen;
@@ -191,6 +195,7 @@ enum cl_gen_program_note_type {
GEN_NOTE_TYPE_CL_INFO = 4,
GEN_NOTE_TYPE_CL_DEVICE_ENQUEUE_INFO = 5,
GEN_NOTE_TYPE_COMPILER_INFO = 6,
+ GEN_NOTE_TYPE_CL_PRINTF = 7,
};
typedef struct _cl_program_gen_device_enqueue_info {
@@ -262,6 +267,7 @@ extern int cl_command_queue_finish_gpgpu(void *gpgpu);
extern void cl_enqueue_nd_range_delete_gen(cl_event event);
extern cl_int cl_command_queue_create_gen(cl_device_id device, cl_command_queue queue);
extern void cl_command_queue_delete_gen(cl_device_id device, cl_command_queue queue);
+extern void cl_gen_output_printf(void *buf_addr, uint32_t buf_size, cl_uint *ids, char **fmts, uint32_t printf_num);
/************************************ Compiler ******************************************/
extern cl_int cl_compiler_load_gen(cl_device_id device);
diff --git a/runtime/gen/cl_kernel_gen.c b/runtime/gen/cl_kernel_gen.c
index 7ff425e1..ce8cbf64 100644
--- a/runtime/gen/cl_kernel_gen.c
+++ b/runtime/gen/cl_kernel_gen.c
@@ -107,6 +107,17 @@ cl_kernel_delete_gen(cl_device_id device, cl_kernel kernel)
kernel_gen->image_info = NULL;
}
+ if (kernel_gen->printf_ids) {
+ assert(kernel_gen->printf_num > 0);
+ CL_FREE(kernel_gen->printf_ids);
+ }
+ kernel_gen->printf_ids = NULL;
+ if (kernel_gen->printf_strings) {
+ assert(kernel_gen->printf_num > 0);
+ CL_FREE(kernel_gen->printf_strings);
+ }
+ kernel_gen->printf_strings = NULL;
+
CL_FREE(kernel_gen);
}
@@ -153,56 +164,16 @@ cl_kernel_get_info_gen(cl_device_id device, cl_kernel kernel, cl_uint param_name
}
static cl_int
-cl_program_gen_get_kernel_func_cl_info(cl_device_id device, cl_kernel kernel)
+cl_program_gen_get_kernel_func_arg_info(cl_kernel kernel, void *desc, cl_uint desc_size,
+ cl_program_gen prog_gen, cl_kernel_gen kernel_gen)
{
- cl_program prog = kernel->program;
- cl_program_gen prog_gen;
- cl_kernel_gen kernel_gen;
- cl_int offset;
- void *desc;
void *ptr;
- cl_char *name;
- cl_uint name_size;
- cl_uint desc_size;
- cl_uint desc_type;
cl_uint wg_sz_size;
cl_uint attr_size;
cl_uint arg_info_size;
- int i;
char *arg_type_qual_str;
char *arg_access_qualifier_str;
-
- DEV_PRIVATE_DATA(prog, device, prog_gen);
- DEV_PRIVATE_DATA(kernel, device, kernel_gen);
-
- assert(kernel->name);
-
- if (prog_gen->func_cl_info == NULL)
- return CL_SUCCESS;
-
- offset = 0;
- desc = NULL;
- while (offset < prog_gen->func_cl_info_data->d_size) {
- name_size = *(cl_uint *)(prog_gen->func_cl_info_data->d_buf + offset);
- desc_size = *(cl_uint *)(prog_gen->func_cl_info_data->d_buf + offset + sizeof(cl_uint));
- desc_type = *(cl_uint *)(prog_gen->func_cl_info_data->d_buf + offset + 2 * sizeof(cl_uint));
- name = prog_gen->func_cl_info_data->d_buf + offset + sizeof(cl_uint) * 3;
-
- if (desc_type != GEN_NOTE_TYPE_CL_INFO) {
- offset += 3 * sizeof(cl_uint) + ALIGN(name_size, 4) + ALIGN(desc_size, 4);
- continue;
- }
-
- if (strcmp((char *)name, (char *)kernel->name) == 0) { // Find the kernel info slot
- desc = prog_gen->func_cl_info_data->d_buf + offset + sizeof(cl_uint) * 3 + ALIGN(name_size, 4);
- break;
- }
-
- offset += 3 * sizeof(cl_uint) + ALIGN(name_size, 4) + ALIGN(desc_size, 4);
- }
-
- if (desc == NULL)
- return CL_SUCCESS;
+ int i;
ptr = desc;
attr_size = *(cl_uint *)ptr;
@@ -316,6 +287,80 @@ cl_program_gen_get_kernel_func_cl_info(cl_device_id device, cl_kernel kernel)
}
static cl_int
+cl_program_gen_get_kernel_func_cl_info(cl_device_id device, cl_kernel kernel)
+{
+  /*
+   * Walk the program's CL-info note data and apply every note whose name
+   * matches this kernel:
+   *  - GEN_NOTE_TYPE_CL_PRINTF fills printf_bti / printf_num / printf_ids /
+   *    printf_strings (the strings point into the note data, they are not copied);
+   *  - GEN_NOTE_TYPE_CL_INFO is handed to cl_program_gen_get_kernel_func_arg_info.
+   *
+   * Returns CL_SUCCESS, CL_OUT_OF_HOST_MEMORY, CL_INVALID_KERNEL_DEFINITION when
+   * a kernel carries two CL info notes, or the arg-info parser's error.
+   */
+  cl_program prog = kernel->program;
+  cl_program_gen prog_gen;
+  cl_kernel_gen kernel_gen;
+  char *data;
+  size_t data_size;
+  size_t offset;
+  size_t note_size;
+  cl_uint name_size;
+  cl_uint desc_size;
+  cl_uint desc_type;
+  char *name;
+  char *desc;
+  cl_uint i;
+  cl_int ret = CL_SUCCESS;
+  cl_bool already_set = CL_FALSE;
+
+  DEV_PRIVATE_DATA(prog, device, prog_gen);
+  DEV_PRIVATE_DATA(kernel, device, kernel_gen);
+
+  assert(kernel->name);
+
+  if (prog_gen->func_cl_info == NULL)
+    return CL_SUCCESS;
+
+  data = (char *)prog_gen->func_cl_info_data->d_buf;
+  data_size = prog_gen->func_cl_info_data->d_size;
+
+  /* Each note is: name size, desc size, type (one cl_uint each), then the
+     4-byte aligned name followed by the 4-byte aligned descriptor. */
+  for (offset = 0; offset + 3 * sizeof(cl_uint) <= data_size; offset += note_size) {
+    name_size = *(cl_uint *)(data + offset);
+    desc_size = *(cl_uint *)(data + offset + sizeof(cl_uint));
+    desc_type = *(cl_uint *)(data + offset + 2 * sizeof(cl_uint));
+    name = data + offset + sizeof(cl_uint) * 3;
+    desc = name + ALIGN(name_size, 4);
+    note_size = 3 * sizeof(cl_uint) + ALIGN(name_size, 4) + ALIGN(desc_size, 4);
+
+    if (strcmp(name, (char *)kernel->name) != 0) // Not this kernel's slot
+      continue;
+
+    if (desc_type == GEN_NOTE_TYPE_CL_PRINTF) {
+      kernel_gen->printf_bti = *(cl_uint *)desc;
+      desc += sizeof(cl_uint);
+      kernel_gen->printf_num = *(cl_uint *)desc;
+      desc += sizeof(cl_uint);
+
+      kernel_gen->printf_strings = CL_CALLOC(kernel_gen->printf_num, sizeof(char *));
+      kernel_gen->printf_ids = CL_CALLOC(kernel_gen->printf_num, sizeof(cl_uint));
+
+      if (kernel_gen->printf_strings == NULL || kernel_gen->printf_ids == NULL) {
+        /* Do not leave a printf count behind without its tables. */
+        if (kernel_gen->printf_strings)
+          CL_FREE(kernel_gen->printf_strings);
+        if (kernel_gen->printf_ids)
+          CL_FREE(kernel_gen->printf_ids);
+        kernel_gen->printf_strings = NULL;
+        kernel_gen->printf_ids = NULL;
+        kernel_gen->printf_num = 0;
+        return CL_OUT_OF_HOST_MEMORY;
+      }
+
+      /* Entries are packed as: id (cl_uint) followed by a NUL-terminated string. */
+      for (i = 0; i < kernel_gen->printf_num; i++) {
+        kernel_gen->printf_ids[i] = *(cl_uint *)desc;
+        desc += sizeof(cl_uint);
+        kernel_gen->printf_strings[i] = desc;
+        desc += strlen(desc) + 1;
+      }
+    } else if (desc_type == GEN_NOTE_TYPE_CL_INFO) {
+      if (already_set) {
+        /* Can not contain two CL info for one kernel */
+        return CL_INVALID_KERNEL_DEFINITION;
+      }
+      ret = cl_program_gen_get_kernel_func_arg_info(kernel, desc, desc_size, prog_gen, kernel_gen);
+      if (ret != CL_SUCCESS)
+        return ret;
+      /* Remember it, otherwise the duplicate check above can never fire. */
+      already_set = CL_TRUE;
+    }
+  }
+
+  return CL_SUCCESS;
+}
+
+static cl_int
cl_program_gen_get_one_kernel_func(cl_device_id device, cl_kernel kernel, GElf_Sym *p_sym_entry)
{
cl_program prog = kernel->program;
diff --git a/runtime/gen/cl_printf_gen.c b/runtime/gen/cl_printf_gen.c
new file mode 100644
index 00000000..7e6f182e
--- /dev/null
+++ b/runtime/gen/cl_printf_gen.c
@@ -0,0 +1,633 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+#include <stdarg.h>
+
+#define GEN_PRINTF_LOG_MAGIC 0xAABBCCDD
+
+/* Header of one printf record in the printf output buffer. output_one_printf
+   reads the argument data starting 3 * sizeof(uint32_t) bytes after the start
+   of the record; it does not go through the content member. */
+typedef struct _cl_gen_printf_log {
+  uint32_t magic;         // 0xAABBCCDD as magic for ASSERT.
+  uint32_t size;          // Size of this printf log, include header.
+  uint32_t statement_num; // which printf within one kernel.
+  char *content;
+} _cl_gen_printf_log, *cl_gen_printf_log;
+
+/* Things about printf info. */
+enum {
+  GEN_PRINTF_LM_NONE = 0, /* no length modifier */
+  GEN_PRINTF_LM_HH,       /* "hh" */
+  GEN_PRINTF_LM_H,        /* "h" */
+  GEN_PRINTF_LM_L,        /* "l" */
+  GEN_PRINTF_LM_HL,       /* "hl" */
+};
+
+/* Conversion specifiers; the suffix is the conversion character, with its
+   case kept where both cases exist (X/x, F/f, E/e, G/g, A/a). */
+enum {
+  GEN_PRINTF_CONVERSION_INVALID = 0,
+  GEN_PRINTF_CONVERSION_D,
+  GEN_PRINTF_CONVERSION_I,
+  GEN_PRINTF_CONVERSION_O,
+  GEN_PRINTF_CONVERSION_U,
+  GEN_PRINTF_CONVERSION_X,
+  GEN_PRINTF_CONVERSION_x,
+  GEN_PRINTF_CONVERSION_F,
+  GEN_PRINTF_CONVERSION_f,
+  GEN_PRINTF_CONVERSION_E,
+  GEN_PRINTF_CONVERSION_e,
+  GEN_PRINTF_CONVERSION_G,
+  GEN_PRINTF_CONVERSION_g,
+  GEN_PRINTF_CONVERSION_A,
+  GEN_PRINTF_CONVERSION_a,
+  GEN_PRINTF_CONVERSION_C,
+  GEN_PRINTF_CONVERSION_S,
+  GEN_PRINTF_CONVERSION_P
+};
+
+/* One node of a parsed format string: either a run of literal text (stored
+   with conversion_specifier GEN_PRINTF_CONVERSION_S and the text in str) or a
+   single conversion specification together with its host format in str. */
+typedef struct _gen_printf_state {
+  struct _gen_printf_state *next; // next node of the list, NULL at the end.
+  cl_int left_justified;
+  cl_int sign_symbol; //0 for nothing, 1 for sign, 2 for space.
+  cl_int alter_form;
+  cl_int zero_padding;
+  cl_int vector_n;    // 0 when there is no vector specifier.
+  cl_int min_width;   // -1 when not given.
+  cl_int precision;   // -1 when not given.
+  cl_int length_modifier;      // one of GEN_PRINTF_LM_*
+  cl_int conversion_specifier; // one of GEN_PRINTF_CONVERSION_*
+  char *str;
+} _gen_printf_state, *gen_printf_state;
+
+/*
+ * Build a host printf conversion specification
+ * "%[flags][width][.precision][length]conversion" from a parsed state. It is
+ * used for vector conversions, whose original text the host printf cannot
+ * understand. The "hl" length modifier emits nothing.
+ *
+ * Returns a zero-initialised heap buffer the caller must release with
+ * CL_FREE, or NULL when the allocation fails.
+ */
+static char *
+generate_printf_fmt(gen_printf_state state)
+{
+  const size_t cap = 256; /* far larger than the longest possible specification */
+  char *str = CL_CALLOC(1, cap);
+  size_t len = 0;
+
+  if (str == NULL)
+    return NULL;
+
+  str[len++] = '%';
+
+  if (state->left_justified)
+    str[len++] = '-';
+
+  if (state->sign_symbol == 1)
+    str[len++] = '+';
+  else if (state->sign_symbol == 2)
+    str[len++] = ' ';
+
+  if (state->alter_form)
+    str[len++] = '#';
+
+  if (state->zero_padding)
+    str[len++] = '0';
+
+  if (state->min_width >= 0)
+    len += snprintf(str + len, cap - len, "%d", state->min_width);
+
+  /* The precision gets its own digits; it must not reuse the width's. */
+  if (state->precision >= 0)
+    len += snprintf(str + len, cap - len, ".%d", state->precision);
+
+  switch (state->length_modifier) {
+  case GEN_PRINTF_LM_HH:
+    str[len++] = 'h';
+    str[len++] = 'h';
+    break;
+  case GEN_PRINTF_LM_H:
+    str[len++] = 'h';
+    break;
+  case GEN_PRINTF_LM_L:
+    str[len++] = 'l';
+    break;
+  case GEN_PRINTF_LM_HL:
+    break;
+  default:
+    assert(state->length_modifier == GEN_PRINTF_LM_NONE);
+  }
+
+  /* The buffer is zero-filled, so the string stays NUL-terminated. */
+  switch (state->conversion_specifier) {
+  case GEN_PRINTF_CONVERSION_D:
+  case GEN_PRINTF_CONVERSION_I:
+    str[len] = 'd';
+    break;
+  case GEN_PRINTF_CONVERSION_O:
+    str[len] = 'o';
+    break;
+  case GEN_PRINTF_CONVERSION_U:
+    str[len] = 'u';
+    break;
+  case GEN_PRINTF_CONVERSION_X:
+    str[len] = 'X';
+    break;
+  case GEN_PRINTF_CONVERSION_x:
+    str[len] = 'x';
+    break;
+  case GEN_PRINTF_CONVERSION_C:
+    str[len] = 'c';
+    break;
+  case GEN_PRINTF_CONVERSION_F:
+    str[len] = 'F';
+    break;
+  case GEN_PRINTF_CONVERSION_f:
+    str[len] = 'f';
+    break;
+  case GEN_PRINTF_CONVERSION_E:
+    str[len] = 'E';
+    break;
+  case GEN_PRINTF_CONVERSION_e:
+    str[len] = 'e';
+    break;
+  case GEN_PRINTF_CONVERSION_G:
+    str[len] = 'G';
+    break;
+  case GEN_PRINTF_CONVERSION_g:
+    str[len] = 'g';
+    break;
+  case GEN_PRINTF_CONVERSION_A:
+    str[len] = 'A';
+    break;
+  case GEN_PRINTF_CONVERSION_a:
+    str[len] = 'a';
+    break;
+  case GEN_PRINTF_CONVERSION_P:
+    str[len] = 'p';
+    break;
+  default:
+    assert(0);
+    break;
+  }
+
+  return str;
+}
+
+/*
+ * Parse one conversion specification that starts at begin (which must point
+ * at a '%') and record its flags, width, precision, vector size, length
+ * modifier and conversion specifier in state.
+ *
+ * On success *rend is set to the character following the conversion
+ * specifier and the specifier character itself is returned. On failure -1 is
+ * returned and *rend is left untouched; this happens when begin is not a '%',
+ * the vector size is not 2, 3, 4, 8 or 16, the conversion character is not
+ * recognised, or the scan would run past end.
+ */
+static cl_int
+parse_printf_state(char *begin, char *end, char **rend, gen_printf_state state)
+{
+ const char *fmt;
+ state->left_justified = 0;
+ state->sign_symbol = 0; //0 for nothing, 1 for sign, 2 for space.
+ state->alter_form = 0;
+ state->zero_padding = 0;
+ state->vector_n = 0;
+ state->min_width = -1;
+ state->precision = -1;
+ state->length_modifier = GEN_PRINTF_LM_NONE;
+ state->conversion_specifier = GEN_PRINTF_CONVERSION_INVALID;
+
+ fmt = begin;
+
+ if (*fmt != '%')
+ return -1;
+
+ /* Advance fmt by one character; if that would move past end, report the
+ problem and make the whole function return -1. */
+#define FMT_PLUS_PLUS \
+ do { \
+ if (fmt + 1 <= end) \
+ fmt++; \
+ else { \
+ printf("Error, line: %d, fmt > end\n", __LINE__); \
+ return -1; \
+ } \
+ } while (0)
+
+ FMT_PLUS_PLUS;
+
+ // parse the flags.
+ while (*fmt == '-' || *fmt == '+' || *fmt == ' ' || *fmt == '#' || *fmt == '0')
+ switch (*fmt) {
+ case '-':
+ /* The result of the conversion is left-justified within the field. */
+ state->left_justified = 1;
+ FMT_PLUS_PLUS;
+ break;
+ case '+':
+ /* The result of a signed conversion always begins with a plus or minus sign. */
+ state->sign_symbol = 1;
+ FMT_PLUS_PLUS;
+ break;
+ case ' ':
+ /* If the first character of a signed conversion is not a sign, or if a signed
+ conversion results in no characters, a space is prefixed to the result.
+ If the space and + flags both appear,the space flag is ignored. */
+ if (state->sign_symbol == 0)
+ state->sign_symbol = 2;
+ FMT_PLUS_PLUS;
+ break;
+ case '#':
+ /*The result is converted to an alternative form. */
+ state->alter_form = 1;
+ FMT_PLUS_PLUS;
+ break;
+ case '0':
+ /* Zero padding is not recorded once '-' has already been seen. */
+ if (!state->left_justified)
+ state->zero_padding = 1;
+ FMT_PLUS_PLUS;
+ break;
+ default:
+ break;
+ }
+
+ // The minimum field width
+ while ((*fmt >= '0') && (*fmt <= '9')) {
+ if (state->min_width < 0)
+ state->min_width = 0;
+ state->min_width = state->min_width * 10 + (*fmt - '0');
+ FMT_PLUS_PLUS;
+ }
+
+ // The precision
+ if (*fmt == '.') {
+ FMT_PLUS_PLUS;
+ /* A '.' without digits leaves the precision at 0. */
+ state->precision = 0;
+ while (*fmt >= '0' && *fmt <= '9') {
+ state->precision = state->precision * 10 + (*fmt - '0');
+ FMT_PLUS_PLUS;
+ }
+ }
+
+ // handle the vector specifier.
+ if (*fmt == 'v') {
+ FMT_PLUS_PLUS;
+ switch (*fmt) {
+ case '2':
+ case '3':
+ case '4':
+ case '8':
+ state->vector_n = *fmt - '0';
+ FMT_PLUS_PLUS;
+ break;
+ case '1':
+ FMT_PLUS_PLUS;
+ if (*fmt == '6') {
+ state->vector_n = 16;
+ FMT_PLUS_PLUS;
+ } else
+ return -1;
+ break;
+ default:
+ //Wrong vector, error.
+ return -1;
+ }
+ }
+
+ // length modifiers
+ if (*fmt == 'h') {
+ FMT_PLUS_PLUS;
+ if (*fmt == 'h') { //hh
+ state->length_modifier = GEN_PRINTF_LM_HH;
+ FMT_PLUS_PLUS;
+ } else if (*fmt == 'l') { //hl
+ state->length_modifier = GEN_PRINTF_LM_HL;
+ FMT_PLUS_PLUS;
+ } else { //h
+ state->length_modifier = GEN_PRINTF_LM_H;
+ }
+ } else if (*fmt == 'l') {
+ state->length_modifier = GEN_PRINTF_LM_L;
+ FMT_PLUS_PLUS;
+ }
+
+ /* One switch case per conversion character: record the specifier, step
+ past the character, report the end position and return the character. */
+#define CONVERSION_SPEC_AND_RET(XXX, xxx) \
+ case XXX: \
+ state->conversion_specifier = GEN_PRINTF_CONVERSION_##xxx; \
+ FMT_PLUS_PLUS; \
+ *rend = (char *)fmt; \
+ return XXX; \
+ break;
+
+ // conversion specifiers
+ switch (*fmt) {
+ CONVERSION_SPEC_AND_RET('d', D)
+ CONVERSION_SPEC_AND_RET('i', I)
+ CONVERSION_SPEC_AND_RET('o', O)
+ CONVERSION_SPEC_AND_RET('u', U)
+ CONVERSION_SPEC_AND_RET('x', x)
+ CONVERSION_SPEC_AND_RET('X', X)
+ CONVERSION_SPEC_AND_RET('f', f)
+ CONVERSION_SPEC_AND_RET('F', F)
+ CONVERSION_SPEC_AND_RET('e', e)
+ CONVERSION_SPEC_AND_RET('E', E)
+ CONVERSION_SPEC_AND_RET('g', g)
+ CONVERSION_SPEC_AND_RET('G', G)
+ CONVERSION_SPEC_AND_RET('a', a)
+ CONVERSION_SPEC_AND_RET('A', A)
+ CONVERSION_SPEC_AND_RET('c', C)
+ CONVERSION_SPEC_AND_RET('s', S)
+ CONVERSION_SPEC_AND_RET('p', P)
+
+ // %% has been handled
+
+ default:
+ return -1;
+ }
+}
+
+/* Release a whole list of parsed printf states: each node's string (when it
+   has one) and then the node itself. A NULL list is a no-op. */
+static void
+free_printf_state(gen_printf_state state)
+{
+  gen_printf_state node;
+  gen_printf_state following;
+
+  for (node = state; node != NULL; node = following) {
+    /* Fetch the link first, the node is gone after CL_FREE. */
+    following = node->next;
+
+    if (node->str != NULL)
+      CL_FREE(node->str);
+
+    CL_FREE(node);
+  }
+}
+
+/* Append node to the singly linked list tracked by *first and *last. */
+static void
+append_printf_state(gen_printf_state *first, gen_printf_state *last, gen_printf_state node)
+{
+  if (*first == NULL)
+    *first = node;
+  if (*last != NULL)
+    (*last)->next = node;
+  *last = node;
+}
+
+/*
+ * Split a printf format string into a linked list of states. Runs of literal
+ * text become nodes tagged GEN_PRINTF_CONVERSION_S holding a copy of the text,
+ * and every conversion specification becomes a node holding the format to
+ * hand to the host printf.
+ *
+ * Returns the head of the list, to be released with free_printf_state, or
+ * NULL when the format is empty, malformed, or an allocation fails.
+ *
+ * NOTE(review): "%%" is kept verbatim inside the literal text and is later
+ * printed through "%s", so it comes out as two percent signs - confirm
+ * whether that is intended.
+ */
+static gen_printf_state
+parser_printf_fmt(char *format)
+{
+  char *begin = format;
+  char *end = format + strlen(format);
+  char *p;
+  char *rend = NULL;
+  cl_int ret_char; /* must be able to hold -1 even where plain char is unsigned */
+  gen_printf_state curr = NULL;
+  gen_printf_state first = NULL;
+  gen_printf_state last = NULL;
+
+  /* Now parse it. */
+  while (*begin) {
+    p = begin;
+
+  again:
+    while (p < end && *p != '%') {
+      p++;
+    }
+    if (p < end && p + 1 == end) { // String with % at end.
+      printf("string end with %%\n");
+      goto error;
+    }
+    if (p + 1 < end && *(p + 1) == '%') { // %%
+      p += 2;
+      goto again;
+    }
+
+    if (p != begin) {
+      /* Literal text in front of the next conversion (or up to the end). */
+      curr = CL_CALLOC(1, sizeof(_gen_printf_state));
+      if (curr == NULL)
+        goto error;
+      curr->conversion_specifier = GEN_PRINTF_CONVERSION_S;
+      curr->str = CL_MALLOC(p - begin + 1);
+      if (curr->str == NULL)
+        goto error;
+      memcpy(curr->str, begin, p - begin);
+      curr->str[p - begin] = 0;
+
+      append_printf_state(&first, &last, curr);
+      curr = NULL; /* owned by the list from now on */
+    }
+
+    if (p == end) // finish
+      break;
+
+    /* Now parse the % start conversion_specifier. */
+    curr = CL_CALLOC(1, sizeof(_gen_printf_state));
+    if (curr == NULL)
+      goto error;
+    ret_char = parse_printf_state(p, end, &rend, curr);
+    if (ret_char < 0)
+      goto error;
+
+    if (curr->vector_n > 0) {
+      curr->str = generate_printf_fmt(curr); // Standard printf can not recognize %v4XXX
+    } else {
+      curr->str = CL_MALLOC(rend - p + 1);
+      if (curr->str != NULL) {
+        memcpy(curr->str, p, rend - p);
+        curr->str[rend - p] = 0;
+      }
+    }
+    if (curr->str == NULL)
+      goto error;
+
+    append_printf_state(&first, &last, curr);
+    curr = NULL; /* owned by the list from now on */
+
+    if (rend == end)
+      break;
+
+    begin = rend;
+  }
+
+#if 0
+  {
+    cl_int j = 0;
+    gen_printf_state s = first;
+    while (s) {
+      fprintf(stderr, "---- %d ---- state : \n", j);
+      fprintf(stderr, " conversion_specifier : %d\n", s->conversion_specifier);
+      fprintf(stderr, " vector_n : %d\n", s->vector_n);
+      fprintf(stderr, " left_justified : %d\n", s->left_justified);
+      fprintf(stderr, " sign_symbol: %d\n", s->sign_symbol);
+      fprintf(stderr, " alter_form : %d\n", s->alter_form);
+      fprintf(stderr, " zero_padding : %d\n", s->zero_padding);
+      fprintf(stderr, " min_width : %d\n", s->min_width);
+      fprintf(stderr, " precision : %d\n", s->precision);
+      fprintf(stderr, " length_modifier : %d\n", s->length_modifier);
+      fprintf(stderr, " string : %s strlen is %ld\n", s->str, strlen(s->str));
+      j++;
+      s = s->next;
+    }
+  }
+#endif
+
+  return first;
+
+error:
+  printf("error format string.\n");
+  /* curr is only non-NULL while it is not linked yet; its next is NULL. */
+  free_printf_state(curr);
+  free_printf_state(first);
+  return NULL;
+}
+
+/*
+ * Print one printf record. Literal nodes are printed as they are; every other
+ * node prints one value per vector lane (a single value when there is no
+ * vector), read from the record's data that starts 3 * sizeof(uint32_t) bytes
+ * into the log. Lanes are separated by ",".
+ */
+static void
+output_one_printf(gen_printf_state all_state, cl_gen_printf_log log)
+{
+/* Print the next value of the given type with the node's format and step
+   over it in the record data. */
+#define PRINT_SOMETHING(target_ty) \
+  do { \
+    printf(st->str, *(target_ty *)(payload + consumed)); \
+    consumed += sizeof(target_ty); \
+  } while (0)
+
+  char *payload = (char *)(log) + 3 * sizeof(uint32_t);
+  size_t consumed = 0;
+  gen_printf_state st;
+  cl_int lanes, lane;
+
+  for (st = all_state; st; st = st->next) {
+    if (st->conversion_specifier == GEN_PRINTF_CONVERSION_S) {
+      printf("%s", st->str);
+      continue;
+    }
+
+    lanes = st->vector_n > 0 ? st->vector_n : 1;
+    for (lane = 0; lane < lanes; lane++) {
+      if (lane)
+        printf(",");
+
+      switch (st->conversion_specifier) {
+      /* Integer conversions: 64 bits with the "l" modifier, else an int. */
+      case GEN_PRINTF_CONVERSION_D:
+      case GEN_PRINTF_CONVERSION_I:
+      case GEN_PRINTF_CONVERSION_O:
+      case GEN_PRINTF_CONVERSION_U:
+      case GEN_PRINTF_CONVERSION_X:
+      case GEN_PRINTF_CONVERSION_x:
+        if (st->length_modifier == GEN_PRINTF_LM_L)
+          PRINT_SOMETHING(uint64_t);
+        else
+          PRINT_SOMETHING(int);
+        break;
+
+      case GEN_PRINTF_CONVERSION_C:
+        PRINT_SOMETHING(char);
+        break;
+
+      /* Every floating point conversion reads a float. */
+      case GEN_PRINTF_CONVERSION_F:
+      case GEN_PRINTF_CONVERSION_f:
+      case GEN_PRINTF_CONVERSION_E:
+      case GEN_PRINTF_CONVERSION_e:
+      case GEN_PRINTF_CONVERSION_G:
+      case GEN_PRINTF_CONVERSION_g:
+      case GEN_PRINTF_CONVERSION_A:
+      case GEN_PRINTF_CONVERSION_a:
+        PRINT_SOMETHING(float);
+        break;
+
+      case GEN_PRINTF_CONVERSION_P:
+        PRINT_SOMETHING(int);
+        break;
+
+      default:
+        assert(0);
+        return;
+      }
+    }
+  }
+}
+
+/*
+ * Walk the printf output buffer and print every record in it.
+ *
+ * buf_addr   - start of the buffer; its first uint32_t is the number of bytes
+ *              in use, that leading word included.
+ * buf_size   - size of the buffer; the used size is clamped to it.
+ * ids, fmts  - printf_num statement ids and their format strings.
+ *
+ * Parsing stops with an error log at the first record that is truncated, has
+ * a wrong magic, has an impossible size, or names an unknown statement. A
+ * record whose format string cannot be parsed is reported and skipped.
+ */
+LOCAL void
+cl_gen_output_printf(void *buf_addr, uint32_t buf_size, cl_uint *ids,
+                     char **fmts, uint32_t printf_num)
+{
+  /* magic, size and statement_num come before the argument data. */
+  const uint32_t header_sz = 3 * sizeof(uint32_t);
+  uint32_t total_sz;
+  uint32_t parsed;
+  uint32_t i;
+  char *p;
+  cl_gen_printf_log log;
+  gen_printf_state all_states;
+
+  if (buf_addr == NULL || buf_size < sizeof(uint32_t))
+    return;
+
+  total_sz = ((uint32_t *)buf_addr)[0];
+  if (total_sz > buf_size)
+    total_sz = buf_size;
+
+  parsed = sizeof(uint32_t);
+  p = (char *)buf_addr + sizeof(uint32_t);
+
+  while (parsed < total_sz) {
+    if (total_sz - parsed < header_sz) {
+      CL_LOG_ERROR("Printf log output is truncated");
+      return;
+    }
+
+    log = (cl_gen_printf_log)(p);
+    if (log->magic != GEN_PRINTF_LOG_MAGIC) {
+      CL_LOG_ERROR("Printf log output has wrong magic");
+      return;
+    }
+
+    /* A size below the header would never advance, a size beyond the used
+       part would read past the data. */
+    if (log->size < header_sz || log->size > total_sz - parsed) {
+      CL_LOG_ERROR("Printf log output has wrong size");
+      return;
+    }
+
+    for (i = 0; i < printf_num; i++) {
+      if (ids[i] == log->statement_num)
+        break;
+    }
+    if (i == printf_num) {
+      CL_LOG_ERROR("Printf log output, can not find the printf statement for %d",
+                   log->statement_num);
+      return;
+    }
+
+    all_states = parser_printf_fmt(fmts[i]);
+    if (all_states == NULL) {
+      CL_LOG_ERROR("Printf statement %d with wrong format %s",
+                   log->statement_num, fmts[i]);
+    } else {
+      output_one_printf(all_states, log);
+      free_printf_state(all_states);
+    }
+
+    /* Always step to the next record, also after a bad format. */
+    parsed += log->size;
+    p += log->size;
+  }
+}