summaryrefslogtreecommitdiff
path: root/utests/compiler_time_stamp.cpp
diff options
context:
space:
mode:
authorRuiling Song <ruiling.song@intel.com>2014-09-18 14:42:01 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-09-18 15:03:05 +0800
commitc0ba37d62dcac92adfc309e73abd7e12a02d8498 (patch)
tree4c1a7865c530101dcdf627bc6f93c5db5d7f43f5 /utests/compiler_time_stamp.cpp
parent68a81947984de6cceab310c0f205ae66361b7468 (diff)
downloadbeignet-c0ba37d62dcac92adfc309e73abd7e12a02d8498.tar.gz
GBE/libocl: Add __gen_ocl_get_timestamp() to get timestamp.
Gen provides the tm0 register for intra-kernel profiling. Here we provide an API, __gen_ocl_get_timestamp(), to return the timestamp in TM. The return type is defined as: struct time_stamp { ulong tick; uint event; }; 'tick' is a 64-bit time tick. 'event' stores a value that indicates whether a tmEvent has occurred (non-zero) or not (0). A tmEvent is any time-impacting event, such as a context switch or a frequency change, since the last time tm0 was read. I added a sample in kernels/compiler_time_stamp.cl; I hope it helps you understand how to use it. V2: Introduce ir::ARFRegister to avoid direct use of nr/subnr in Gen IR. Rename __gen_ocl_extract_reg to __gen_ocl_region. Rename beignet_get_time_stamp to __gen_ocl_get_timestamp. Signed-off-by: Ruiling Song <ruiling.song@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'utests/compiler_time_stamp.cpp')
-rw-r--r--utests/compiler_time_stamp.cpp52
1 files changed, 52 insertions, 0 deletions
diff --git a/utests/compiler_time_stamp.cpp b/utests/compiler_time_stamp.cpp
new file mode 100644
index 00000000..4da57523
--- /dev/null
+++ b/utests/compiler_time_stamp.cpp
@@ -0,0 +1,52 @@
+#include "utest_helper.hpp"
+
+static void cpu(int global_id, int *src, int *dst) { // CPU reference: computes the expected dst[global_id] from src
+ int i;
+ int final[16]; // final[i] receives element i of the scratch array rebuilt on iteration i
+ for (i = 0; i < 16; ++i) {
+ int array[16], j;
+ for (j = 0; j < 16; ++j)
+ array[j] = global_id; // default fill: every slot starts out as global_id
+ for (j = 0; j < src[0]; ++j) // src[0] is the number of leading slots to overwrite; it must not exceed 16
+ array[j] = 1+src[j];
+ final[i] = array[i];
+ }
+ dst[global_id] = final[global_id]; // global_id indexes both final and dst, so it must lie in [0, 16)
+}
+
+void compiler_time_stamp(void) // Runs the "compiler_time_stamp" kernel and checks its output against cpu()
+{
+ const size_t n = 16;
+ int cpu_dst[16], cpu_src[16]; // host-side copies: reference output and mirrored input
+
+ // Setup kernel and buffers: buf[0] is bound to kernel arg 0, buf[1] to kernel arg 1
+ OCL_CREATE_KERNEL("compiler_time_stamp");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ // Run a single pass with random input (the loop bound is 1)
+ for (uint32_t pass = 0; pass < 1; ++pass) {
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < (int32_t) n; ++i)
+ cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; // values in [0, 15], so cpu() never overruns its 16-entry arrays
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Run on CPU: fill cpu_dst[i] for every i in [0, n)
+ for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+ // Compare buf[1] against the CPU reference
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 11; ++i) // NOTE(review): only the first 11 of n = 16 results are checked; reason not visible here, confirm intent
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
+ OCL_UNMAP_BUFFER(1);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_time_stamp);