summaryrefslogtreecommitdiff
path: root/utests/compiler_abs.cpp
diff options
context:
space:
mode:
author: Junyan He <junyan.he@linux.intel.com> 2013-07-03 15:16:59 +0800
committer: Zhigang Gong <zhigang.gong@linux.intel.com> 2013-07-03 18:21:00 +0800
commit: 0e1afe6fb357d85f24e37cde0285cc0c3613cec0 (patch)
tree: 3260b8e96845d700ab8d206f58a9258fa9cf8db8 /utests/compiler_abs.cpp
parent: e271f10397fe2fc38986046679d6fea95449eaae (diff)
download: beignet-0e1afe6fb357d85f24e37cde0285cc0c3613cec0.tar.gz
Add the vector3 support for builtin abs function
Add the forgotten vector3 variants of abs for all the types. Because of the kernel input alignment requirement, improve the test case so that its data layout matches that alignment. Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'utests/compiler_abs.cpp')
-rw-r--r--utests/compiler_abs.cpp54
1 files changed, 44 insertions, 10 deletions
diff --git a/utests/compiler_abs.cpp b/utests/compiler_abs.cpp
index 59d83651..a1b14b46 100644
--- a/utests/compiler_abs.cpp
+++ b/utests/compiler_abs.cpp
@@ -3,23 +3,26 @@
template <typename T, int N>
struct cl_vec {
- T ptr[N];
+ T ptr[((N+1)/2)*2]; //align to 2 elements.
typedef cl_vec<T, N> vec_type;
cl_vec(void) {
- memset(ptr, 0, sizeof(T) * N);
+ memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
}
cl_vec(vec_type & other) {
+ memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
memcpy (this->ptr, other.ptr, sizeof(T) * N);
}
vec_type& operator= (vec_type & other) {
+ memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
memcpy (this->ptr, other.ptr, sizeof(T) * N);
return *this;
}
template <typename U> vec_type& operator= (cl_vec<U, N> & other) {
+ memset(ptr, 0, sizeof(T) * ((N+1)/2)*2);
memcpy (this->ptr, other.ptr, sizeof(T) * N);
return *this;
}
@@ -56,6 +59,8 @@ template <typename T, typename U> static void cpu(int global_id, T *src, U *dst)
template <typename T, int N> static void gen_rand_val (cl_vec<T, N>& vect)
{
int i = 0;
+
+ memset(vect.ptr, 0, sizeof(T) * ((N+1)/2)*2);
for (; i < N; i++) {
vect.ptr[i] = static_cast<T>((rand() & 63) - 32);
}
@@ -66,25 +71,34 @@ template <typename T> static void gen_rand_val (T & val)
val = static_cast<T>((rand() & 63) - 32);
}
+template <typename T>
+inline static void print_data (T& val)
+{
+ if (std::is_unsigned<T>::value)
+ printf(" %u", val);
+ else
+ printf(" %d", val);
+}
+
template <typename T, typename U, int N> static void dump_data (cl_vec<T, N>* src,
- cl_vec<U, N>* dst, int n)
+ cl_vec<U, N>* dst, int n)
{
U* val = reinterpret_cast<U *>(dst);
- n = n*N;
+ n = n*((N+1)/2)*2;
printf("\nRaw: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", ((T *)buf_data[0])[i]);
+ print_data(((T *)buf_data[0])[i]);
}
printf("\nCPU: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", val[i]);
+ print_data(val[i]);
}
printf("\nGPU: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", ((U *)buf_data[1])[i]);
+ print_data(((U *)buf_data[1])[i]);
}
}
@@ -92,16 +106,16 @@ template <typename T, typename U> static void dump_data (T* src, U* dst, int n)
{
printf("\nRaw: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", ((T *)buf_data[0])[i]);
+ print_data(((T *)buf_data[0])[i]);
}
printf("\nCPU: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", dst[i]);
+ print_data(dst[i]);
}
printf("\nGPU: \n");
for (int32_t i = 0; i < (int32_t) n; ++i) {
- printf(" %d", ((U *)buf_data[1])[i]);
+ print_data(((U *)buf_data[1])[i]);
}
}
@@ -111,6 +125,8 @@ template <typename T, typename U> static void compiler_abs_with_type(void)
U cpu_dst[16];
T cpu_src[16];
+ printf("sizeof T, is %u, sizeof U is %u\n", (int)sizeof(T), (int)sizeof(U));
+
// Setup buffers
OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL);
OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL);
@@ -122,6 +138,12 @@ template <typename T, typename U> static void compiler_abs_with_type(void)
// Run random tests
for (uint32_t pass = 0; pass < 8; ++pass) {
OCL_MAP_BUFFER(0);
+
+ /* Clear the dst buffer to avoid random data. */
+ OCL_MAP_BUFFER(1);
+ memset(buf_data[1], 0, sizeof(U) * n);
+ OCL_UNMAP_BUFFER(1);
+
for (int32_t i = 0; i < (int32_t) n; ++i) {
gen_rand_val(cpu_src[i]);
}
@@ -166,54 +188,66 @@ ABS_TEST_TYPE(uchar, uchar)
typedef cl_vec<int, 2> int2;
+typedef cl_vec<int, 3> int3;
typedef cl_vec<int, 4> int4;
typedef cl_vec<int, 8> int8;
typedef cl_vec<int, 16> int16;
typedef cl_vec<unsigned int, 2> uint2;
+typedef cl_vec<unsigned int, 3> uint3;
typedef cl_vec<unsigned int, 4> uint4;
typedef cl_vec<unsigned int, 8> uint8;
typedef cl_vec<unsigned int, 16> uint16;
ABS_TEST_TYPE(int2, uint2)
+ABS_TEST_TYPE(int3, uint3)
ABS_TEST_TYPE(int4, uint4)
ABS_TEST_TYPE(int8, uint8)
ABS_TEST_TYPE(int16, uint16)
ABS_TEST_TYPE(uint2, uint2)
+ABS_TEST_TYPE(uint3, uint3)
ABS_TEST_TYPE(uint4, uint4)
ABS_TEST_TYPE(uint8, uint8)
ABS_TEST_TYPE(uint16, uint16)
typedef cl_vec<char, 2> char2;
+typedef cl_vec<char, 3> char3;
typedef cl_vec<char, 4> char4;
typedef cl_vec<char, 8> char8;
typedef cl_vec<char, 16> char16;
typedef cl_vec<unsigned char, 2> uchar2;
+typedef cl_vec<unsigned char, 3> uchar3;
typedef cl_vec<unsigned char, 4> uchar4;
typedef cl_vec<unsigned char, 8> uchar8;
typedef cl_vec<unsigned char, 16> uchar16;
ABS_TEST_TYPE(char2, uchar2)
+ABS_TEST_TYPE(char3, uchar3)
ABS_TEST_TYPE(char4, uchar4)
ABS_TEST_TYPE(char8, uchar8)
ABS_TEST_TYPE(char16, uchar16)
ABS_TEST_TYPE(uchar2, uchar2)
+ABS_TEST_TYPE(uchar3, uchar3)
ABS_TEST_TYPE(uchar4, uchar4)
ABS_TEST_TYPE(uchar8, uchar8)
ABS_TEST_TYPE(uchar16, uchar16)
typedef cl_vec<short, 2> short2;
+typedef cl_vec<short, 3> short3;
typedef cl_vec<short, 4> short4;
typedef cl_vec<short, 8> short8;
typedef cl_vec<short, 16> short16;
typedef cl_vec<unsigned short, 2> ushort2;
+typedef cl_vec<unsigned short, 3> ushort3;
typedef cl_vec<unsigned short, 4> ushort4;
typedef cl_vec<unsigned short, 8> ushort8;
typedef cl_vec<unsigned short, 16> ushort16;
ABS_TEST_TYPE(short2, ushort2)
+ABS_TEST_TYPE(short3, ushort3)
ABS_TEST_TYPE(short4, ushort4)
ABS_TEST_TYPE(short8, ushort8)
ABS_TEST_TYPE(short16, ushort16)
ABS_TEST_TYPE(ushort2, ushort2)
+ABS_TEST_TYPE(ushort3, ushort3)
ABS_TEST_TYPE(ushort4, ushort4)
ABS_TEST_TYPE(ushort8, ushort8)
ABS_TEST_TYPE(ushort16, ushort16)