summaryrefslogtreecommitdiff
path: root/kernels/compiler_sub_group_shuffle_xor.cl
diff options
context:
space:
mode:
Diffstat (limited to 'kernels/compiler_sub_group_shuffle_xor.cl')
-rw-r--r--kernels/compiler_sub_group_shuffle_xor.cl23
1 files changed, 22 insertions, 1 deletions
diff --git a/kernels/compiler_sub_group_shuffle_xor.cl b/kernels/compiler_sub_group_shuffle_xor.cl
index 8bc15d35..df3dfe70 100644
--- a/kernels/compiler_sub_group_shuffle_xor.cl
+++ b/kernels/compiler_sub_group_shuffle_xor.cl
@@ -1,4 +1,4 @@
-__kernel void compiler_sub_group_shuffle_xor(global int *dst, int c)
+__kernel void compiler_sub_group_shuffle_xor_int(global int *dst, int c)
{
int i = get_global_id(0);
if (i == 0)
@@ -17,3 +17,24 @@ __kernel void compiler_sub_group_shuffle_xor(global int *dst, int c)
dst[i*4+2] = o2;
dst[i*4+3] = o3;
}
+#ifdef SHORT
+__kernel void compiler_sub_group_shuffle_xor_short(global short *dst, int c)
+{
+ short i = get_global_id(0);
+ if (i == 0)
+ dst[0] = get_max_sub_group_size();
+ dst++;
+
+ short from = i;
+ int j = get_max_sub_group_size() - get_sub_group_local_id() - 1;
+ int k = get_sub_group_local_id() + 1;
+ short o0 = get_sub_group_local_id();
+ short o1 = intel_sub_group_shuffle_xor(from, c);
+ short o2 = intel_sub_group_shuffle_xor(from, j);
+ short o3 = intel_sub_group_shuffle_xor(from, k);
+ dst[i*4] = o0;
+ dst[i*4+1] = o1;
+ dst[i*4+2] = o2;
+ dst[i*4+3] = o3;
+}
+#endif