diff options
Diffstat (limited to 'kernels/compiler_sub_group_shuffle_up.cl')
-rw-r--r-- | kernels/compiler_sub_group_shuffle_up.cl | 23 |
1 files changed, 22 insertions, 1 deletions
diff --git a/kernels/compiler_sub_group_shuffle_up.cl b/kernels/compiler_sub_group_shuffle_up.cl
index 5c5cee12..fd287d52 100644
--- a/kernels/compiler_sub_group_shuffle_up.cl
+++ b/kernels/compiler_sub_group_shuffle_up.cl
@@ -1,4 +1,4 @@
-__kernel void compiler_sub_group_shuffle_up(global int *dst, int c)
+__kernel void compiler_sub_group_shuffle_up_int(global int *dst, int c)
 {
   int i = get_global_id(0);
   if (i == 0)
@@ -17,3 +17,24 @@ __kernel void compiler_sub_group_shuffle_up(global int *dst, int c)
   dst[i*4+2] = o2;
   dst[i*4+3] = o3;
 }
+#ifdef SHORT
+__kernel void compiler_sub_group_shuffle_up_short(global short *dst, int c)
+{
+  short i = get_global_id(0);
+  if (i == 0)
+    dst[0] = get_max_sub_group_size();
+  dst++;
+
+  short from = i;
+  int j = get_sub_group_local_id() + 1;
+  int k = get_max_sub_group_size() - get_sub_group_local_id() - 1;
+  short o0 = intel_sub_group_shuffle_up((short)123, (short)456, c);
+  short o1 = intel_sub_group_shuffle_up((short)123, from, c);
+  short o2 = intel_sub_group_shuffle_up(from, (short)-from, k);
+  short o3 = intel_sub_group_shuffle_up(from, (short)321, j);
+  dst[i*4] = o0;
+  dst[i*4+1] = o1;
+  dst[i*4+2] = o2;
+  dst[i*4+3] = o3;
+}
+#endif