diff options
author | Junyan He <junyan.he@linux.intel.com> | 2013-07-03 15:17:05 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@linux.intel.com> | 2013-07-03 18:21:07 +0800 |
commit | 3580067c560ace6bb6ab4366ddf9391815519ab7 (patch) | |
tree | 8e6ce555fa97f17c11c7c3622cf9ee91eec610fa /backend/src/ocl_stdlib.h | |
parent | 0e1afe6fb357d85f24e37cde0285cc0c3613cec0 (diff) | |
download | beignet-3580067c560ace6bb6ab4366ddf9391815519ab7.tar.gz |
Add the abs_diff builtin function support
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'backend/src/ocl_stdlib.h')
-rw-r--r-- | backend/src/ocl_stdlib.h | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 4bfdf9a7..9b80445b 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4515,6 +4515,90 @@ DEC(uchar)
 #undef ABS_VEC8
 #undef ABS_VEC16
 
+
+/* abs_diff for char and short: operands are promoted to int, so the
+ * subtraction cannot overflow and the result fits the unsigned type CVT. */
+#define ABS_DIFF(CVT) (CVT)(abs((int)x - (int)y))
+#define ABS_DIFF_I(CVT, I)  (CVT)(abs((int)x.s##I - (int)y.s##I))
+
+#define ABS_DIFF_VEC1(CVT)  ABS_DIFF(CVT)
+#define ABS_DIFF_VEC2(CVT)  ABS_DIFF_I(CVT, 0), ABS_DIFF_I(CVT, 1)
+#define ABS_DIFF_VEC3(CVT)  ABS_DIFF_I(CVT, 0), ABS_DIFF_I(CVT, 1), ABS_DIFF_I(CVT, 2)
+#define ABS_DIFF_VEC4(CVT)  ABS_DIFF_VEC2(CVT), ABS_DIFF_I(CVT, 2), ABS_DIFF_I(CVT, 3)
+#define ABS_DIFF_VEC8(CVT)  ABS_DIFF_VEC4(CVT), ABS_DIFF_I(CVT, 4), ABS_DIFF_I(CVT, 5), \
+                            ABS_DIFF_I(CVT, 6), ABS_DIFF_I(CVT, 7)
+#define ABS_DIFF_VEC16(CVT)  ABS_DIFF_VEC8(CVT), ABS_DIFF_I(CVT, 8), ABS_DIFF_I(CVT, 9), \
+                             ABS_DIFF_I(CVT, A), ABS_DIFF_I(CVT, B), \
+                             ABS_DIFF_I(CVT, C), ABS_DIFF_I(CVT, D), \
+                             ABS_DIFF_I(CVT, E), ABS_DIFF_I(CVT, F)
+
+/* DEC_1 defines the scalar overload, DEC_N the N-component vector overload. */
+#define DEC_1(TYPE, UTYPE) INLINE_OVERLOADABLE UTYPE abs_diff(TYPE x, TYPE y) \
+                           { return ABS_DIFF_VEC1(UTYPE); }
+#define DEC_N(TYPE, UTYPE, N) INLINE_OVERLOADABLE UTYPE##N abs_diff(TYPE##N x, TYPE##N y) \
+                              { return (UTYPE##N)(ABS_DIFF_VEC##N(UTYPE)); }
+#define DEC(TYPE, UTYPE)  DEC_1(TYPE, UTYPE) DEC_N(TYPE, UTYPE, 2) DEC_N(TYPE, UTYPE, 3) \
+                          DEC_N(TYPE, UTYPE, 4) DEC_N(TYPE, UTYPE, 8) DEC_N(TYPE, UTYPE, 16)
+DEC(char, uchar)
+DEC(uchar, uchar)
+DEC(short, ushort)
+DEC(ushort, ushort)
+
+#undef DEC
+#undef DEC_1
+#undef DEC_N
+#undef ABS_DIFF
+#undef ABS_DIFF_I
+#undef ABS_DIFF_VEC1
+#undef ABS_DIFF_VEC2
+#undef ABS_DIFF_VEC3
+#undef ABS_DIFF_VEC4
+#undef ABS_DIFF_VEC8
+#undef ABS_DIFF_VEC16
+
+/* abs_diff for uint: subtract the smaller operand from the larger, so the
+ * unsigned subtraction never wraps. */
+INLINE_OVERLOADABLE uint abs_diff (uint x, uint y) {
+  return y > x ? (y - x) : (x - y);
+}
+
+/* abs_diff for int. A signed x - y can overflow (e.g. x == 0, y == INT_MIN)
+ * and the true result can be as large as UINT_MAX, so convert to uint first:
+ * unsigned subtraction is modulo 2^32, and larger - smaller then yields
+ * exactly the magnitude of the difference. */
+INLINE_OVERLOADABLE uint abs_diff (int x, int y) {
+  return y > x ? ((uint)y - (uint)x) : ((uint)x - (uint)y);
+}
+
+/* int and uint vector overloads, built from the scalar abs_diff overloads. */
+#define ABS_DIFF_I(I)  abs_diff(x.s##I, y.s##I)
+
+#define ABS_DIFF_VEC2  ABS_DIFF_I(0), ABS_DIFF_I(1)
+#define ABS_DIFF_VEC3  ABS_DIFF_I(0), ABS_DIFF_I(1), ABS_DIFF_I(2)
+#define ABS_DIFF_VEC4  ABS_DIFF_VEC2, ABS_DIFF_I(2), ABS_DIFF_I(3)
+#define ABS_DIFF_VEC8  ABS_DIFF_VEC4, ABS_DIFF_I(4), ABS_DIFF_I(5), \
+                       ABS_DIFF_I(6), ABS_DIFF_I(7)
+#define ABS_DIFF_VEC16  ABS_DIFF_VEC8, ABS_DIFF_I(8), ABS_DIFF_I(9), \
+                        ABS_DIFF_I(A), ABS_DIFF_I(B), \
+                        ABS_DIFF_I(C), ABS_DIFF_I(D), \
+                        ABS_DIFF_I(E), ABS_DIFF_I(F)
+
+#define DEC_N(TYPE, N) INLINE_OVERLOADABLE uint##N abs_diff(TYPE##N x, TYPE##N y) \
+                       { return (uint##N)(ABS_DIFF_VEC##N); }
+#define DEC(TYPE)  DEC_N(TYPE, 2) DEC_N(TYPE, 3) \
+                   DEC_N(TYPE, 4) DEC_N(TYPE, 8) DEC_N(TYPE, 16)
+DEC(int)
+DEC(uint)
+
+#undef DEC
+#undef DEC_N
+#undef ABS_DIFF_I
+#undef ABS_DIFF_VEC2
+#undef ABS_DIFF_VEC3
+#undef ABS_DIFF_VEC4
+#undef ABS_DIFF_VEC8
+#undef ABS_DIFF_VEC16
+
 /////////////////////////////////////////////////////////////////////////////
 // Work Items functions (see 6.11.1 of OCL 1.1 spec)
 /////////////////////////////////////////////////////////////////////////////