summaryrefslogtreecommitdiff
path: root/backend
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@intel.com>2014-12-11 18:45:04 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-12-18 15:42:11 +0800
commitc09acfac5ac91177166d358c45541b6ecc6b2dc1 (patch)
treefae0691a2ff99ddcc5671d2570f6f0e1ed94b541 /backend
parent92866a083d19343bfa47463523484286d5143def (diff)
downloadbeignet-c09acfac5ac91177166d358c45541b6ecc6b2dc1.tar.gz
Refactor all image builtin functions.
Refactor almost all the image builtin related functions to simplify the code and get rid of most of the awful macros. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com> Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'backend')
-rw-r--r--backend/src/libocl/src/ocl_image.cl811
-rw-r--r--backend/src/llvm/llvm_gen_backend.cpp174
-rw-r--r--backend/src/llvm/llvm_gen_ocl_function.hxx36
-rw-r--r--backend/src/llvm/llvm_scalarize.cpp13
4 files changed, 618 insertions, 416 deletions
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index fd421bf4..95b98ff4 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -20,29 +20,90 @@
#include "ocl_integer.h"
#include "ocl_common.h"
+///////////////////////////////////////////////////////////////////////////////
+// Beignet builtin functions.
+///////////////////////////////////////////////////////////////////////////////
+
// 1D read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ float u, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ int u, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ float u, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ int u, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ float u, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ int u, uint sampler_offset);
// 2D & 1D Array read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ float2 coord, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ int2 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ float2 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ int2 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ float2 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ int2 coord, uint sampler_offset);
// 3D & 2D Array read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ float4 coord, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ int4 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ float4 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ int4 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ float4 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ int4 coord, uint sampler_offset);
+
+// It is unclear why 3-component coordinates need to be supported, but the old
+// version supported them, so keep supporting them here.
+INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ float3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imagei(surface_id, sampler,
+ (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ int3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imagei(surface_id, sampler,
+ (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ float3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imageui(surface_id, sampler,
+ (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ int3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imageui(surface_id, sampler,
+ (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ float3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imagef(surface_id, sampler,
+ (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ int3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imagef(surface_id, sampler,
+ (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
// 1D write
OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4 color);
@@ -50,14 +111,27 @@ OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, uint4 color);
OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4 color);
// 2D & 1D Array write
-OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color);
-OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, uint4 color);
-OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, float4 color);
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int2 coord, int4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int2 coord, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int2 coord, float4 color);
// 3D & 2D Array write
-OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int w, int4 color);
-OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int w, uint4 color);
-OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int w, float4 color);
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int4 coord, int4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int4 coord, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int4 coord, float4 color);
+
+INLINE_OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int3 coord, int4 color)
+{
+ __gen_ocl_write_imagei(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
+}
+INLINE_OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int3 coord, uint4 color)
+{
+ __gen_ocl_write_imageui(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
+}
+INLINE_OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int3 coord, float4 color)
+{
+ __gen_ocl_write_imagef(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
+}
int __gen_ocl_get_image_width(uint surface_id);
int __gen_ocl_get_image_height(uint surface_id);
@@ -65,225 +139,436 @@ int __gen_ocl_get_image_channel_data_type(uint surface_id);
int __gen_ocl_get_image_channel_order(uint surface_id);
int __gen_ocl_get_image_depth(uint surface_id);
-// 2D 3D Image Common Macro
-#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
-#define GEN_FIX_1 1
-#else
-#define GEN_FIX_1 0
-#endif
#define GET_IMAGE(cl_image, surface_id) \
uint surface_id = (uint)cl_image
-OVERLOADABLE float __gen_compute_array_index(const float index, image1d_array_t image)
+
+///////////////////////////////////////////////////////////////////////////////
+// helper functions to validate array index.
+///////////////////////////////////////////////////////////////////////////////
+INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, image1d_array_t image)
{
GET_IMAGE(image, surface_id);
float array_size = __gen_ocl_get_image_depth(surface_id);
- return clamp(rint(index), 0.f, array_size - 1.f);
+ coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f);
+ return coord;
}
-OVERLOADABLE float __gen_compute_array_index(float index, image2d_array_t image)
+INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, image2d_array_t image)
{
GET_IMAGE(image, surface_id);
float array_size = __gen_ocl_get_image_depth(surface_id);
- return clamp(rint(index), 0.f, array_size - 1.f);
+ coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
+ return coord;
}
-OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t image)
+INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, image2d_array_t image)
+{
+ GET_IMAGE(image, surface_id);
+ float array_size = __gen_ocl_get_image_depth(surface_id);
+ coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
+ return coord;
+}
+
+INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, image1d_array_t image)
{
GET_IMAGE(image, surface_id);
int array_size = __gen_ocl_get_image_depth(surface_id);
- return clamp(index, 0, array_size - 1);
+ coord.s1 = clamp(coord.s1, 0, array_size - 1);
+ return coord;
}
-OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
+INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, image2d_array_t image)
{
GET_IMAGE(image, surface_id);
int array_size = __gen_ocl_get_image_depth(surface_id);
- return clamp(index, 0, array_size - 1);
-}
-
-#define DECL_READ_IMAGE0(int_clamping_fix, \
- image_type, type, suffix, coord_type, n) \
- OVERLOADABLE type read_image ##suffix(image_type cl_image, \
- const sampler_t sampler, \
- coord_type coord) \
- { \
- GET_IMAGE(cl_image, surface_id); \
- GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai); \
- if (int_clamping_fix && \
- ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) && \
- ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) \
- return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORD(surface_id, sampler, coord)); \
- return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORDF(surface_id, sampler, coord), 0); \
- }
+ coord.s2 = clamp(coord.s2, 0, array_size - 1);
+ return coord;
+}
-#define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix, \
- image_type, type, suffix, coord_type, n) \
- OVERLOADABLE type read_image ##suffix(image_type cl_image, \
- const sampler_t sampler, \
- coord_type coord) \
- { \
- GET_IMAGE(cl_image, surface_id); \
- GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai) \
- coord_type tmpCoord = coord; \
- if (float_coord_rounding_fix | int_clamping_fix) { \
- if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \
- && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) { \
- if (float_coord_rounding_fix \
- && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) { \
- FIXUP_FLOAT_COORD(tmpCoord); \
- } \
- if (int_clamping_fix) { \
- coord_type intCoord; \
- if (sampler & CLK_NORMALIZED_COORDS_TRUE) { \
- DENORMALIZE_COORD(surface_id, intCoord, tmpCoord); \
- } else \
- intCoord = tmpCoord; \
- return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORDI(surface_id, sampler, intCoord));\
- } \
- } \
- } \
- return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\
- }
+INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, image2d_array_t image)
+{
+ GET_IMAGE(image, surface_id);
+ int array_size = __gen_ocl_get_image_depth(surface_id);
+ coord.s2 = clamp(coord.s2, 0, array_size - 1);
+ return coord;
+}
+
+// For non array image type, we need to do nothing.
+#define GEN_VALIDATE_ARRAY_INDEX(coord_type, image_type) \
+INLINE_OVERLOADABLE coord_type __gen_validate_array_index(coord_type coord, image_type image) \
+{ \
+ return coord; \
+}
+
+GEN_VALIDATE_ARRAY_INDEX(float, image1d_t)
+GEN_VALIDATE_ARRAY_INDEX(int, image1d_t)
+GEN_VALIDATE_ARRAY_INDEX(float2, image2d_t)
+GEN_VALIDATE_ARRAY_INDEX(int2, image2d_t)
+GEN_VALIDATE_ARRAY_INDEX(float4, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(int4, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(float3, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(int3, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(float, image1d_buffer_t)
+GEN_VALIDATE_ARRAY_INDEX(int, image1d_buffer_t)
+
+///////////////////////////////////////////////////////////////////////////////
+// Helper functions to work around some coordinate boundary issues.
+// The major issue on Gen7/Gen7.5 is that the sample message cannot sample
+// integer type surfaces correctly with CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST.
+// The workaround is to use an LD message instead of the normal sample message.
+///////////////////////////////////////////////////////////////////////////////
+bool __gen_sampler_need_fix(const sampler_t sampler)
+{
+ return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&
+ ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
+}
+
+bool __gen_sampler_need_rounding_fix(const sampler_t sampler)
+{
+ return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
+}
+
+
+INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord)
+{
+ if (tmpCoord < 0 && tmpCoord > -0x1p-20f)
+ tmpCoord += -0x1p-9f;
+ return tmpCoord;
+}
+
+INLINE_OVERLOADABLE float2 __gen_fixup_float_coord(float2 tmpCoord)
+{
+ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
+ tmpCoord.s0 += -0x1p-9f;
+ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
+ tmpCoord.s1 += -0x1p-9f;
+ return tmpCoord;
+}
+
+INLINE_OVERLOADABLE float3 __gen_fixup_float_coord(float3 tmpCoord)
+{
+ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
+ tmpCoord.s0 += -0x1p-9f;
+ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
+ tmpCoord.s1 += -0x1p-9f;
+ if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)
+ tmpCoord.s2 += -0x1p-9f;
+ return tmpCoord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_fixup_float_coord(float4 tmpCoord)
+{
+ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
+ tmpCoord.s0 += -0x1p-9f;
+ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
+ tmpCoord.s1 += -0x1p-9f;
+ if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)
+ tmpCoord.s2 += -0x1p-9f;
+ return tmpCoord;
+}
+
+// Functions to denormalize coordinates. They are needed when we have to use the
+// LD message (sampler offset is non-zero) and the coordinates are normalized
+// coordinates.
+INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_t image, float srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ return srcCoord * __gen_ocl_get_image_width(surface_id);
+}
+
+INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image1d_array_t image, float2 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ return srcCoord;
+}
+
+INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_buffer_t image, float srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ return srcCoord * __gen_ocl_get_image_width(surface_id);
+}
+
+INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image2d_t image, float2 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ return srcCoord;
+}
+
+INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image2d_array_t image, float3 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ return srcCoord;
+}
+
+INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image3d_t image, float3 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
+ return srcCoord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image2d_array_t image, float4 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ return srcCoord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image3d_t image, float4 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
+ return srcCoord;
+}
+
+// After denormalizing, we have to fix up the negative boundary.
+INLINE_OVERLOADABLE float __gen_fixup_neg_boundary(float coord)
+{
+ return coord < 0 ? -1 : coord;
+}
+
+INLINE_OVERLOADABLE float2 __gen_fixup_neg_boundary(float2 coord)
+{
+ coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+ coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
+ return coord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_fixup_neg_boundary(float4 coord)
+{
+ coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+ coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
+ coord.s2 = coord.s2 < 0 ? -1 : coord.s2;
+ return coord;
+}
-#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n) \
- OVERLOADABLE type read_image ##suffix(image_type cl_image, \
- coord_type coord) \
- { \
- GET_IMAGE(cl_image, surface_id); \
- GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai) \
- return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORDF(surface_id, \
- CLK_NORMALIZED_COORDS_FALSE \
- | CLK_ADDRESS_NONE \
- | CLK_FILTER_NEAREST, (float)coord), 0); \
+INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
+{
+ coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+ coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
+ coord.s2 = coord.s2 < 0 ? -1 : coord.s2;
+ return coord;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Built-in Image Read/Write Functions
+///////////////////////////////////////////////////////////////////////////////
+
+// 2D 3D Image Common Macro
+#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
+#define GEN_FIX_FLOAT_ROUNDING 1
+#define GEN_FIX_INT_CLAMPING 1
+#else
+#define GEN_FIX_FLOAT_ROUNDING 0
+#define GEN_FIX_INT_CLAMPING 0
+#endif
+
+// For integer coordinates
+#define DECL_READ_IMAGE0(int_clamping_fix, image_type, \
+ image_data_type, suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
+ const sampler_t sampler, \
+ coord_type coord) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord = __gen_validate_array_index(coord, cl_image); \
+ if (int_clamping_fix && __gen_sampler_need_fix(sampler)) \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 1); \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); \
}
-#define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
- OVERLOADABLE void write_image ##suffix(image_type cl_image, coord_type coord, type color)\
- {\
- GET_IMAGE(cl_image, surface_id);\
- __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id, coord, color));\
+// For float coordinates
+#define DECL_READ_IMAGE1(int_clamping_fix, image_type, \
+ image_data_type, suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
+ const sampler_t sampler, \
+ coord_type coord) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); \
+ if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \
+ if (__gen_sampler_need_fix(sampler)) { \
+ if (GEN_FIX_FLOAT_ROUNDING && \
+ __gen_sampler_need_rounding_fix(sampler)) \
+ tmpCoord = __gen_fixup_float_coord(tmpCoord); \
+ if (int_clamping_fix) { \
+ if (sampler & CLK_NORMALIZED_COORDS_TRUE) \
+ tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
+ tmpCoord = __gen_fixup_neg_boundary(tmpCoord); \
+ return __gen_ocl_read_image ##suffix( \
+ surface_id, sampler, tmpCoord, 1); \
+ } \
+ } \
+ } \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0); \
}
-#define DECL_IMAGE_INFO_COMMON(image_type) \
- OVERLOADABLE int get_image_channel_data_type(image_type image)\
- { \
- GET_IMAGE(image, surface_id);\
- return __gen_ocl_get_image_channel_data_type(surface_id); \
- }\
- OVERLOADABLE int get_image_channel_order(image_type image)\
- { \
- GET_IMAGE(image, surface_id);\
- return __gen_ocl_get_image_channel_order(surface_id); \
- } \
- OVERLOADABLE int get_image_width(image_type image) \
- { \
- GET_IMAGE(image, surface_id); \
- return __gen_ocl_get_image_width(surface_id); \
+#define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, \
+ suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
+ coord_type coord) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord = __gen_validate_array_index(coord, cl_image); \
+ return __gen_ocl_read_image ##suffix( \
+ surface_id, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \
+ | CLK_FILTER_NEAREST, coord, 0); \
}
-// 1D
-#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix) \
- DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int, 1) \
- DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float, 1) \
- DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int, 1) \
- DECL_WRITE_IMAGE(image_type, type, suffix, int) \
- DECL_WRITE_IMAGE(image_type, type, suffix, float)
-
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord), 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
-#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
-
-#define FIXUP_FLOAT_COORD(tmpCoord) \
- { \
- if (tmpCoord < 0 && tmpCoord > -0x1p-20f) \
- tmpCoord += -0x1p-9f; \
+#define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, coord_type) \
+ OVERLOADABLE void write_image ##suffix(image_type cl_image, \
+ coord_type coord, \
+ image_data_type color) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord_type fixedCoord = __gen_validate_array_index(coord, cl_image); \
+ __gen_ocl_write_image ##suffix(surface_id, fixedCoord, color); \
}
-DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
-DECL_IMAGE(GEN_FIX_1, image1d_t, uint4, ui)
-DECL_IMAGE(0, image1d_t, float4, f)
-DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, int4, i)
-DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, uint4, ui)
-DECL_IMAGE(0, image1d_buffer_t, float4, f)
+#define int1 int
+#define float1 float
-// 1D Info
-DECL_IMAGE_INFO_COMMON(image1d_t)
-DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef DECL_IMAGE
-// End of 1D
-
-#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n) \
- DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n) \
- DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float ##n, n) \
- DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n) \
- DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \
- DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
-// 2D
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
- (int)(coord.s1 < 0 ? -1 : coord.s1), 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
- dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color
-
-#define FIXUP_FLOAT_COORD(tmpCoord) \
- { \
- if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
- tmpCoord.s0 += -0x1p-9f; \
- if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
- tmpCoord.s1 += -0x1p-9f; \
+#define DECL_IMAGE(int_clamping_fix, image_type, image_data_type, suffix, n) \
+ DECL_READ_IMAGE0(int_clamping_fix, image_type, \
+ image_data_type, suffix, int ##n) \
+ DECL_READ_IMAGE1(int_clamping_fix, image_type, \
+ image_data_type, suffix, float ##n) \
+ DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, suffix, int ##n) \
+ DECL_WRITE_IMAGE(image_type, image_data_type, suffix, int ## n) \
+
+// Declare the int4/uint4/float4 builtins for one image type and coordinate width.
+#define DECL_IMAGE_TYPE(image_type, n) \
+ DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, int4, i, n) \
+ DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, uint4, ui, n) \
+ DECL_IMAGE(0, image_type, float4, f, n)
+
+DECL_IMAGE_TYPE(image1d_t, 1)
+DECL_IMAGE_TYPE(image1d_buffer_t, 1)
+DECL_IMAGE_TYPE(image2d_t, 2)
+DECL_IMAGE_TYPE(image3d_t, 4)
+DECL_IMAGE_TYPE(image3d_t, 3)
+DECL_IMAGE_TYPE(image2d_array_t, 4)
+DECL_IMAGE_TYPE(image2d_array_t, 3)
+
+// For 1D Array:
+// The fixup_1darray_coord functions convert a 1D array coord to a 2D array coord,
+// and the caller must set the sampler offset to 2 when using the converted coord.
+// This works around an image 1D array restriction: ai cannot be set in the
+// LD message. We solve it by faking the same image as a 2D array, and then
+// accessing it by LD message as a 3D surface, treating ai as the w coordinate.
+INLINE_OVERLOADABLE float4 __gen_fixup_1darray_coord(float2 coord, image1d_array_t image)
+{
+ float4 newCoord;
+ newCoord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+ newCoord.s1 = 0;
+ newCoord.s2 = coord.s1;
+ newCoord.s3 = 0;
+ return newCoord;
+}
+
+INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t image)
+{
+ int4 newCoord;
+ newCoord.s0 = coord.s0;
+ newCoord.s1 = 0;
+ newCoord.s2 = coord.s1;
+ newCoord.s3 = 0;
+ return newCoord;
+}
+
+// For integer coordinates
+#define DECL_READ_IMAGE0_1DArray(int_clamping_fix, \
+ image_data_type, suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t cl_image, \
+ const sampler_t sampler, \
+ coord_type coord) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord = __gen_validate_array_index(coord, cl_image); \
+ if (int_clamping_fix && __gen_sampler_need_fix(sampler)) { \
+ int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image); \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, newCoord, 2); \
+ } \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); \
}
-DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2)
-DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
-DECL_IMAGE(0, image2d_t, float4, f, 2)
-
-// 1D Array
-#undef GET_IMAGE_ARRAY_SIZE
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, ai, 2
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)ai
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, __gen_compute_array_index(coord.s1, cl_image), color
-#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
- coord_type ai = __gen_compute_array_index(coord.s1, image);
-
-#define FIXUP_FLOAT_COORD(tmpCoord) \
- { \
- if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
- tmpCoord.s0 += -0x1p-9f; \
+// For float coordinates
+#define DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type, \
+ suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t cl_image, \
+ const sampler_t sampler, \
+ coord_type coord) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); \
+ if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \
+ if (__gen_sampler_need_fix(sampler)) { \
+ if (GEN_FIX_FLOAT_ROUNDING && \
+ __gen_sampler_need_rounding_fix(sampler)) \
+ tmpCoord = __gen_fixup_float_coord(tmpCoord); \
+ if (int_clamping_fix) { \
+ if (sampler & CLK_NORMALIZED_COORDS_TRUE) \
+ tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
+ float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image); \
+ return __gen_ocl_read_image ##suffix( \
+ surface_id, sampler, newCoord, 2); \
+ } \
+ } \
+ } \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0); \
}
-DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
-DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2)
-DECL_IMAGE(0, image1d_array_t, float4, f, 2)
+#define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix) \
+ DECL_READ_IMAGE0_1DArray(int_clamping_fix, image_data_type, suffix, int2) \
+ DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type, \
+ suffix, float2) \
+ DECL_READ_IMAGE_NOSAMPLER(image1d_array_t, image_data_type, suffix, int2) \
+ DECL_WRITE_IMAGE(image1d_array_t, image_data_type, suffix, int2) \
+
+DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, int4, i)
+DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, uint4, ui)
+DECL_IMAGE_1DArray(0, float4, f)
+
+///////////////////////////////////////////////////////////////////////////////
+// Built-in Image Query Functions
+///////////////////////////////////////////////////////////////////////////////
+#define DECL_IMAGE_INFO_COMMON(image_type) \
+ OVERLOADABLE int get_image_channel_data_type(image_type image) \
+ { \
+ GET_IMAGE(image, surface_id); \
+ return __gen_ocl_get_image_channel_data_type(surface_id); \
+ } \
+ OVERLOADABLE int get_image_channel_order(image_type image) \
+ { \
+ GET_IMAGE(image, surface_id); \
+ return __gen_ocl_get_image_channel_order(surface_id); \
+ } \
+ OVERLOADABLE int get_image_width(image_type image) \
+ { \
+ GET_IMAGE(image, surface_id); \
+ return __gen_ocl_get_image_width(surface_id); \
+ }
-// 2D Info
+DECL_IMAGE_INFO_COMMON(image1d_t)
+DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
+DECL_IMAGE_INFO_COMMON(image1d_array_t)
DECL_IMAGE_INFO_COMMON(image2d_t)
+DECL_IMAGE_INFO_COMMON(image3d_t)
+DECL_IMAGE_INFO_COMMON(image2d_array_t)
+
+// 2D extra Info
OVERLOADABLE int get_image_height(image2d_t image)
{
GET_IMAGE(image, surface_id);
@@ -293,90 +578,9 @@ OVERLOADABLE int2 get_image_dim(image2d_t image)
{
return (int2){get_image_width(image), get_image_height(image)};
}
+// End of 2D
-// 1D Array info
-DECL_IMAGE_INFO_COMMON(image1d_array_t)
-OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
-{
- GET_IMAGE(image, surface_id);
- return __gen_ocl_get_image_depth(surface_id);
-}
-
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDI
-#undef EXPEND_READ_COORDF
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef GET_IMAGE_ARRAY_SIZE
-// End of 2D and 1D Array
-
-// 3D
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)coord.s2
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
- (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2), 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
- dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); \
- dstCoord.z = srcCoord.z * __gen_ocl_get_image_depth(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color
-
-#define FIXUP_FLOAT_COORD(tmpCoord) \
- { \
- if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
- tmpCoord.s0 += -0x1p-9f; \
- if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
- tmpCoord.s1 += -0x1p-9f; \
- if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) \
- tmpCoord.s2 += -0x1p-9f; \
- }
-#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
-
-DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4)
-DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4)
-DECL_IMAGE(0, image3d_t, float4, f, 4)
-
-DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3)
-DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3)
-DECL_IMAGE(0, image3d_t, float4, f, 3)
-
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef GET_IMAGE_ARRAY_SIZE
-
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, ai, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)ai
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
- (int)(coord.s1 < 0 ? -1 : coord.s1), (int)ai, 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
- dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, __gen_compute_array_index(coord.s2, cl_image), color
-
-#define FIXUP_FLOAT_COORD(tmpCoord) \
- { \
- if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
- tmpCoord.s0 += -0x1p-9f; \
- if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
- tmpCoord.s1 += -0x1p-9f; \
- }
-#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
- coord_type ai = __gen_compute_array_index(coord.s2, image);
-
-// 2D Array
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4)
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4)
-DECL_IMAGE(0, image2d_array_t, float4, f, 4)
-
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 3)
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 3)
-DECL_IMAGE(0, image2d_array_t, float4, f, 3)
-
-// 3D Info
-DECL_IMAGE_INFO_COMMON(image3d_t)
+// 3D extra Info
OVERLOADABLE int get_image_height(image3d_t image)
{
GET_IMAGE(image, surface_id);
@@ -389,11 +593,13 @@ OVERLOADABLE int get_image_depth(image3d_t image)
}
OVERLOADABLE int4 get_image_dim(image3d_t image)
{
- return (int4){get_image_width(image), get_image_height(image), get_image_depth(image), 0};
+ return (int4) (get_image_width(image),
+ get_image_height(image),
+ get_image_depth(image),
+ 0);
}
-// 2D Array Info
-DECL_IMAGE_INFO_COMMON(image2d_array_t)
+// 2D Array extra Info
OVERLOADABLE int get_image_height(image2d_array_t image)
{
GET_IMAGE(image, surface_id);
@@ -409,21 +615,10 @@ OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
return __gen_ocl_get_image_depth(surface_id);
}
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef GET_IMAGE_ARRAY_SIZE
-// End of 3D and 2D Array
-
-#undef DECL_IMAGE
-#undef DECL_READ_IMAGE
-#undef DECL_READ_IMAGE_NOSAMPLER
-#undef DECL_WRITE_IMAGE
-#undef GEN_FIX_1
-// End of Image
-
-
-#undef GET_IMAGE
+// 1D Array info: the array size is obtained through the image depth query.
+OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
+{
+ GET_IMAGE(image, surface_id);
+ return __gen_ocl_get_image_depth(surface_id); // same builtin the image2d_array_t overload returns
+}
+// End of 1DArray
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 1ea1f339..167b8f02 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -257,9 +257,10 @@ namespace gbe
/*! Get number of element to process dealing either with a vector or a scalar
* value
*/
- static ir::Type getVectorInfo(ir::Context &ctx, Type *llvmType, Value *value, uint32_t &elemNum, bool useUnsigned = false)
+ static ir::Type getVectorInfo(ir::Context &ctx, Value *value, uint32_t &elemNum, bool useUnsigned = false)
{
ir::Type type;
+ Type *llvmType = value->getType();
if (llvmType->isVectorTy() == true) {
VectorType *vectorType = cast<VectorType>(llvmType);
Type *elementType = vectorType->getElementType();
@@ -629,6 +630,7 @@ namespace gbe
void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode);
uint8_t appendSampler(CallSite::arg_iterator AI);
+ uint8_t getImageID(CallInst &I);
// These instructions are not supported at all
void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;}
@@ -2507,8 +2509,8 @@ error:
Value *srcValue = I.getOperand(0);
Value *dstValue = &I;
uint32_t srcElemNum = 0, dstElemNum = 0 ;
- ir::Type srcType = getVectorInfo(ctx, srcValue->getType(), srcValue, srcElemNum);
- ir::Type dstType = getVectorInfo(ctx, dstValue->getType(), dstValue, dstElemNum);
+ ir::Type srcType = getVectorInfo(ctx, srcValue, srcElemNum);
+ ir::Type dstType = getVectorInfo(ctx, dstValue, dstElemNum);
// As long and double are not compatible in register storage
// and we do not support double yet, simply put an assert here
GBE_ASSERT(!(srcType == ir::TYPE_S64 && dstType == ir::TYPE_DOUBLE));
@@ -2908,7 +2910,7 @@ error:
{
// dst is a 4 elements vector. We allocate all 4 registers here.
uint32_t elemNum;
- (void)getVectorInfo(ctx, I.getType(), &I, elemNum);
+ (void)getVectorInfo(ctx, &I, elemNum);
GBE_ASSERT(elemNum == 4);
this->newRegister(&I);
break;
@@ -3036,6 +3038,15 @@ error:
return index;
}
+  uint8_t GenWriter::getImageID(CallInst &I) { // Image-set index of the image passed as argument 0 of image builtin call I.
+    PtrOrigMapIter iter = pointerOrigMap.find(I.getArgOperand(0)); // key on the image argument, not on the call itself
+    GBE_ASSERT(iter != pointerOrigMap.end());
+    SmallVectorImpl<Value *> &origins = iter->second;
+    GBE_ASSERT(origins.size() == 1); // the image argument must trace back to exactly one origin value
+    const ir::Register imageReg = this->getRegister(origins[0]);
+    return ctx.getFunction().getImageSet()->getIdx(imageReg);
+  }
+
void GenWriter::emitCallInst(CallInst &I) {
if (Function *F = I.getCalledFunction()) {
if (F->getIntrinsicID() != 0) {
@@ -3199,7 +3210,6 @@ error:
default: NOT_IMPLEMENTED;
}
} else {
- int image_dim;
// Get the name of the called function and handle it
Value *Callee = I.getCalledValue();
const std::string fnName = Callee->getName();
@@ -3315,13 +3325,13 @@ error:
case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE:
case GEN_OCL_GET_IMAGE_CHANNEL_ORDER:
{
- GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
+ const uint8_t imageID = getImageID(I);
+ GBE_ASSERT(AI != AE); ++AI;
const ir::Register reg = this->getRegister(&I, 0);
int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH;
- const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
- ir::ImageInfoKey key(surfaceID, infoType);
+ ir::ImageInfoKey key(imageID, infoType);
const ir::Register infoReg = ctx.getFunction().getImageSet()->appendInfo(key, &ctx);
- ctx.GET_IMAGE_INFO(infoType, reg, surfaceID, infoReg);
+ ctx.GET_IMAGE_INFO(infoType, reg, imageID, infoReg);
break;
}
@@ -3331,69 +3341,75 @@ error:
case GEN_OCL_READ_IMAGE_I_1D_I:
case GEN_OCL_READ_IMAGE_UI_1D_I:
case GEN_OCL_READ_IMAGE_F_1D_I:
- image_dim = 1;
- goto handle_read_image;
case GEN_OCL_READ_IMAGE_I_2D:
case GEN_OCL_READ_IMAGE_UI_2D:
case GEN_OCL_READ_IMAGE_F_2D:
case GEN_OCL_READ_IMAGE_I_2D_I:
case GEN_OCL_READ_IMAGE_UI_2D_I:
case GEN_OCL_READ_IMAGE_F_2D_I:
- image_dim = 2;
- goto handle_read_image;
case GEN_OCL_READ_IMAGE_I_3D:
case GEN_OCL_READ_IMAGE_UI_3D:
case GEN_OCL_READ_IMAGE_F_3D:
case GEN_OCL_READ_IMAGE_I_3D_I:
case GEN_OCL_READ_IMAGE_UI_3D_I:
case GEN_OCL_READ_IMAGE_F_3D_I:
- image_dim = 3;
-handle_read_image:
{
- GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
- const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
+ const uint8_t imageID = getImageID(I);
+ GBE_ASSERT(AI != AE); ++AI;
GBE_ASSERT(AI != AE);
const uint8_t sampler = this->appendSampler(AI);
- ++AI;
-
- ir::Register ucoord;
- ir::Register vcoord;
- ir::Register wcoord;
-
- GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
- if (image_dim > 1) {
- GBE_ASSERT(AI != AE);
- vcoord = this->getRegister(*AI);
- ++AI;
- } else {
- vcoord = ir::ocl::invalid;
- }
-
- if (image_dim > 2) {
- GBE_ASSERT(AI != AE);
- wcoord = this->getRegister(*AI);
- ++AI;
- } else {
- wcoord = ir::ocl::invalid;
- }
+ ++AI; GBE_ASSERT(AI != AE);
+ uint32_t coordNum;
+ (void)getVectorInfo(ctx, *AI, coordNum);
+ if (coordNum == 4)
+ coordNum = 3;
+ const uint32_t imageDim = coordNum;
+ GBE_ASSERT(imageDim >= 1 && imageDim <= 3);
- vector<ir::Register> dstTupleData, srcTupleData;
- const uint32_t elemNum = 4;
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
- const ir::Register reg = this->getRegister(&I, elemID);
- dstTupleData.push_back(reg);
- }
- srcTupleData.push_back(ucoord);
- srcTupleData.push_back(vcoord);
- srcTupleData.push_back(wcoord);
uint8_t samplerOffset = 0;
+ Value *coordVal = *AI;
+ ++AI; GBE_ASSERT(AI != AE);
+ Value *samplerOffsetVal = *AI;
#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
- GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI);
+ Constant *CPV = dyn_cast<Constant>(samplerOffsetVal);
assert(CPV);
const ir::Immediate &x = processConstantImm(CPV);
GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == ir::TYPE_S32, "Invalid sampler type");
samplerOffset = x.getIntegerValue();
#endif
+ bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
+ bool requiredFloatCoord = samplerOffset == 0;
+
+ vector<ir::Register> dstTupleData, srcTupleData;
+ for (uint32_t elemID = 0; elemID < 3; elemID++) {
+ ir::Register reg;
+
+ if (elemID < imageDim)
+ reg = this->getRegister(coordVal, elemID);
+ else
+ reg = ir::ocl::invalid;
+
+ if (isFloatCoord == requiredFloatCoord)
+ srcTupleData.push_back(reg);
+ else if (!requiredFloatCoord) {
+ ir::Register intCoordReg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+ ctx.CVT(ir::TYPE_S32, ir::TYPE_FLOAT, intCoordReg, reg);
+ srcTupleData.push_back(intCoordReg);
+ } else {
+ ir::Register floatCoordReg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+ ctx.CVT(ir::TYPE_FLOAT, ir::TYPE_S32, floatCoordReg, reg);
+ srcTupleData.push_back(floatCoordReg);
+ }
+ }
+
+ uint32_t elemNum;
+ (void)getVectorInfo(ctx, &I, elemNum);
+ GBE_ASSERT(elemNum == 4);
+
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register reg = this->getRegister(&I, elemID);
+ dstTupleData.push_back(reg);
+ }
const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3);
@@ -3426,58 +3442,46 @@ handle_read_image:
GBE_ASSERT(0); // never been here.
}
- bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
-
- ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
- isFloatCoord, sampler, samplerOffset);
+ ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
+ requiredFloatCoord, sampler, samplerOffset);
break;
}
case GEN_OCL_WRITE_IMAGE_I_1D:
case GEN_OCL_WRITE_IMAGE_UI_1D:
case GEN_OCL_WRITE_IMAGE_F_1D:
- image_dim = 1;
- goto handle_write_image;
case GEN_OCL_WRITE_IMAGE_I_2D:
case GEN_OCL_WRITE_IMAGE_UI_2D:
case GEN_OCL_WRITE_IMAGE_F_2D:
- image_dim = 2;
- goto handle_write_image;
case GEN_OCL_WRITE_IMAGE_I_3D:
case GEN_OCL_WRITE_IMAGE_UI_3D:
case GEN_OCL_WRITE_IMAGE_F_3D:
- image_dim = 3;
-handle_write_image:
{
- GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
- const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
- ir::Register ucoord, vcoord, wcoord;
-
- GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
+ const uint8_t imageID = getImageID(I);
+ GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE);
+ uint32_t coordNum;
+ (void)getVectorInfo(ctx, *AI, coordNum);
+ if (coordNum == 4)
+ coordNum = 3;
+ const uint32_t imageDim = coordNum;
+ vector<ir::Register> srcTupleData;
+ GBE_ASSERT(imageDim >= 1 && imageDim <= 3);
- if (image_dim > 1) {
- GBE_ASSERT(AI != AE);
- vcoord = this->getRegister(*AI);
- ++AI;
- } else
- vcoord = ir::ocl::invalid;
-
- if (image_dim > 2) {
- GBE_ASSERT(AI != AE);
- wcoord = this->getRegister(*AI);
- ++AI;
- } else {
- wcoord = ir::ocl::invalid;
- }
+ for (uint32_t elemID = 0; elemID < 3; elemID++) {
+ ir::Register reg;
- GBE_ASSERT(AI != AE);
- vector<ir::Register> srcTupleData;
+ if (elemID < imageDim)
+ reg = this->getRegister(*AI, elemID);
+ else
+ reg = ir::ocl::invalid;
- srcTupleData.push_back(ucoord);
- srcTupleData.push_back(vcoord);
- srcTupleData.push_back(wcoord);
+ srcTupleData.push_back(reg);
+ }
+ ++AI; GBE_ASSERT(AI != AE);
+ uint32_t elemNum;
+ (void)getVectorInfo(ctx, *AI, elemNum);
+ GBE_ASSERT(elemNum == 4);
- const uint32_t elemNum = 4;
for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
const ir::Register reg = this->getRegister(*AI, elemID);
srcTupleData.push_back(reg);
@@ -3504,7 +3508,7 @@ handle_write_image:
GBE_ASSERT(0); // never been here.
}
- ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32);
+ ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32);
break;
}
case GEN_OCL_MUL_HI_INT:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 7434c783..8d55c3f5 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -49,35 +49,35 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D, _Z21__gen_ocl_read_imageijtfj)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D, _Z22__gen_ocl_read_imageuijtfj)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D, _Z21__gen_ocl_read_imagefjtfj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtfffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtfffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtfffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtDv2_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtDv2_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtDv2_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtDv4_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtDv4_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtDv4_fj)
// work around read image with the LD message. The coords are integer type.
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I, _Z21__gen_ocl_read_imageijtij)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I, _Z22__gen_ocl_read_imageuijtij)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I, _Z21__gen_ocl_read_imagefjtij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtiiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtDv2_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtDv2_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtDv2_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtDv4_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtDv4_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtDv4_ij)
// To write_image functions.
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D, _Z22__gen_ocl_write_imageijiDv4_i)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D, _Z23__gen_ocl_write_imageuijiDv4_j)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D, _Z22__gen_ocl_write_imagefjiDv4_f)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijiiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijiiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjiiDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijDv2_iDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijDv2_iDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjDv2_iDv4_f)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijiiiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijiiiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjiiiDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijDv4_iS_)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijDv4_iDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjDv4_iDv4_f)
// To get image info function
DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH, __gen_ocl_get_image_width)
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index 5450a2b2..baf526b6 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -648,7 +648,7 @@ namespace gbe {
// Get the function arguments
CallSite CS(call);
- CallSite::arg_iterator CI = CS.arg_begin() + 2;
+ CallSite::arg_iterator CI = CS.arg_begin() + 1;
switch (it->second) {
default: break;
@@ -661,8 +661,7 @@ namespace gbe {
case GEN_OCL_READ_IMAGE_I_3D:
case GEN_OCL_READ_IMAGE_UI_3D:
case GEN_OCL_READ_IMAGE_F_3D:
-
- case GEN_OCL_READ_IMAGE_I_1D_I:
+ case GEN_OCL_READ_IMAGE_I_1D_I:
case GEN_OCL_READ_IMAGE_UI_1D_I:
case GEN_OCL_READ_IMAGE_F_1D_I:
case GEN_OCL_READ_IMAGE_I_2D_I:
@@ -674,6 +673,9 @@ namespace gbe {
case GEN_OCL_GET_IMAGE_WIDTH:
case GEN_OCL_GET_IMAGE_HEIGHT:
{
+ ++CI;
+ if ((*CI)->getType()->isVectorTy())
+ *CI = InsertToVector(call, *CI);
setAppendPoint(call);
extractFromVector(call);
break;
@@ -681,15 +683,16 @@ namespace gbe {
case GEN_OCL_WRITE_IMAGE_I_3D:
case GEN_OCL_WRITE_IMAGE_UI_3D:
case GEN_OCL_WRITE_IMAGE_F_3D:
- CI++;
case GEN_OCL_WRITE_IMAGE_I_2D:
case GEN_OCL_WRITE_IMAGE_UI_2D:
case GEN_OCL_WRITE_IMAGE_F_2D:
- CI++;
case GEN_OCL_WRITE_IMAGE_I_1D:
case GEN_OCL_WRITE_IMAGE_UI_1D:
case GEN_OCL_WRITE_IMAGE_F_1D:
{
+ if ((*CI)->getType()->isVectorTy())
+ *CI = InsertToVector(call, *CI);
+ ++CI;
*CI = InsertToVector(call, *CI);
break;
}