diff options
author | rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-02-05 21:38:53 +0000 |
---|---|---|
committer | rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-02-05 21:38:53 +0000 |
commit | 6d85df694dad7dce6feb3a161bab3c3fc2878e5a (patch) | |
tree | acd2d1f47d5813e7f9565319ed7b57c1c8a10607 /gcc | |
parent | 3bee4a6b4bed564400cb06ed3056b98cf2e07661 (diff) | |
download | gcc-6d85df694dad7dce6feb3a161bab3c3fc2878e5a.tar.gz |
2007-02-05 Richard Guenther <rguenther@suse.de>
* tree-vectorizer.h (vectorizable_function): Add argument type
argument, change return type.
* tree-vect-patterns.c (vect_recog_pow_pattern): Adjust caller.
* tree-vect-transform.c (vectorizable_function): Handle extra
argument, return vectorized function decl.
(build_vectorized_function_call): Remove.
(vectorizable_call): Handle calls with result and argument types
differing. Handle loop vectorization factor correctly.
* targhooks.c (default_builtin_vectorized_function): Adjust for
extra argument.
* targhooks.h (default_builtin_vectorized_function): Likewise.
* target.h (builtin_vectorized_function): Add argument type
argument.
* config/i386/i386.c (ix86_builtin_vectorized_function): Handle
extra argument, allow vectorizing of lrintf.
* doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Adjust
documentation of target hook.
* gcc.target/i386/vectorize3.c: New testcase.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@121617 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 20 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 30 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 5 | ||||
-rw-r--r-- | gcc/target.h | 2 | ||||
-rw-r--r-- | gcc/targhooks.c | 3 | ||||
-rw-r--r-- | gcc/targhooks.h | 2 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vectorize3.c | 30 | ||||
-rw-r--r-- | gcc/tree-vect-patterns.c | 2 | ||||
-rw-r--r-- | gcc/tree-vect-transform.c | 171 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 2 |
11 files changed, 189 insertions, 82 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e2b0f58fff6..00483ff65c3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2007-02-05 Richard Guenther <rguenther@suse.de> + + * tree-vectorizer.h (vectorizable_function): Add argument type + argument, change return type. + * tree-vect-patterns.c (vect_recog_pow_pattern): Adjust caller. + * tree-vect-transform.c (vectorizable_function): Handle extra + argument, return vectorized function decl. + (build_vectorized_function_call): Remove. + (vectorizable_call): Handle calls with result and argument types + differing. Handle loop vectorization factor correctly. + * targhooks.c (default_builtin_vectorized_function): Adjust for + extra argument. + * targhooks.h (default_builtin_vectorized_function): Likewise. + * target.h (builtin_vectorized_function): Add argument type + argument. + * config/i386/i386.c (ix86_builtin_vectorized_function): Handle + extra argument, allow vectorizing of lrintf. + * doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Adjust + documentation of target hook. + 2007-02-05 Hans-Peter Nilsson <hp@axis.com> PR target/30665 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e652d7ef341..ea1284960ad 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1355,7 +1355,7 @@ static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, tree, bool); static void ix86_init_builtins (void); static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int); -static tree ix86_builtin_vectorized_function (enum built_in_function, tree); +static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree); static const char *ix86_mangle_fundamental_type (tree); static tree ix86_stack_protect_fail (void); static rtx ix86_internal_arg_pointer (void); @@ -17661,29 +17661,41 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if it is not available. */ static tree -ix86_builtin_vectorized_function (enum built_in_function fn, tree type) +ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out, + tree type_in) { - enum machine_mode el_mode; - int n; + enum machine_mode in_mode, out_mode; + int in_n, out_n; - if (TREE_CODE (type) != VECTOR_TYPE) + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE) return NULL_TREE; - el_mode = TYPE_MODE (TREE_TYPE (type)); - n = TYPE_VECTOR_SUBPARTS (type); + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); switch (fn) { case BUILT_IN_SQRT: - if (el_mode == DFmode && n == 2) + if (out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) return ix86_builtins[IX86_BUILTIN_SQRTPD]; return NULL_TREE; case BUILT_IN_SQRTF: - if (el_mode == SFmode && n == 4) + if (out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) return ix86_builtins[IX86_BUILTIN_SQRTPS]; return NULL_TREE; + case BUILT_IN_LRINTF: + if (out_mode == SImode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return ix86_builtins[IX86_BUILTIN_CVTPS2DQ]; + return NULL_TREE; + default: ; } diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index c9f6cb89d47..ebb3f2c871b 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5384,11 +5384,12 @@ preserved (e.g. used only by a reduction computation). Otherwise, the @code{widen_mult_hi/lo} idioms will be used. @end deftypefn -@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (enum built_in_function @var{code}, tree @var{vec_type}) +@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (enum built_in_function @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in}) This hook should return the decl of a function that implements the vectorized variant of the builtin function with builtin function code @var{code} or @code{NULL_TREE} if such a function is not available. The return type of -the vectorized function shall be of vector type @var{vec_type}. +the vectorized function shall be of vector type @var{vec_type_out} and the +argument types should be @var{vec_type_in}. @end deftypefn @node Anchored Addresses diff --git a/gcc/target.h b/gcc/target.h index f4678e44bc4..2d8d69bed64 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -395,7 +395,7 @@ struct gcc_target /* Returns a code for builtin that realizes vectorized version of function, or NULL_TREE if not available. */ - tree (* builtin_vectorized_function) (unsigned, tree); + tree (* builtin_vectorized_function) (unsigned, tree, tree); /* Target builtin that implements vector widening multiplication. builtin_mul_widen_eve computes the element-by-element products diff --git a/gcc/targhooks.c b/gcc/targhooks.c index e7bdf0b3cbc..ed4d890d2cf 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -323,7 +323,8 @@ default_invalid_within_doloop (rtx insn) tree default_builtin_vectorized_function (enum built_in_function fn ATTRIBUTE_UNUSED, - tree type ATTRIBUTE_UNUSED) + tree type_out ATTRIBUTE_UNUSED, + tree type_in ATTRIBUTE_UNUSED) { return NULL_TREE; } diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 5f63dd77f6f..0b4ded93c45 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -57,7 +57,7 @@ extern const char * default_invalid_within_doloop (rtx); extern bool default_narrow_bitfield (void); -extern tree default_builtin_vectorized_function (enum built_in_function, tree); +extern tree default_builtin_vectorized_function (enum built_in_function, tree, tree); /* These are here, and not in hooks.[ch], because not all users of hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d6f14ca147c..2b88029caf4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2007-02-05 Richard Guenther <rguenther@suse.de> + + * gcc.target/i386/vectorize3.c: New testcase. + 2007-02-05 Hans-Peter Nilsson <hp@axis.com> PR target/30665 diff --git a/gcc/testsuite/gcc.target/i386/vectorize3.c b/gcc/testsuite/gcc.target/i386/vectorize3.c new file mode 100644 index 00000000000..2947acbafda --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vectorize3.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 -mfpmath=sse" } */ + +float a[256]; +int b[256]; +unsigned short c[256]; + +extern long lrintf (float); + +void foo(void) +{ + int i; + + for (i=0; i<256; ++i) + b[i] = lrintf (a[i]); +} + +void bar(void) +{ + int i; + + for (i=0; i<256; ++i) + { + b[i] = lrintf (a[i]); + c[i] += c[i]; + } +} + +/* { dg-final { scan-assembler "cvtps2dq" } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index f9d3de6d4eb..c9e34e3e419 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -488,7 +488,7 @@ vect_recog_pow_pattern (tree last_stmt, tree *type_in, tree *type_out) if (*type_in) { newfn = build_function_call_expr (newfn, newarglist); - if (vectorizable_function (newfn, *type_in)) + if (vectorizable_function (newfn, *type_in, *type_in) != NULL_TREE) return newfn; } } diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 846d52bf90c..fc95e6090aa 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -1579,47 +1579,28 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } /* Checks if CALL can be vectorized in type VECTYPE. Returns - true if the target has a vectorized version of the function, - or false if the function cannot be vectorized. */ + a function declaration if the target has a vectorized version + of the function, or NULL_TREE if the function cannot be vectorized. */ -bool -vectorizable_function (tree call, tree vectype) +tree +vectorizable_function (tree call, tree vectype_out, tree vectype_in) { tree fndecl = get_callee_fndecl (call); + enum built_in_function code; /* We only handle functions that do not read or clobber memory -- i.e. const or novops ones. */ if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS))) - return false; + return NULL_TREE; if (!fndecl || TREE_CODE (fndecl) != FUNCTION_DECL || !DECL_BUILT_IN (fndecl)) - return false; + return NULL_TREE; - if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype)) - return true; - - return false; -} - -/* Returns an expression that performs a call to vectorized version - of FNDECL in type VECTYPE, with the arguments given by ARGS. - If extra statements need to be generated, they are inserted - before BSI. */ - -static tree -build_vectorized_function_call (tree fndecl, - tree vectype, tree args) -{ - tree vfndecl; - enum built_in_function code = DECL_FUNCTION_CODE (fndecl); - - /* The target specific builtin should be available. */ - vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype); - gcc_assert (vfndecl != NULL_TREE); - - return build_function_call_expr (vfndecl, args); + code = DECL_FUNCTION_CODE (fndecl); + return targetm.vectorize.builtin_vectorized_function (code, vectype_out, + vectype_in); } /* Function vectorizable_call. @@ -1635,13 +1616,13 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) tree vec_dest; tree scalar_dest; tree operation; - tree op, args, type; - tree vec_oprnd, vargs, *pvargs_end; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree args, type; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info; + tree vectype_out, vectype_in; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - tree fndecl, rhs, new_temp, def, def_stmt; - enum vect_def_type dt; + tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type; + enum vect_def_type dt[2]; + int ncopies, j, nargs; /* Is STMT a vectorizable call? */ if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT) @@ -1653,31 +1634,68 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) operation = GIMPLE_STMT_OPERAND (stmt, 1); if (TREE_CODE (operation) != CALL_EXPR) return false; - + + /* Process function arguments. */ + rhs_type = NULL_TREE; + for (args = TREE_OPERAND (operation, 1), nargs = 0; + args; args = TREE_CHAIN (args), ++nargs) + { + tree op = TREE_VALUE (args); + + /* Bail out if the function has more than two arguments, we + do not have interesting builtin functions to vectorize with + more than two arguments. */ + if (nargs >= 2) + return false; + + /* We can only handle calls with arguments of the same type. */ + if (rhs_type + && rhs_type != TREE_TYPE (op)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "argument types differ."); + return false; + } + rhs_type = TREE_TYPE (op); + + if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs])) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "use not simple."); + return false; + } + } + + /* No arguments is also not good. */ + if (nargs == 0) + return false; + + vectype_in = get_vectype_for_scalar_type (rhs_type); + + lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0)); + vectype_out = get_vectype_for_scalar_type (lhs_type); + + /* Only handle the case of vectors with the same number of elements. + FIXME: We need a way to handle for example the SSE2 cvtpd2dq + instruction which converts V2DFmode to V4SImode but only + using the lower half of the V4SImode result. */ + if (TYPE_VECTOR_SUBPARTS (vectype_in) != TYPE_VECTOR_SUBPARTS (vectype_out)) + return false; + /* For now, we only vectorize functions if a target specific builtin is available. TODO -- in some cases, it might be profitable to insert the calls for pieces of the vector, in order to be able to vectorize other operations in the loop. */ - if (!vectorizable_function (operation, vectype)) + fndecl = vectorizable_function (operation, vectype_out, vectype_in); + if (fndecl == NULL_TREE) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "function is not vectorizable."); return false; } - gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)); - for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args)) - { - op = TREE_VALUE (args); - - if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "use not simple."); - return false; - } - } + gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)); if (!vec_stmt) /* transformation not required. */ { @@ -1690,29 +1708,50 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "transform operation."); + ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo) + / TYPE_VECTOR_SUBPARTS (vectype_out)); + gcc_assert (ncopies >= 1); + /* Handle def. */ scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0); - vec_dest = vect_create_destination_var (scalar_dest, vectype); + vec_dest = vect_create_destination_var (scalar_dest, vectype_out); - /* Handle uses. */ - vargs = NULL_TREE; - pvargs_end = &vargs; - for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args)) + prev_stmt_info = NULL; + for (j = 0; j < ncopies; ++j) { - op = TREE_VALUE (args); - vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL); - - *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE); - pvargs_end = &TREE_CHAIN (*pvargs_end); - } + tree new_stmt, vargs; + tree vec_oprnd[2]; + int n; + + /* Build argument list for the vectorized call. */ + vargs = NULL_TREE; + for (args = TREE_OPERAND (operation, 1), n = 0; + args; args = TREE_CHAIN (args), ++n) + { + tree op = TREE_VALUE (args); - fndecl = get_callee_fndecl (operation); - rhs = build_vectorized_function_call (fndecl, vectype, vargs); - *vec_stmt = build2 (GIMPLE_MODIFY_STMT, vectype, vec_dest, rhs); - new_temp = make_ssa_name (vec_dest, *vec_stmt); - GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp; + if (j == 0) + vec_oprnd[n] = vect_get_vec_def_for_operand (op, stmt, NULL); + else + vec_oprnd[n] = vect_get_vec_def_for_stmt_copy (dt[n], vec_oprnd[n]); - vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + vargs = tree_cons (NULL_TREE, vec_oprnd[n], vargs); + } + vargs = nreverse (vargs); + + rhs = build_function_call_expr (fndecl, vargs); + new_stmt = build2 (GIMPLE_MODIFY_STMT, NULL_TREE, vec_dest, rhs); + new_temp = make_ssa_name (vec_dest, new_stmt); + GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; + + vect_finish_stmt_generation (stmt, new_stmt, bsi); + + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); + } /* The call in STMT might prevent it from being removed in dce. We however cannot remove it here, due to the way the ssa name it defines is mapped diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index a13ee1e5c69..b523d880bfd 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -412,7 +412,7 @@ extern bool vectorizable_operation (tree, block_stmt_iterator *, tree *); extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *); extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *); extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); -extern bool vectorizable_function (tree, tree); +extern tree vectorizable_function (tree, tree, tree); extern bool vectorizable_call (tree, block_stmt_iterator *, tree *); extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *); extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *); |