diff options
author | meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> | 2010-08-23 16:41:10 +0000 |
---|---|---|
committer | meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> | 2010-08-23 16:41:10 +0000 |
commit | 79676f03d03605298180b4e8d6ad33fb094fb416 (patch) | |
tree | f35f733850bc44e58b4d44dbbd2d90f9ad828859 /gcc | |
parent | 6f56c73420685315624a5069d3ab17f5f06602e3 (diff) | |
download | gcc-79676f03d03605298180b4e8d6ad33fb094fb416.tar.gz |
Add -mveclibabi=mass
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@163470 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 144 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.opt | 4 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 27 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 |
5 files changed, 191 insertions, 1 deletion
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 13eb21bf6dc..406909b2afc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2010-08-23 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/rs6000.opt (-mmass): New option to enable the + compiler to autovectorize mathematical functions for power7 using + the Mathematical Acceleration Subsystem library. + + * config/rs6000/rs6000.c (rs6000_builtin_vectorized_libmass): New + function to handle auto vectorizing math functions that are in the + MASS library. + (rs6000_builtin_vectorized_function): Call it. + + * doc/invoke.texi (RS/6000 and PowerPC Options): Document -mmass. + 2010-08-22 John David Anglin <dave.anglin@nrc-cnrc.gc.ca> PR boehm-gc/34544 diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 3928901cdd9..fc15198f3d7 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -989,6 +989,7 @@ static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool); static rtx rs6000_make_savres_rtx (rs6000_stack_t *, rtx, int, enum machine_mode, bool, bool, bool); static bool rs6000_reg_live_or_pic_offset_p (int); +static tree rs6000_builtin_vectorized_libmass (tree, tree, tree); static tree rs6000_builtin_vectorized_function (tree, tree, tree); static int rs6000_savres_strategy (rs6000_stack_t *, bool, int, int); static void rs6000_restore_saved_cr (rtx, int); @@ -3602,6 +3603,145 @@ rs6000_parse_fpu_option (const char *option) return FPU_NONE; } + +/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a + library with vectorized intrinsics. 
*/ + +static tree +rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in) +{ + char name[32]; + const char *suffix = NULL; + tree fntype, new_fndecl, bdecl = NULL_TREE; + int n_args = 1; + const char *bname; + enum machine_mode el_mode, in_mode; + int n, in_n; + + /* Libmass is suitable for unsafe math only as it does not correctly support + parts of IEEE with the required precision such as denormals. Only support + it if we have VSX to use the simd d2 or f4 functions. + XXX: Add variable length support. */ + if (!flag_unsafe_math_optimizations || !TARGET_VSX) + return NULL_TREE; + + el_mode = TYPE_MODE (TREE_TYPE (type_out)); + n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + if (el_mode != in_mode + || n != in_n) + return NULL_TREE; + + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) + { + enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); + switch (fn) + { + case BUILT_IN_ATAN2: + case BUILT_IN_HYPOT: + case BUILT_IN_POW: + n_args = 2; + /* fall through */ + + case BUILT_IN_ACOS: + case BUILT_IN_ACOSH: + case BUILT_IN_ASIN: + case BUILT_IN_ASINH: + case BUILT_IN_ATAN: + case BUILT_IN_ATANH: + case BUILT_IN_CBRT: + case BUILT_IN_COS: + case BUILT_IN_COSH: + case BUILT_IN_ERF: + case BUILT_IN_ERFC: + case BUILT_IN_EXP2: + case BUILT_IN_EXP: + case BUILT_IN_EXPM1: + case BUILT_IN_LGAMMA: + case BUILT_IN_LOG10: + case BUILT_IN_LOG1P: + case BUILT_IN_LOG2: + case BUILT_IN_LOG: + case BUILT_IN_SIN: + case BUILT_IN_SINH: + case BUILT_IN_SQRT: + case BUILT_IN_TAN: + case BUILT_IN_TANH: + bdecl = implicit_built_in_decls[fn]; + suffix = "d2"; /* pow -> powd2 */ + if (el_mode != DFmode + || n != 2) + return NULL_TREE; + break; + + case BUILT_IN_ATAN2F: + case BUILT_IN_HYPOTF: + case BUILT_IN_POWF: + n_args = 2; + /* fall through */ + + case BUILT_IN_ACOSF: + case BUILT_IN_ACOSHF: + case BUILT_IN_ASINF: + case BUILT_IN_ASINHF: + case BUILT_IN_ATANF: + case 
BUILT_IN_ATANHF: + case BUILT_IN_CBRTF: + case BUILT_IN_COSF: + case BUILT_IN_COSHF: + case BUILT_IN_ERFF: + case BUILT_IN_ERFCF: + case BUILT_IN_EXP2F: + case BUILT_IN_EXPF: + case BUILT_IN_EXPM1F: + case BUILT_IN_LGAMMAF: + case BUILT_IN_LOG10F: + case BUILT_IN_LOG1PF: + case BUILT_IN_LOG2F: + case BUILT_IN_LOGF: + case BUILT_IN_SINF: + case BUILT_IN_SINHF: + case BUILT_IN_SQRTF: + case BUILT_IN_TANF: + case BUILT_IN_TANHF: + bdecl = implicit_built_in_decls[fn]; + suffix = "4"; /* powf -> powf4 */ + if (el_mode != SFmode + || n != 4) + return NULL_TREE; + break; + + default: + return NULL_TREE; + } + } + else + return NULL_TREE; + + gcc_assert (suffix != NULL); + bname = IDENTIFIER_POINTER (DECL_NAME (bdecl)); + strcpy (name, bname + sizeof ("__builtin_") - 1); + strcat (name, suffix); + + if (n_args == 1) + fntype = build_function_type_list (type_out, type_in, NULL); + else if (n_args == 2) + fntype = build_function_type_list (type_out, type_in, type_in, NULL); + else + gcc_unreachable (); + + /* Build a function declaration for the vectorized function. */ + new_fndecl = build_decl (BUILTINS_LOCATION, + FUNCTION_DECL, get_identifier (name), fntype); + TREE_PUBLIC (new_fndecl) = 1; + DECL_EXTERNAL (new_fndecl) = 1; + DECL_IS_NOVOPS (new_fndecl) = 1; + TREE_READONLY (new_fndecl) = 1; + + return new_fndecl; +} + /* Returns a function decl for a vectorized version of the builtin function with builtin function code FN and the result vector type TYPE, or NULL_TREE if it is not available. */ @@ -3768,6 +3908,10 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out, } } + /* Generate calls to libmass if appropriate. 
*/ + if (TARGET_MASS) + return rs6000_builtin_vectorized_libmass (fndecl, type_out, type_in); + return NULL_TREE; } diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 30f9b3988c0..dbd75802701 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -115,6 +115,10 @@ mpopcntd Target Report Mask(POPCNTD) Use PowerPC V2.06 popcntd instruction +mmass +Target Report Var(TARGET_MASS) Init(0) +Use the Mathematical Acceleration Subsystem library high performance math libraries. + mvsx Target Report Mask(VSX) Use vector/scalar (VSX) instructions diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b2d72d9a9d0..4cdda3dcef9 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -786,7 +786,9 @@ See RS/6000 and PowerPC Options. -mprototype -mno-prototype @gol -msim -mmvme -mads -myellowknife -memb -msdata @gol -msdata=@var{opt} -mvxworks -G @var{num} -pthread @gol --mrecip -mrecip=@var{opt} -mno-recip -mrecip-precision -mno-recip-precision} +-mrecip -mrecip=@var{opt} -mno-recip -mrecip-precision +-mno-recip-precision @gol +-mmass} @emph{RX Options} @gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol @@ -15847,6 +15849,29 @@ automatically selects @option{-mrecip-precision}. The double precision square root estimate instructions are not generated by default on low precision machines, since they do not provide an estimate that converges after three steps. + +@item -mmass +@itemx -mno-mass +@opindex mmass +Specifies that IBM's Mathematical Acceleration Subsystem (MASS) +libraries are used to vectorize intrinsics via external library calls. 
GCC +will currently emit calls to @code{acosd2}, @code{acosf4}, +@code{acoshd2}, @code{acoshf4}, @code{asind2}, @code{asinf4}, +@code{asinhd2}, @code{asinhf4}, @code{atan2d2}, @code{atan2f4}, +@code{atand2}, @code{atanf4}, @code{atanhd2}, @code{atanhf4}, +@code{cbrtd2}, @code{cbrtf4}, @code{cosd2}, @code{cosf4}, +@code{coshd2}, @code{coshf4}, @code{erfcd2}, @code{erfcf4}, +@code{erfd2}, @code{erff4}, @code{exp2d2}, @code{exp2f4}, +@code{expd2}, @code{expf4}, @code{expm1d2}, @code{expm1f4}, +@code{hypotd2}, @code{hypotf4}, @code{lgammad2}, @code{lgammaf4}, +@code{log10d2}, @code{log10f4}, @code{log1pd2}, @code{log1pf4}, +@code{log2d2}, @code{log2f4}, @code{logd2}, @code{logf4}, +@code{powd2}, @code{powf4}, @code{sind2}, @code{sinf4}, @code{sinhd2}, +@code{sinhf4}, @code{sqrtd2}, @code{sqrtf4}, @code{tand2}, +@code{tanf4}, @code{tanhd2}, and @code{tanhf4} when generating code +for power7. Both @option{-ftree-vectorize} and +@option{-funsafe-math-optimizations} have to be enabled. The MASS +libraries will have to be specified at link time. @end table @node RX Options diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f117080a577..d705cf4ec66 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2010-08-23 Michael Meissner <meissner@linux.vnet.ibm.com> + + * gcc.target/powerpc/vsx-mass-1.c: New file, test -mmass. + 2010-08-23 Janus Weil <janus@gcc.gnu.org> PR fortran/45366 |