diff options
author | nathan <nathan@138bc75d-0d04-0410-961f-82ee72b054a4> | 2015-11-13 15:08:11 +0000 |
---|---|---|
committer | nathan <nathan@138bc75d-0d04-0410-961f-82ee72b054a4> | 2015-11-13 15:08:11 +0000 |
commit | bde24c3570cbd5aa90f4beace53f22eb2707062e (patch) | |
tree | 19a08eb6b2e574c679c43e09a166f5db2cd3e2bf | |
parent | bdb62e6a5583c7e41c438d1c29789aee41f8d519 (diff) | |
download | gcc-bde24c3570cbd5aa90f4beace53f22eb2707062e.tar.gz |
gcc/
* config/nvptx/nvptx.c (nvptx_generate_vector_shuffle): Deal with
complex types.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: New.
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@230325 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.c | 49 | ||||
-rw-r--r-- | libgomp/ChangeLog | 5 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c | 52 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c | 52 |
5 files changed, 151 insertions, 12 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3c2d9364bca..18d0fdb2353 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -11,6 +11,11 @@ 2015-11-13 Nathan Sidwell <nathan@codesourcery.com> + * config/nvptx/nvptx.c (nvptx_generate_vector_shuffle): Deal with + complex types. + +2015-11-13 Nathan Sidwell <nathan@codesourcery.com> + * gimplify.c (oacc_default_clause): Use inform for enclosing scope. 2015-11-13 Tom de Vries <tom@codesourcery.com> diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index d8673018819..26c2e961051 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -3634,26 +3634,51 @@ nvptx_generate_vector_shuffle (location_t loc, { unsigned fn = NVPTX_BUILTIN_SHUFFLE; tree_code code = NOP_EXPR; - tree type = unsigned_type_node; - enum machine_mode mode = TYPE_MODE (TREE_TYPE (var)); + tree arg_type = unsigned_type_node; + tree var_type = TREE_TYPE (var); + tree dest_type = var_type; - if (!INTEGRAL_MODE_P (mode)) + if (TREE_CODE (var_type) == COMPLEX_TYPE) + var_type = TREE_TYPE (var_type); + + if (TREE_CODE (var_type) == REAL_TYPE) code = VIEW_CONVERT_EXPR; - if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (DImode)) + + if (TYPE_SIZE (var_type) + == TYPE_SIZE (long_long_unsigned_type_node)) { fn = NVPTX_BUILTIN_SHUFFLELL; - type = long_long_unsigned_type_node; + arg_type = long_long_unsigned_type_node; } - + tree call = nvptx_builtin_decl (fn, true); - call = build_call_expr_loc - (loc, call, 3, fold_build1 (code, type, var), - build_int_cst (unsigned_type_node, shift), - build_int_cst (unsigned_type_node, SHUFFLE_DOWN)); + tree bits = build_int_cst (unsigned_type_node, shift); + tree kind = build_int_cst (unsigned_type_node, SHUFFLE_DOWN); + tree expr; - call = fold_build1 (code, TREE_TYPE (dest_var), call); + if (var_type != dest_type) + { + /* Do real and imaginary parts separately. */ + tree real = fold_build1 (REALPART_EXPR, var_type, var); + real = fold_build1 (code, arg_type, real); + real = build_call_expr_loc (loc, call, 3, real, bits, kind); + real = fold_build1 (code, var_type, real); + + tree imag = fold_build1 (IMAGPART_EXPR, var_type, var); + imag = fold_build1 (code, arg_type, imag); + imag = build_call_expr_loc (loc, call, 3, imag, bits, kind); + imag = fold_build1 (code, var_type, imag); + + expr = fold_build2 (COMPLEX_EXPR, dest_type, real, imag); + } + else + { + expr = fold_build1 (code, arg_type, var); + expr = build_call_expr_loc (loc, call, 3, expr, bits, kind); + expr = fold_build1 (code, dest_type, expr); + } - gimplify_assign (dest_var, call, seq); + gimplify_assign (dest_var, expr, seq); } /* Insert code to locklessly update *PTR with *PTR OP VAR just before diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 4e0cddb4e28..9ca963ac314 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,8 @@ +2015-11-13 Nathan Sidwell <nathan@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: New. + * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: New. + 2015-11-12 James Norris <jnorris@codesourcery.com> Joseph Myers <joseph@codesourcery.com> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c new file mode 100644 index 00000000000..314e5118be9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c @@ -0,0 +1,52 @@ + +#include <complex.h> + +/* Double float has 53 bits of fraction. */ +#define FRAC (1.0 / (1LL << 48)) + +int close_enough (double _Complex a, double _Complex b) +{ + double _Complex diff = a - b; + double mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a); + double mag2_diff = (__real__(diff) * __real__ (diff) + + __imag__ (diff) * __imag__ (diff)); + + return mag2_diff / mag2_a < (FRAC * FRAC); +} + +int main (void) +{ +#define N 100 + double _Complex ary[N], sum, prod, tsum, tprod; + int ix; + + sum = tsum = 0; + prod = tprod = 1; + + for (ix = 0; ix < N; ix++) + { + double frac = ix * (1.0 / 1024) + 1.0; + + ary[ix] = frac + frac * 2.0i - 1.0i; + sum += ary[ix]; + prod *= ary[ix]; + } + +#pragma acc parallel vector_length(32) copyin(ary) copy (tsum, tprod) + { +#pragma acc loop vector reduction(+:tsum) reduction (*:tprod) + for (ix = 0; ix < N; ix++) + { + tsum += ary[ix]; + tprod *= ary[ix]; + } + } + + if (!close_enough (sum, tsum)) + return 1; + + if (!close_enough (prod, tprod)) + return 1; + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c new file mode 100644 index 00000000000..b3bde656079 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c @@ -0,0 +1,52 @@ + +#include <complex.h> + +/* Single float has 23 bits of fraction. */ +#define FRAC (1.0f / (1 << 20)) + +int close_enough (float _Complex a, float _Complex b) +{ + float _Complex diff = a - b; + float mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a); + float mag2_diff = (__real__(diff) * __real__ (diff) + + __imag__ (diff) * __imag__ (diff)); + + return mag2_diff / mag2_a < (FRAC * FRAC); +} + +int main (void) +{ +#define N 100 + float _Complex ary[N], sum, prod, tsum, tprod; + int ix; + + sum = tsum = 0; + prod = tprod = 1; + + for (ix = 0; ix < N; ix++) + { + float frac = ix * (1.0f / 1024) + 1.0f; + + ary[ix] = frac + frac * 2.0i - 1.0i; + sum += ary[ix]; + prod *= ary[ix]; + } + +#pragma acc parallel vector_length(32) copyin(ary) copy (tsum, tprod) + { +#pragma acc loop vector reduction(+:tsum) reduction (*:tprod) + for (ix = 0; ix < N; ix++) + { + tsum += ary[ix]; + tprod *= ary[ix]; + } + } + + if (!close_enough (sum, tsum)) + return 1; + + if (!close_enough (prod, tprod)) + return 1; + + return 0; +} |