summary refs log tree commit diff
diff options
context:
space:
mode:
author nathan <nathan@138bc75d-0d04-0410-961f-82ee72b054a4> 2015-11-13 15:08:11 +0000
committer nathan <nathan@138bc75d-0d04-0410-961f-82ee72b054a4> 2015-11-13 15:08:11 +0000
commit bde24c3570cbd5aa90f4beace53f22eb2707062e (patch)
tree 19a08eb6b2e574c679c43e09a166f5db2cd3e2bf
parent bdb62e6a5583c7e41c438d1c29789aee41f8d519 (diff)
download gcc-bde24c3570cbd5aa90f4beace53f22eb2707062e.tar.gz
gcc/
* config/nvptx/nvptx.c (nvptx_generate_vector_shuffle): Deal with complex types.

libgomp/
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: New.
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@230325 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 5
-rw-r--r-- gcc/config/nvptx/nvptx.c 49
-rw-r--r-- libgomp/ChangeLog 5
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c 52
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c 52
5 files changed, 151 insertions, 12 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3c2d9364bca..18d0fdb2353 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -11,6 +11,11 @@
2015-11-13 Nathan Sidwell <nathan@codesourcery.com>
+ * config/nvptx/nvptx.c (nvptx_generate_vector_shuffle): Deal with
+ complex types.
+
+2015-11-13 Nathan Sidwell <nathan@codesourcery.com>
+
* gimplify.c (oacc_default_clause): Use inform for enclosing scope.
2015-11-13 Tom de Vries <tom@codesourcery.com>
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index d8673018819..26c2e961051 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -3634,26 +3634,51 @@ nvptx_generate_vector_shuffle (location_t loc,
{
unsigned fn = NVPTX_BUILTIN_SHUFFLE;
tree_code code = NOP_EXPR;
- tree type = unsigned_type_node;
- enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
+ tree arg_type = unsigned_type_node;
+ tree var_type = TREE_TYPE (var);
+ tree dest_type = var_type;
- if (!INTEGRAL_MODE_P (mode))
+ if (TREE_CODE (var_type) == COMPLEX_TYPE)
+ var_type = TREE_TYPE (var_type);
+
+ if (TREE_CODE (var_type) == REAL_TYPE)
code = VIEW_CONVERT_EXPR;
- if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (DImode))
+
+ if (TYPE_SIZE (var_type)
+ == TYPE_SIZE (long_long_unsigned_type_node))
{
fn = NVPTX_BUILTIN_SHUFFLELL;
- type = long_long_unsigned_type_node;
+ arg_type = long_long_unsigned_type_node;
}
-
+
tree call = nvptx_builtin_decl (fn, true);
- call = build_call_expr_loc
- (loc, call, 3, fold_build1 (code, type, var),
- build_int_cst (unsigned_type_node, shift),
- build_int_cst (unsigned_type_node, SHUFFLE_DOWN));
+ tree bits = build_int_cst (unsigned_type_node, shift);
+ tree kind = build_int_cst (unsigned_type_node, SHUFFLE_DOWN);
+ tree expr;
- call = fold_build1 (code, TREE_TYPE (dest_var), call);
+ if (var_type != dest_type)
+ {
+ /* Do real and imaginary parts separately. */
+ tree real = fold_build1 (REALPART_EXPR, var_type, var);
+ real = fold_build1 (code, arg_type, real);
+ real = build_call_expr_loc (loc, call, 3, real, bits, kind);
+ real = fold_build1 (code, var_type, real);
+
+ tree imag = fold_build1 (IMAGPART_EXPR, var_type, var);
+ imag = fold_build1 (code, arg_type, imag);
+ imag = build_call_expr_loc (loc, call, 3, imag, bits, kind);
+ imag = fold_build1 (code, var_type, imag);
+
+ expr = fold_build2 (COMPLEX_EXPR, dest_type, real, imag);
+ }
+ else
+ {
+ expr = fold_build1 (code, arg_type, var);
+ expr = build_call_expr_loc (loc, call, 3, expr, bits, kind);
+ expr = fold_build1 (code, dest_type, expr);
+ }
- gimplify_assign (dest_var, call, seq);
+ gimplify_assign (dest_var, expr, seq);
}
/* Insert code to locklessly update *PTR with *PTR OP VAR just before
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 4e0cddb4e28..9ca963ac314 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,8 @@
+2015-11-13 Nathan Sidwell <nathan@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: New.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: New.
+
2015-11-12 James Norris <jnorris@codesourcery.com>
Joseph Myers <joseph@codesourcery.com>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c
new file mode 100644
index 00000000000..314e5118be9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c
@@ -0,0 +1,52 @@
+
+#include <complex.h>
+
+/* Double float has 52 bits of fraction (53 bits of precision). */
+#define FRAC (1.0 / (1LL << 48))
+
+int close_enough (double _Complex a, double _Complex b)
+{
+ double _Complex diff = a - b;
+ double mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
+ double mag2_diff = (__real__(diff) * __real__ (diff)
+ + __imag__ (diff) * __imag__ (diff));
+
+ return mag2_diff / mag2_a < (FRAC * FRAC);
+}
+
+int main (void)
+{
+#define N 100
+ double _Complex ary[N], sum, prod, tsum, tprod;
+ int ix;
+
+ sum = tsum = 0;
+ prod = tprod = 1;
+
+ for (ix = 0; ix < N; ix++)
+ {
+ double frac = ix * (1.0 / 1024) + 1.0;
+
+ ary[ix] = frac + frac * 2.0i - 1.0i;
+ sum += ary[ix];
+ prod *= ary[ix];
+ }
+
+#pragma acc parallel vector_length(32) copyin(ary) copy (tsum, tprod)
+ {
+#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
+ for (ix = 0; ix < N; ix++)
+ {
+ tsum += ary[ix];
+ tprod *= ary[ix];
+ }
+ }
+
+ if (!close_enough (sum, tsum))
+ return 1;
+
+ if (!close_enough (prod, tprod))
+ return 1;
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c
new file mode 100644
index 00000000000..b3bde656079
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c
@@ -0,0 +1,52 @@
+
+#include <complex.h>
+
+/* Single float has 23 bits of fraction. */
+#define FRAC (1.0f / (1 << 20))
+
+int close_enough (float _Complex a, float _Complex b)
+{
+ float _Complex diff = a - b;
+ float mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
+ float mag2_diff = (__real__(diff) * __real__ (diff)
+ + __imag__ (diff) * __imag__ (diff));
+
+ return mag2_diff / mag2_a < (FRAC * FRAC);
+}
+
+int main (void)
+{
+#define N 100
+ float _Complex ary[N], sum, prod, tsum, tprod;
+ int ix;
+
+ sum = tsum = 0;
+ prod = tprod = 1;
+
+ for (ix = 0; ix < N; ix++)
+ {
+ float frac = ix * (1.0f / 1024) + 1.0f;
+
+ ary[ix] = frac + frac * 2.0i - 1.0i;
+ sum += ary[ix];
+ prod *= ary[ix];
+ }
+
+#pragma acc parallel vector_length(32) copyin(ary) copy (tsum, tprod)
+ {
+#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
+ for (ix = 0; ix < N; ix++)
+ {
+ tsum += ary[ix];
+ tprod *= ary[ix];
+ }
+ }
+
+ if (!close_enough (sum, tsum))
+ return 1;
+
+ if (!close_enough (prod, tprod))
+ return 1;
+
+ return 0;
+}