Diffstat (limited to 'gcc/config/sparc/sparc.c')
-rw-r--r--  gcc/config/sparc/sparc.c | 430
1 file changed, 338 insertions(+), 92 deletions(-)
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 6431405b87f..5d22fc0313e 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -3440,7 +3440,7 @@ sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
REG+REG address, then only one of them gets converted to an
offsettable address. */
if (mode == TFmode
- && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
+ && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
return 0;
/* We prohibit REG + REG on ARCH32 if not optimizing for
@@ -11280,64 +11280,333 @@ output_v8plus_mult (rtx insn, rtx *operands, const char *name)
}
static void
-vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
+vector_init_bshuffle (rtx target, rtx *locs, int n_elts, enum machine_mode mode,
enum machine_mode inner_mode)
{
- rtx t1, final_insn;
- int bmask;
+ rtx mid_target, r0_high, r0_low, r1_high, r1_low;
+ enum machine_mode partial_mode;
+ int bmask, i, idxs[8];
- t1 = gen_reg_rtx (mode);
+ partial_mode = (mode == V4HImode
+ ? V2HImode
+ : (mode == V8QImode
+ ? V4QImode : mode));
- elt = convert_modes (SImode, inner_mode, elt, true);
- emit_move_insn (gen_lowpart(SImode, t1), elt);
+ r0_high = r0_low = NULL_RTX;
+ r1_high = r1_low = NULL_RTX;
- switch (mode)
+ /* Move the pieces into place, as needed, and calculate the nibble
+ indexes for the bmask calculation. After we execute this loop the
+ locs[] array is no longer needed. Therefore, to simplify things,
+ we set entries that have been processed already to NULL_RTX. */
+
+ for (i = 0; i < n_elts; i++)
+ {
+ int j;
+
+ if (locs[i] == NULL_RTX)
+ continue;
+
+ if (!r0_low)
{
- case V2SImode:
- final_insn = gen_bshufflev2si_vis (target, t1, t1);
- bmask = 0x45674567;
- break;
- case V4HImode:
- final_insn = gen_bshufflev4hi_vis (target, t1, t1);
- bmask = 0x67676767;
+ r0_low = locs[i];
+ idxs[i] = 0x7;
+ }
+ else if (!r1_low)
+ {
+ r1_low = locs[i];
+ idxs[i] = 0xf;
+ }
+ else if (!r0_high)
+ {
+ r0_high = gen_highpart (partial_mode, r0_low);
+ emit_move_insn (r0_high, gen_lowpart (partial_mode, locs[i]));
+ idxs[i] = 0x3;
+ }
+ else if (!r1_high)
+ {
+ r1_high = gen_highpart (partial_mode, r1_low);
+ emit_move_insn (r1_high, gen_lowpart (partial_mode, locs[i]));
+ idxs[i] = 0xb;
+ }
+ else
+ gcc_unreachable ();
+
+ for (j = i + 1; j < n_elts; j++)
+ {
+ if (locs[j] == locs[i])
+ {
+ locs[j] = NULL_RTX;
+ idxs[j] = idxs[i];
+ }
+ }
+ locs[i] = NULL_RTX;
+ }
+
+ bmask = 0;
+ for (i = 0; i < n_elts; i++)
+ {
+ int v = idxs[i];
+
+ switch (GET_MODE_SIZE (inner_mode))
+ {
+ case 2:
+ bmask <<= 8;
+ bmask |= (((v - 1) << 4) | v);
break;
- case V8QImode:
- final_insn = gen_bshufflev8qi_vis (target, t1, t1);
- bmask = 0x77777777;
+
+ case 1:
+ bmask <<= 4;
+ bmask |= v;
break;
+
default:
gcc_unreachable ();
}
+ }
+
+ emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
+ force_reg (SImode, GEN_INT (bmask))));
+
+ mid_target = target;
+ if (GET_MODE_SIZE (mode) == 4)
+ {
+ mid_target = gen_reg_rtx (mode == V2HImode
+ ? V4HImode : V8QImode);
+ }
+
+ if (!r1_low)
+ r1_low = r0_low;
+
+ switch (GET_MODE (mid_target))
+ {
+ case V4HImode:
+ emit_insn (gen_bshufflev4hi_vis (mid_target, r0_low, r1_low));
+ break;
+ case V8QImode:
+ emit_insn (gen_bshufflev8qi_vis (mid_target, r0_low, r1_low));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (mid_target != target)
+ emit_move_insn (target, gen_lowpart (partial_mode, mid_target));
+}
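
(Illustration of the bmask packing above, not part of the patch: a standalone sketch with a hypothetical helper that mirrors the loop's arithmetic. For 2-byte elements each nibble index v contributes the byte ((v - 1) << 4) | v, so four half-words all taken from the low source register, index 0x7, pack to 0x67676767, the same constant the removed all-same V4HImode path used.)

#include <stdio.h>

/* Hypothetical helper mirroring the bmask packing loop in
   vector_init_bshuffle: 2-byte elements contribute one byte per
   index, 1-byte elements contribute one nibble per index.  */
static unsigned int
pack_bmask (const int *idxs, int n_elts, int elt_size)
{
  unsigned int bmask = 0;
  for (int i = 0; i < n_elts; i++)
    {
      int v = idxs[i];
      if (elt_size == 2)
        bmask = (bmask << 8) | (((v - 1) << 4) | v);
      else
        bmask = (bmask << 4) | v;
    }
  return bmask;
}

int
main (void)
{
  int idxs[4] = { 0x7, 0x7, 0x7, 0x7 };
  printf ("0x%08x\n", pack_bmask (idxs, 4, 2));  /* prints 0x67676767 */
  return 0;
}
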
+
+static bool
+vector_init_move_words (rtx target, rtx vals, enum machine_mode mode,
+ enum machine_mode inner_mode)
+{
+ switch (mode)
+ {
+ case V1SImode:
+ case V1DImode:
+ emit_move_insn (gen_lowpart (inner_mode, target),
+ gen_lowpart (inner_mode, XVECEXP (vals, 0, 0)));
+ return true;
+
+ case V2SImode:
+ emit_move_insn (gen_highpart (SImode, target), XVECEXP (vals, 0, 0));
+ emit_move_insn (gen_lowpart (SImode, target), XVECEXP (vals, 0, 1));
+ return true;
+
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Move the elements in rtvec VALS into registers compatible with MODE.
+ Store the rtx for these regs into the corresponding array entry of
+ LOCS. */
+static void
+vector_init_prepare_elts (rtx vals, int n_elts, rtx *locs, enum machine_mode mode,
+ enum machine_mode inner_mode)
+{
+ enum machine_mode loc_mode;
+ int i;
+
+ switch (mode)
+ {
+ case V2HImode:
+ loc_mode = V4HImode;
+ break;
+
+ case V4QImode:
+ loc_mode = V8QImode;
+ break;
+
+ case V4HImode:
+ case V8QImode:
+ loc_mode = mode;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (GET_MODE_SIZE (inner_mode) <= 4);
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx dst, elt = XVECEXP (vals, 0, i);
+ int j;
+
+ /* Did we see this already? If so just record its location. */
+ dst = NULL_RTX;
+ for (j = 0; j < i; j++)
+ {
+ if (XVECEXP (vals, 0, j) == elt)
+ {
+ dst = locs[j];
+ break;
+ }
+ }
+
+ if (! dst)
+ {
+ enum rtx_code code = GET_CODE (elt);
+
+ dst = gen_reg_rtx (loc_mode);
+
+ /* We use different strategies based upon whether the element
+ is in memory or in a register. When we start in a register
+ and we're VIS3 capable, it's always cheaper to use the VIS3
+ int-->fp register moves since we avoid having to use stack
+ memory. */
+ if ((TARGET_VIS3 && (code == REG || code == SUBREG))
+ || (CONSTANT_P (elt)
+ && (const_zero_operand (elt, inner_mode)
+ || const_all_ones_operand (elt, inner_mode))))
+ {
+ elt = convert_modes (SImode, inner_mode, elt, true);
+
+ emit_clobber (dst);
+ emit_move_insn (gen_lowpart (SImode, dst), elt);
+ }
+ else
+ {
+ rtx m = elt;
+
+ if (CONSTANT_P (elt))
+ {
+ m = force_const_mem (inner_mode, elt);
+ }
+ else if (code != MEM)
+ {
+ rtx stk = assign_stack_temp (inner_mode, GET_MODE_SIZE(inner_mode), 0);
+ emit_move_insn (stk, elt);
+ m = stk;
+ }
+
+ switch (loc_mode)
+ {
+ case V4HImode:
+ emit_insn (gen_zero_extend_v4hi_vis (dst, m));
+ break;
+ case V8QImode:
+ emit_insn (gen_zero_extend_v8qi_vis (dst, m));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+ locs[i] = dst;
+ }
+}
+
+static void
+sparc_expand_vector_init_vis2 (rtx target, rtx *locs, int n_elts, int n_unique,
+ enum machine_mode mode,
+ enum machine_mode inner_mode)
+{
+ if (n_unique <= 4)
+ {
+ vector_init_bshuffle (target, locs, n_elts, mode, inner_mode);
+ }
+ else
+ {
+ int i;
- emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
- force_reg (SImode, GEN_INT (bmask))));
- emit_insn (final_insn);
+ gcc_assert (mode == V8QImode);
+
+ emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
+ force_reg (SImode, GEN_INT (7)),
+ CONST0_RTX (SImode)));
+ i = n_elts - 1;
+ emit_insn (gen_faligndatav8qi_vis (target, locs[i], locs[i]));
+ while (--i >= 0)
+ emit_insn (gen_faligndatav8qi_vis (target, locs[i], target));
+ }
}
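
(Illustration, not part of the patch: the n_unique > 4 path sets the GSR alignment to 7 via alignaddr, so each faligndata produces the last byte of its first operand followed by the first seven bytes of its second. The standalone model below assumes those VIS semantics and shows how iterating from the last element to the first shifts one element byte into the target per step.)

#include <stdint.h>
#include <stdio.h>

/* Assumed faligndata behaviour: concatenate rs1:rs2 (big-endian byte
   order) and extract the 8 bytes starting at byte offset ALIGN.  */
static uint64_t
faligndata (uint64_t rs1, uint64_t rs2, int align)
{
  unsigned char buf[16];
  for (int i = 0; i < 8; i++)
    {
      buf[i] = (rs1 >> (56 - 8 * i)) & 0xff;
      buf[8 + i] = (rs2 >> (56 - 8 * i)) & 0xff;
    }
  uint64_t r = 0;
  for (int i = 0; i < 8; i++)
    r = (r << 8) | buf[align + i];
  return r;
}

int
main (void)
{
  /* Eight V8QImode elements, each zero-extended into its own register
     as vector_init_prepare_elts arranges.  */
  uint64_t locs[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  uint64_t target = faligndata (locs[7], locs[7], 7);
  for (int i = 6; i >= 0; i--)
    target = faligndata (locs[i], target, 7);
  printf ("%016llx\n", (unsigned long long) target);  /* 0102030405060708 */
  return 0;
}
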
static void
-vector_init_fpmerge (rtx target, rtx elt, enum machine_mode inner_mode)
+sparc_expand_vector_init_vis1 (rtx target, rtx *locs, int n_elts, int n_unique,
+ enum machine_mode mode)
{
- rtx t1, t2, t3, t3_low;
+ enum machine_mode full_mode = mode;
+ rtx (*emitter)(rtx, rtx, rtx);
+ int alignaddr_val, i;
+ rtx tmp = target;
+
+ if (n_unique == 1 && mode == V8QImode)
+ {
+ rtx t2, t2_low, t1;
+
+ t1 = gen_reg_rtx (V4QImode);
+ emit_move_insn (t1, gen_lowpart (V4QImode, locs[0]));
- t1 = gen_reg_rtx (V4QImode);
- elt = convert_modes (SImode, inner_mode, elt, true);
- emit_move_insn (gen_lowpart (SImode, t1), elt);
+ t2 = gen_reg_rtx (V8QImode);
+ t2_low = gen_lowpart (V4QImode, t2);
+
+ /* xxxxxxAA --> xxxxxxxxxxxxAAAA
+ xxxxAAAA --> xxxxxxxxAAAAAAAA
+ AAAAAAAA --> AAAAAAAAAAAAAAAA */
+ emit_insn (gen_fpmerge_vis (t2, t1, t1));
+ emit_move_insn (t1, t2_low);
+ emit_insn (gen_fpmerge_vis (t2, t1, t1));
+ emit_move_insn (t1, t2_low);
+ emit_insn (gen_fpmerge_vis (target, t1, t1));
+ return;
+ }
+
+ switch (mode)
+ {
+ case V2HImode:
+ full_mode = V4HImode;
+ /* FALLTHRU */
+ case V4HImode:
+ emitter = gen_faligndatav4hi_vis;
+ alignaddr_val = 6;
+ break;
+
+ case V4QImode:
+ full_mode = V8QImode;
+ /* FALLTHRU */
+ case V8QImode:
+ emitter = gen_faligndatav8qi_vis;
+ alignaddr_val = 7;
+ break;
- t2 = gen_reg_rtx (V4QImode);
- emit_move_insn (t2, t1);
+ default:
+ gcc_unreachable ();
+ }
- t3 = gen_reg_rtx (V8QImode);
- t3_low = gen_lowpart (V4QImode, t3);
+ if (full_mode != mode)
+ tmp = gen_reg_rtx (full_mode);
- emit_insn (gen_fpmerge_vis (t3, t1, t2));
- emit_move_insn (t1, t3_low);
- emit_move_insn (t2, t3_low);
+ emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
+ force_reg (SImode, GEN_INT (alignaddr_val)),
+ CONST0_RTX (SImode)));
- emit_insn (gen_fpmerge_vis (t3, t1, t2));
- emit_move_insn (t1, t3_low);
- emit_move_insn (t2, t3_low);
+ i = n_elts - 1;
+ emit_insn (emitter (tmp, locs[i], locs[i]));
+ while (--i >= 0)
+ emit_insn (emitter (tmp, locs[i], tmp));
- emit_insn (gen_fpmerge_vis (gen_lowpart (V8QImode, target), t1, t2));
+ if (tmp != target)
+ emit_move_insn (target, gen_highpart (mode, tmp));
}
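
(Illustration, not part of the patch: the single-element V8QImode case relies on fpmerge interleaving the four bytes of its operands, so merging a register with itself and re-reading the low half doubles the run of copies each pass, exactly as the diagram in the comment shows. The sketch below assumes that interleave behaviour; it is illustrative, not the GCC expansion.)

#include <stdio.h>

/* Assumed fpmerge behaviour: interleave the four bytes of A and B.  */
static void
fpmerge (unsigned char dst[8], const unsigned char a[4],
         const unsigned char b[4])
{
  for (int i = 0; i < 4; i++)
    {
      dst[2 * i] = a[i];
      dst[2 * i + 1] = b[i];
    }
}

int
main (void)
{
  unsigned char t1[4] = { 0, 0, 0, 0xAA };  /* xxxxxxAA */
  unsigned char t2[8];

  for (int step = 0; step < 3; step++)
    {
      fpmerge (t2, t1, t1);
      /* On big-endian SPARC the V4QImode lowpart of t2 is its last
         four bytes.  */
      for (int i = 0; i < 4; i++)
        t1[i] = t2[4 + i];
    }

  for (int i = 0; i < 8; i++)
    printf ("%02x", t2[i]);   /* prints aaaaaaaaaaaaaaaa */
  printf ("\n");
  return 0;
}
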
void
@@ -11346,19 +11615,30 @@ sparc_expand_vector_init (rtx target, rtx vals)
enum machine_mode mode = GET_MODE (target);
enum machine_mode inner_mode = GET_MODE_INNER (mode);
int n_elts = GET_MODE_NUNITS (mode);
- int i, n_var = 0;
- bool all_same;
- rtx mem;
+ int i, n_var = 0, n_unique = 0;
+ rtx locs[8];
+
+ gcc_assert (n_elts <= 8);
- all_same = true;
for (i = 0; i < n_elts; i++)
{
rtx x = XVECEXP (vals, 0, i);
+ bool found = false;
+ int j;
+
if (!CONSTANT_P (x))
n_var++;
- if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
- all_same = false;
+ for (j = 0; j < i; j++)
+ {
+ if (rtx_equal_p (x, XVECEXP (vals, 0, j)))
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ n_unique++;
}
if (n_var == 0)
@@ -11367,51 +11647,16 @@ sparc_expand_vector_init (rtx target, rtx vals)
return;
}
- if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
- {
- if (GET_MODE_SIZE (inner_mode) == 4)
- {
- emit_move_insn (gen_lowpart (SImode, target),
- gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
- return;
- }
- else if (GET_MODE_SIZE (inner_mode) == 8)
- {
- emit_move_insn (gen_lowpart (DImode, target),
- gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
- return;
- }
- }
- else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
- && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
- {
- emit_move_insn (gen_highpart (word_mode, target),
- gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
- emit_move_insn (gen_lowpart (word_mode, target),
- gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
- return;
- }
+ if (vector_init_move_words (target, vals, mode, inner_mode))
+ return;
- if (all_same && GET_MODE_SIZE (mode) == 8)
- {
- if (TARGET_VIS2)
- {
- vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
- return;
- }
- if (mode == V8QImode)
- {
- vector_init_fpmerge (target, XVECEXP (vals, 0, 0), inner_mode);
- return;
- }
- }
+ vector_init_prepare_elts (vals, n_elts, locs, mode, inner_mode);
- mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
- for (i = 0; i < n_elts; i++)
- emit_move_insn (adjust_address_nv (mem, inner_mode,
- i * GET_MODE_SIZE (inner_mode)),
- XVECEXP (vals, 0, i));
- emit_move_insn (target, mem);
+ if (TARGET_VIS2)
+ sparc_expand_vector_init_vis2 (target, locs, n_elts, n_unique,
+ mode, inner_mode);
+ else
+ sparc_expand_vector_init_vis1 (target, locs, n_elts, n_unique, mode);
}
static reg_class_t
@@ -11485,12 +11730,16 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
rtx cc_reg, dst, cmp;
cmp = operands[1];
- cmp_mode = GET_MODE (XEXP (cmp, 0));
- if (cmp_mode == DImode && !TARGET_ARCH64)
+ if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
return false;
- dst = operands[0];
+ if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
+ cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
+ cmp_mode = GET_MODE (XEXP (cmp, 0));
+ rc = GET_CODE (cmp);
+
+ dst = operands[0];
if (! rtx_equal_p (operands[2], dst)
&& ! rtx_equal_p (operands[3], dst))
{
@@ -11509,9 +11758,6 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
rc = reverse_condition (rc);
}
- if (cmp_mode == TFmode && !TARGET_HARD_QUAD)
- cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
-
if (XEXP (cmp, 1) == const0_rtx
&& GET_CODE (XEXP (cmp, 0)) == REG
&& cmp_mode == DImode