Diffstat (limited to 'gcc/config/arm/arm.c')
-rw-r--r--  gcc/config/arm/arm.c  | 621
1 file changed, 419 insertions(+), 202 deletions(-)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 88165f27da4..89affa7c8bd 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -620,6 +620,13 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
+#undef TARGET_VECTORIZE_BUILTINS
+#define TARGET_VECTORIZE_BUILTINS
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+ arm_builtin_vectorized_function
+
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
@@ -2639,6 +2646,9 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
switch (code)
{
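+    /* A DImode AND is split into two SImode operations, so each
+       32-bit half of the constant must either satisfy const_ok_for_op
+       or be 0xFFFFFFFF, which leaves that half of the destination
+       unchanged.  For example (a hypothetical source expression),
+       "x & 0xFFFFFFFF000000FFULL" qualifies: hi_val is 0xFFFFFFFF and
+       lo_val 0xFF is a valid immediate.  */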
+ case AND:
+ return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
+ && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
case PLUS:
return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
@@ -12629,6 +12639,277 @@ operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
return true;
}
+/* Helper for gen_operands_ldrd_strd. Returns true iff the memory
+ operand ADDR is an immediate offset from the base register and is
+ not volatile, in which case it sets BASE and OFFSET
+ accordingly. */
+bool
+mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset)
+{
+ /* TODO: Handle more general memory operand patterns, such as
+ PRE_DEC and PRE_INC. */
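+
+  /* The shapes currently recognized reduce to (a sketch):
+       (mem (reg rN))                        base rN, offset 0
+       (mem (plus (reg rN) (const_int d)))   base rN, offset d
+       (mem (minus (reg rN) (const_int d)))  base rN, offset d
+     possibly wrapped in a SUBREG.  */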
+
+ /* Convert a subreg of mem into mem itself. */
+ if (GET_CODE (addr) == SUBREG)
+ addr = alter_subreg (&addr, true);
+
+ gcc_assert (MEM_P (addr));
+
+ /* Don't modify volatile memory accesses. */
+ if (MEM_VOLATILE_P (addr))
+ return false;
+
+ *offset = const0_rtx;
+
+ addr = XEXP (addr, 0);
+ if (REG_P (addr))
+ {
+ *base = addr;
+ return true;
+ }
+ else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+ {
+ *base = XEXP (addr, 0);
+ *offset = XEXP (addr, 1);
+ return (REG_P (*base) && CONST_INT_P (*offset));
+ }
+
+ return false;
+}
+
+#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
+
+/* Called from a peephole2 to replace two word-size accesses with a
+ single LDRD/STRD instruction. Returns true iff we can generate a
+ new instruction sequence. That is, both accesses use the same base
+ register and the gap between constant offsets is 4. This function
+ may reorder its operands to match ldrd/strd RTL templates.
+ OPERANDS are the operands found by the peephole matcher;
+ OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
+   corresponding memory operands.  LOAD indicates whether the access
+   is a load or a store.  CONST_STORE indicates a store of constant
+   integer values held in OPERANDS[4,5] and assumes that the pattern
+   is four insns long, for the purpose of checking dead registers.
+ COMMUTE indicates that register operands may be reordered. */
+bool
+gen_operands_ldrd_strd (rtx *operands, bool load,
+ bool const_store, bool commute)
+{
+ int nops = 2;
+ HOST_WIDE_INT offsets[2], offset;
+ rtx base = NULL_RTX;
+ rtx cur_base, cur_offset, tmp;
+ int i, gap;
+ HARD_REG_SET regset;
+
+ gcc_assert (!const_store || !load);
+ /* Check that the memory references are immediate offsets from the
+ same base register. Extract the base register, the destination
+ registers, and the corresponding memory offsets. */
+ for (i = 0; i < nops; i++)
+ {
+ if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
+ return false;
+
+ if (i == 0)
+ base = cur_base;
+ else if (REGNO (base) != REGNO (cur_base))
+ return false;
+
+ offsets[i] = INTVAL (cur_offset);
+ if (GET_CODE (operands[i]) == SUBREG)
+ {
+ tmp = SUBREG_REG (operands[i]);
+ gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
+ operands[i] = tmp;
+ }
+ }
+
+ /* Make sure there is no dependency between the individual loads. */
+ if (load && REGNO (operands[0]) == REGNO (base))
+ return false; /* RAW */
+
+ if (load && REGNO (operands[0]) == REGNO (operands[1]))
+ return false; /* WAW */
+
+ /* If the same input register is used in both stores
+ when storing different constants, try to find a free register.
+ For example, the code
+ mov r0, 0
+ str r0, [r2]
+ mov r0, 1
+ str r0, [r2, #4]
+ can be transformed into
+ mov r1, 0
+ strd r1, r0, [r2]
+ in Thumb mode assuming that r1 is free. */
+ if (const_store
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && INTVAL (operands[4]) != INTVAL (operands[5]))
+ {
+ if (TARGET_THUMB2)
+ {
+ CLEAR_HARD_REG_SET (regset);
+ tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
+ if (tmp == NULL_RTX)
+ return false;
+
+ /* Use the new register in the first load to ensure that
+ if the original input register is not dead after peephole,
+ then it will have the correct constant value. */
+ operands[0] = tmp;
+ }
+ else if (TARGET_ARM)
+ {
+      int regno = REGNO (operands[0]);
+ if (!peep2_reg_dead_p (4, operands[0]))
+ {
+ /* When the input register is even and is not dead after the
+ pattern, it has to hold the second constant but we cannot
+ form a legal STRD in ARM mode with this register as the second
+ register. */
+ if (regno % 2 == 0)
+ return false;
+
+ /* Is regno-1 free? */
+ SET_HARD_REG_SET (regset);
+          CLEAR_HARD_REG_BIT (regset, regno - 1);
+ tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
+ if (tmp == NULL_RTX)
+ return false;
+
+ operands[0] = tmp;
+ }
+ else
+ {
+ /* Find a DImode register. */
+ CLEAR_HARD_REG_SET (regset);
+ tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
+ if (tmp != NULL_RTX)
+ {
+ operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
+ operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
+ }
+ else
+ {
+ /* Can we use the input register to form a DI register? */
+ SET_HARD_REG_SET (regset);
+              CLEAR_HARD_REG_BIT (regset,
+                                  regno % 2 == 0 ? regno + 1 : regno - 1);
+ tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
+ if (tmp == NULL_RTX)
+ return false;
+ operands[regno % 2 == 1 ? 0 : 1] = tmp;
+ }
+ }
+
+ gcc_assert (operands[0] != NULL_RTX);
+ gcc_assert (operands[1] != NULL_RTX);
+ gcc_assert (REGNO (operands[0]) % 2 == 0);
+ gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
+ }
+ }
+
+ /* Make sure the instructions are ordered with lower memory access first. */
+ if (offsets[0] > offsets[1])
+ {
+ gap = offsets[0] - offsets[1];
+ offset = offsets[1];
+
+ /* Swap the instructions such that lower memory is accessed first. */
+ SWAP_RTX (operands[0], operands[1]);
+ SWAP_RTX (operands[2], operands[3]);
+ if (const_store)
+ SWAP_RTX (operands[4], operands[5]);
+ }
+ else
+ {
+ gap = offsets[1] - offsets[0];
+ offset = offsets[0];
+ }
+
+ /* Make sure accesses are to consecutive memory locations. */
+ if (gap != 4)
+ return false;
+
+ /* Make sure we generate legal instructions. */
+ if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
+ false, load))
+ return true;
+
+  /* In Thumb state, where registers are almost unconstrained, there
+     is little hope of fixing it by reordering.  */
+ if (TARGET_THUMB2)
+ return false;
+
+ if (load && commute)
+ {
+ /* Try reordering registers. */
+ SWAP_RTX (operands[0], operands[1]);
+ if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
+ false, load))
+ return true;
+ }
+
+ if (const_store)
+ {
+ /* If input registers are dead after this pattern, they can be
+ reordered or replaced by other registers that are free in the
+ current pattern. */
+ if (!peep2_reg_dead_p (4, operands[0])
+ || !peep2_reg_dead_p (4, operands[1]))
+ return false;
+
+ /* Try to reorder the input registers. */
+ /* For example, the code
+ mov r0, 0
+ mov r1, 1
+ str r1, [r2]
+ str r0, [r2, #4]
+ can be transformed into
+ mov r1, 0
+ mov r0, 1
+           strd r0, r1, [r2]
+ */
+ if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
+ false, false))
+ {
+ SWAP_RTX (operands[0], operands[1]);
+ return true;
+ }
+
+ /* Try to find a free DI register. */
+ CLEAR_HARD_REG_SET (regset);
+ add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
+ add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
+ while (true)
+ {
+ tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
+ if (tmp == NULL_RTX)
+ return false;
+
+ /* DREG must be an even-numbered register in DImode.
+ Split it into SI registers. */
+ operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
+ operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
+ gcc_assert (operands[0] != NULL_RTX);
+ gcc_assert (operands[1] != NULL_RTX);
+ gcc_assert (REGNO (operands[0]) % 2 == 0);
+ gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
+
+ return (operands_ok_ldrd_strd (operands[0], operands[1],
+ base, offset,
+ false, load));
+ }
+ }
+
+ return false;
+}
+#undef SWAP_RTX
+
+
+
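+/* A hypothetical C-level trigger for the const-store peephole above:
+
+       void store_two_constants (int *p)
+       {
+         p[0] = 0;
+         p[1] = 1;
+       }
+
+   compiles to two mov/str pairs through the same base register; when
+   LDRD/STRD is available, gen_operands_ldrd_strd may merge the two
+   stores into a single STRD, subject to the register checks above.  */
+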
/* Print a symbolic form of X to the debug file, F. */
static void
@@ -12821,8 +13102,8 @@ is_jump_table (rtx insn)
rtx table;
if (jump_to_label_p (insn)
- && ((table = next_real_insn (JUMP_LABEL (insn)))
- == next_real_insn (insn))
+ && ((table = next_active_insn (JUMP_LABEL (insn)))
+ == next_active_insn (insn))
&& table != NULL
&& JUMP_TABLE_DATA_P (table))
return table;
@@ -14818,7 +15099,8 @@ output_move_double (rtx *operands, bool emit, int *count)
{
/* Constraints should ensure this. */
gcc_assert (code0 == MEM && code1 == REG);
- gcc_assert (REGNO (operands[1]) != IP_REGNUM);
+ gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
+ || (TARGET_ARM && TARGET_LDRD));
switch (GET_CODE (XEXP (operands[0], 0)))
{
@@ -19483,7 +19765,8 @@ typedef struct {
VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
{#N, NEON_##T, UP (J), CF (N, J), 0}
-/* The mode entries in the following table correspond to the "key" type of the
+/* The NEON builtin data can be found in arm_neon_builtins.def.
+ The mode entries in the following table correspond to the "key" type of the
instruction variant, i.e. equivalent to that which would be specified after
the assembler mnemonic, which usually refers to the last vector operand.
(Signed/unsigned/polynomial types are not differentiated between though, and
@@ -19493,196 +19776,7 @@ typedef struct {
static neon_builtin_datum neon_builtin_data[] =
{
- VAR10 (BINOP, vadd,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
- VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
- VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
- VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
- VAR2 (TERNOP, vfma, v2sf, v4sf),
- VAR2 (TERNOP, vfms, v2sf, v4sf),
- VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
- VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
- VAR2 (TERNOP, vqdmlal, v4hi, v2si),
- VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
- VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
- VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
- VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
- VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
- VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
- VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
- VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
- VAR2 (BINOP, vqdmull, v4hi, v2si),
- VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
- VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
- VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
- VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
- VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR10 (BINOP, vsub,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
- VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
- VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
- VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR2 (BINOP, vcage, v2sf, v4sf),
- VAR2 (BINOP, vcagt, v2sf, v4sf),
- VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
- VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
- VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
- VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
- VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
- VAR2 (BINOP, vrecps, v2sf, v4sf),
- VAR2 (BINOP, vrsqrts, v2sf, v4sf),
- VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR2 (UNOP, vcnt, v8qi, v16qi),
- VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
- VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
- VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- /* FIXME: vget_lane supports more variants than this! */
- VAR10 (GETLANE, vget_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (SETLANE, vset_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
- VAR10 (DUP, vdup_n,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (DUPLANE, vdup_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
- VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
- VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
- VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
- VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
- VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
- VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
- VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
- VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
- VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
- VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
- VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
- VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
- VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
- VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
- VAR10 (BINOP, vext,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
- VAR2 (UNOP, vrev16, v8qi, v16qi),
- VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
- VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
- VAR10 (SELECT, vbsl,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR2 (RINT, vrintn, v2sf, v4sf),
- VAR2 (RINT, vrinta, v2sf, v4sf),
- VAR2 (RINT, vrintp, v2sf, v4sf),
- VAR2 (RINT, vrintm, v2sf, v4sf),
- VAR2 (RINT, vrintz, v2sf, v4sf),
- VAR2 (RINT, vrintx, v2sf, v4sf),
- VAR1 (VTBL, vtbl1, v8qi),
- VAR1 (VTBL, vtbl2, v8qi),
- VAR1 (VTBL, vtbl3, v8qi),
- VAR1 (VTBL, vtbl4, v8qi),
- VAR1 (VTBX, vtbx1, v8qi),
- VAR1 (VTBX, vtbx2, v8qi),
- VAR1 (VTBX, vtbx3, v8qi),
- VAR1 (VTBX, vtbx4, v8qi),
- VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
- VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
- VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
- VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
- VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
- VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
- VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
- VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
- VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
- VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (LOAD1, vld1,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (LOAD1LANE, vld1_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (LOAD1, vld1_dup,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (STORE1, vst1,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (STORE1LANE, vst1_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR9 (LOADSTRUCT,
- vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
- VAR7 (LOADSTRUCTLANE, vld2_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
- VAR9 (STORESTRUCT, vst2,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
- VAR7 (STORESTRUCTLANE, vst2_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR9 (LOADSTRUCT,
- vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
- VAR7 (LOADSTRUCTLANE, vld3_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
- VAR9 (STORESTRUCT, vst3,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
- VAR7 (STORESTRUCTLANE, vst3_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR9 (LOADSTRUCT, vld4,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
- VAR7 (LOADSTRUCTLANE, vld4_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
- VAR9 (STORESTRUCT, vst4,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
- VAR7 (STORESTRUCTLANE, vst4_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
- VAR10 (LOGICBINOP, vand,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (LOGICBINOP, vorr,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (BINOP, veor,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (LOGICBINOP, vbic,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
- VAR10 (LOGICBINOP, vorn,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+#include "arm_neon_builtins.def"
};
#undef CF
@@ -19697,9 +19791,36 @@ static neon_builtin_datum neon_builtin_data[] =
#undef VAR9
#undef VAR10
-/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
- symbolic names defined here (which would require too much duplication).
- FIXME? */
+#define CF(N,X) ARM_BUILTIN_NEON_##N##X
+#define VAR1(T, N, A) \
+ CF (N, A)
+#define VAR2(T, N, A, B) \
+ VAR1 (T, N, A), \
+ CF (N, B)
+#define VAR3(T, N, A, B, C) \
+ VAR2 (T, N, A, B), \
+ CF (N, C)
+#define VAR4(T, N, A, B, C, D) \
+ VAR3 (T, N, A, B, C), \
+ CF (N, D)
+#define VAR5(T, N, A, B, C, D, E) \
+ VAR4 (T, N, A, B, C, D), \
+ CF (N, E)
+#define VAR6(T, N, A, B, C, D, E, F) \
+ VAR5 (T, N, A, B, C, D, E), \
+ CF (N, F)
+#define VAR7(T, N, A, B, C, D, E, F, G) \
+ VAR6 (T, N, A, B, C, D, E, F), \
+ CF (N, G)
+#define VAR8(T, N, A, B, C, D, E, F, G, H) \
+ VAR7 (T, N, A, B, C, D, E, F, G), \
+ CF (N, H)
+#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
+ VAR8 (T, N, A, B, C, D, E, F, G, H), \
+ CF (N, I)
+#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
+ VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
+ CF (N, J)
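+
+/* For instance, an illustrative entry "VAR2 (BINOP, vadd, v2sf, v4sf)"
+   in arm_neon_builtins.def expands here to
+   "ARM_BUILTIN_NEON_vaddv2sf, ARM_BUILTIN_NEON_vaddv4sf", adding one
+   enumerator per builtin variant, in the same order as the
+   neon_builtin_data table above.  This is why ARM_BUILTIN_NEON_BASE
+   below can be recovered by subtracting ARRAY_SIZE (neon_builtin_data)
+   from ARM_BUILTIN_MAX.  */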
enum arm_builtins
{
ARM_BUILTIN_GETWCGR0,
@@ -19948,11 +20069,25 @@ enum arm_builtins
ARM_BUILTIN_WMERGE,
- ARM_BUILTIN_NEON_BASE,
+#include "arm_neon_builtins.def"
- ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
+ ,ARM_BUILTIN_MAX
};
+#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
+
+#undef CF
+#undef VAR1
+#undef VAR2
+#undef VAR3
+#undef VAR4
+#undef VAR5
+#undef VAR6
+#undef VAR7
+#undef VAR8
+#undef VAR9
+#undef VAR10
+
static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
static void
@@ -21629,7 +21764,7 @@ arm_expand_builtin (tree exp,
rtx op1;
rtx op2;
rtx pat;
- int fcode = DECL_FUNCTION_CODE (fndecl);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
size_t i;
enum machine_mode tmode;
enum machine_mode mode0;
@@ -22581,6 +22716,11 @@ thumb1_final_prescan_insn (rtx insn)
else if (conds != CONDS_NOCOND)
cfun->machine->thumb1_cc_insn = NULL_RTX;
}
+
+  /* Check whether an unexpected far jump is used.  */
+  if (cfun->machine->lr_save_eliminated
+      && get_attr_far_jump (insn) == FAR_JUMP_YES)
+    internal_error ("Unexpected thumb1 far jump");
}
int
@@ -22606,6 +22746,8 @@ static int
thumb_far_jump_used_p (void)
{
rtx insn;
+ bool far_jump = false;
+ unsigned int func_size = 0;
/* This test is only important for leaf functions. */
/* assert (!leaf_function_p ()); */
@@ -22656,6 +22798,26 @@ thumb_far_jump_used_p (void)
{
if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
{
+ far_jump = true;
+ }
+ func_size += get_attr_length (insn);
+ }
+
+  /* The far_jump attribute is always true for thumb1 before the
+     shorten_branches pass, so checking it before that pass is not
+     very useful.
+
+     The following heuristic tries to estimate more accurately whether
+     a far jump will finally be needed.  It has to be conservative,
+     because there is no way to roll back a decision not to use far
+     jumps.
+
+     Thumb1 long branch offsets range from -2048 to 2046.  The worst
+     case is that each 2-byte insn is associated with a 4-byte constant
+     pool entry, so using 2048/3 as the function size threshold is
+     conservative enough.  */
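+
+  /* Concretely: at func_size == 683 (just over 2048/3), the worst
+     case is 683 insn bytes plus 2 * 683 == 1366 constant pool bytes,
+     2049 bytes in all, which already exceeds the 2046 byte forward
+     branch range.  */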
+ if (far_jump)
+ {
+ if ((func_size * 3) >= 2048)
+ {
/* Record the fact that we have decided that
the function does use far jumps. */
cfun->machine->far_jump_used = 1;
@@ -25802,7 +25964,7 @@ arm_output_iwmmxt_tinsr (rtx *operands)
const char *
thumb1_output_casesi (rtx *operands)
{
- rtx diff_vec = PATTERN (next_real_insn (operands[0]));
+ rtx diff_vec = PATTERN (next_active_insn (operands[0]));
gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
@@ -25825,7 +25987,7 @@ thumb1_output_casesi (rtx *operands)
const char *
thumb2_output_casesi (rtx *operands)
{
- rtx diff_vec = PATTERN (next_real_insn (operands[2]));
+ rtx diff_vec = PATTERN (next_active_insn (operands[2]));
gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
@@ -25873,6 +26035,7 @@ arm_issue_rate (void)
case cortexa7:
case cortexa8:
case cortexa9:
+ case cortexa53:
case fa726te:
case marvell_pj4:
return 2;
@@ -25999,6 +26162,60 @@ arm_have_conditional_execution (void)
return !TARGET_THUMB1;
}
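+/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION.  Return the
+   builtin decl that vectorizes scalar builtin FNDECL for vector types
+   TYPE_OUT/TYPE_IN, or NULL_TREE if there is none.  As a usage sketch
+   (options and outcome depend on the target), a loop such as
+
+       for (i = 0; i < n; i++)
+         out[i] = __builtin_floorf (in[i]);
+
+   compiled with -funsafe-math-optimizations for an ARMv8 NEON target
+   can now be vectorized using the vrintm builtin chosen below.  */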
+tree
+arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
+{
+ enum machine_mode in_mode, out_mode;
+ int in_n, out_n;
+
+ if (TREE_CODE (type_out) != VECTOR_TYPE
+ || TREE_CODE (type_in) != VECTOR_TYPE
+ || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
+ return NULL_TREE;
+
+ out_mode = TYPE_MODE (TREE_TYPE (type_out));
+ out_n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
+ decl of the vectorized builtin for the appropriate vector mode.
+ NULL_TREE is returned if no such builtin is available. */
+#undef ARM_CHECK_BUILTIN_MODE
+#define ARM_CHECK_BUILTIN_MODE(C) \
+ (out_mode == SFmode && out_n == C \
+ && in_mode == SFmode && in_n == C)
+
+#undef ARM_FIND_VRINT_VARIANT
+#define ARM_FIND_VRINT_VARIANT(N) \
+ (ARM_CHECK_BUILTIN_MODE (2) \
+    ? arm_builtin_decl (ARM_BUILTIN_NEON_##N##v2sf, false) \
+ : (ARM_CHECK_BUILTIN_MODE (4) \
+      ? arm_builtin_decl (ARM_BUILTIN_NEON_##N##v4sf, false) \
+ : NULL_TREE))
+
+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+ {
+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
+ switch (fn)
+ {
+ case BUILT_IN_FLOORF:
+ return ARM_FIND_VRINT_VARIANT (vrintm);
+ case BUILT_IN_CEILF:
+ return ARM_FIND_VRINT_VARIANT (vrintp);
+ case BUILT_IN_TRUNCF:
+ return ARM_FIND_VRINT_VARIANT (vrintz);
+ case BUILT_IN_ROUNDF:
+ return ARM_FIND_VRINT_VARIANT (vrinta);
+ default:
+ return NULL_TREE;
+ }
+ }
+ return NULL_TREE;
+}
+#undef ARM_CHECK_BUILTIN_MODE
+#undef ARM_FIND_VRINT_VARIANT
+
/* The AAPCS sets the maximum alignment of a vector to 64 bits. */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)