summary | refs | log | tree | commit | diff
path: root/gcc/config/arm/arm.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/arm/arm.c')
-rw-r--r-- gcc/config/arm/arm.c 5533
1 files changed, 1972 insertions, 3561 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 439a4dea50..0466399922 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1,5 +1,5 @@
/* Output routines for GCC for ARM.
- Copyright (C) 1991-2014 Free Software Foundation, Inc.
+ Copyright (C) 1991-2015 Free Software Foundation, Inc.
Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
and Martin Simmons (@harleqn.co.uk).
More major hacks by Richard Earnshaw (rearnsha@arm.com).
@@ -26,7 +26,17 @@
#include "hash-table.h"
#include "tm.h"
#include "rtl.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "vec.h"
+#include "double-int.h"
+#include "input.h"
+#include "alias.h"
+#include "symtab.h"
+#include "wide-int.h"
+#include "inchash.h"
#include "tree.h"
+#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
@@ -41,15 +51,39 @@
#include "flags.h"
#include "reload.h"
#include "function.h"
+#include "hashtab.h"
+#include "statistics.h"
+#include "real.h"
+#include "fixed-value.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
#include "expr.h"
+#include "insn-codes.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "recog.h"
+#include "predict.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "basic-block.h"
+#include "hash-map.h"
+#include "is-a.h"
+#include "plugin-api.h"
+#include "ipa-ref.h"
#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "tm_p.h"
#include "target.h"
+#include "sched-int.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
@@ -59,6 +93,11 @@
#include "params.h"
#include "opts.h"
#include "dumpfile.h"
+#include "gimple-expr.h"
+#include "builtins.h"
+#include "tm-constrs.h"
+#include "rtl-iter.h"
+#include "sched-int.h"
/* Forward definitions of types. */
typedef struct minipool_node Mnode;
@@ -73,21 +112,20 @@ struct four_ints
/* Forward function declarations. */
static bool arm_const_not_ok_for_debug_p (rtx);
-static bool arm_lra_p (void);
-static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
+static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
-static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
+static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
-static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
-static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
-static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
-static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
+static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
+static int thumb2_legitimate_index_p (machine_mode, rtx, int);
+static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
+static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
-static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
@@ -104,21 +142,21 @@ static const char *output_multi_immediate (rtx *, const char *, const char *,
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
-static HOST_WIDE_INT get_jump_table_size (rtx);
+static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
-static void dump_minipool (rtx);
+static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
-static void push_minipool_barrier (rtx, HOST_WIDE_INT);
-static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
- rtx);
+static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
+static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
+ machine_mode, rtx);
static void arm_reorg (void);
-static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
+static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
@@ -133,8 +171,8 @@ static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
-static int arm_adjust_cost (rtx, rtx, rtx, int);
-static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
+static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
+static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
unsigned HOST_WIDE_INT val,
struct four_ints *return_sequence);
@@ -144,20 +182,20 @@ static int optimal_immediate_sequence_1 (enum rtx_code code,
int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
-static enum machine_mode arm_promote_function_mode (const_tree,
- enum machine_mode, int *,
+static machine_mode arm_promote_function_mode (const_tree,
+ machine_mode, int *,
const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
-static rtx arm_libcall_value_1 (enum machine_mode);
-static rtx arm_libcall_value (enum machine_mode, const_rtx);
+static rtx arm_libcall_value_1 (machine_mode);
+static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
tree);
static bool arm_have_conditional_execution (void);
-static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
-static bool arm_legitimate_constant_p (enum machine_mode, rtx);
+static bool arm_cannot_force_const_mem (machine_mode, rtx);
+static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
@@ -165,29 +203,22 @@ static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, boo
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
-static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
-static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
-static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
-static void arm_init_builtins (void);
-static void arm_init_iwmmxt_builtins (void);
-static rtx safe_vector_operand (rtx, enum machine_mode);
-static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
-static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
-static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
-static tree arm_builtin_decl (unsigned, bool);
+static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
+static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
+static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
-static rtx emit_set_insn (rtx, rtx);
+static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
-static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
+static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
tree, bool);
-static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
+static rtx arm_function_arg (cumulative_args_t, machine_mode,
const_tree, bool);
-static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
+static void arm_function_arg_advance (cumulative_args_t, machine_mode,
const_tree, bool);
-static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
-static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
+static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
+static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
const_tree);
-static rtx aapcs_libcall_value (enum machine_mode);
+static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
@@ -201,18 +232,18 @@ static void arm_encode_section_info (tree, rtx, int);
static void arm_file_end (void);
static void arm_file_start (void);
-static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
+static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
- enum machine_mode, const_tree, bool);
+ machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
-static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
+static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
-static void arm_unwind_emit (FILE *, rtx);
+static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
@@ -233,9 +264,12 @@ static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
-static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
-static bool arm_cannot_copy_insn_p (rtx);
+static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
+static bool arm_macro_fusion_p (void);
+static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
+static int arm_first_cycle_multipass_dfa_lookahead (void);
+static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
@@ -244,23 +278,23 @@ static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
-static bool arm_scalar_mode_supported_p (enum machine_mode);
+static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
-static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
-static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
-static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
-static bool arm_array_mode_supported_p (enum machine_mode,
+static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
+static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
+static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
+static bool arm_array_mode_supported_p (machine_mode,
unsigned HOST_WIDE_INT);
-static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
+static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
-static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
+static bool arm_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
int misalignment,
bool is_packed);
@@ -270,10 +304,13 @@ static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
+static int arm_cortex_m7_branch_cost (bool, bool);
-static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
const unsigned char *sel);
+static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
+
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
tree vectype,
int misalign ATTRIBUTE_UNUSED);
@@ -286,6 +323,8 @@ static unsigned arm_add_stmt_cost (void *data, int count,
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
+
+static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
/* Table of machine attributes. */
static const struct attribute_spec arm_attribute_table[] =
@@ -342,7 +381,7 @@ static const struct attribute_spec arm_attribute_table[] =
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
#undef TARGET_LRA_P
-#define TARGET_LRA_P arm_lra_p
+#define TARGET_LRA_P hook_bool_void_true
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table
@@ -379,6 +418,12 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
+
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
+
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
@@ -581,9 +626,20 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+ arm_first_cycle_multipass_dfa_lookahead
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
+ arm_first_cycle_multipass_dfa_lookahead_guard
+
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
+
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
@@ -677,6 +733,12 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
+#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
+#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
+
+#undef TARGET_SCHED_FUSION_PRIORITY
+#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling. */
@@ -708,77 +770,13 @@ const struct arm_fpu_desc *arm_fpu_desc;
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
-/* Bit values used to identify processor capabilities. */
-#define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
-#define FL_ARCH3M (1 << 1) /* Extended multiply */
-#define FL_MODE26 (1 << 2) /* 26-bit mode support */
-#define FL_MODE32 (1 << 3) /* 32-bit mode support */
-#define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
-#define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
-#define FL_THUMB (1 << 6) /* Thumb aware */
-#define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
-#define FL_STRONG (1 << 8) /* StrongARM */
-#define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
-#define FL_XSCALE (1 << 10) /* XScale */
-/* spare (1 << 11) */
-#define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
- media instructions. */
-#define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
-#define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
- Note: ARM6 & 7 derivatives only. */
-#define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
-#define FL_THUMB2 (1 << 16) /* Thumb-2. */
-#define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
- profile. */
-#define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
-#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
-#define FL_NEON (1 << 20) /* Neon instructions. */
-#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
- architecture. */
-#define FL_ARCH7 (1 << 22) /* Architecture 7. */
-#define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
-#define FL_ARCH8 (1 << 24) /* Architecture 8. */
-#define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
-
-#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
-#define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
-
-/* Flags that only effect tuning, not available instructions. */
-#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
- | FL_CO_PROC)
-
-#define FL_FOR_ARCH2 FL_NOTM
-#define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
-#define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
-#define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
-#define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
-#define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
-#define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
-#define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
-#define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
-#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
-#define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
-#define FL_FOR_ARCH6J FL_FOR_ARCH6
-#define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
-#define FL_FOR_ARCH6Z FL_FOR_ARCH6
-#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
-#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
-#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
-#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
-#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
-#define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
-#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
-#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
-#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
-#define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
-
/* The bits in this mask specify which
instructions we are allowed to generate. */
-static unsigned long insn_flags = 0;
+unsigned long insn_flags = 0;
/* The bits in this mask specify which instruction scheduling options should
be used. */
-static unsigned long tune_flags = 0;
+unsigned long tune_flags = 0;
/* The highest ARM architecture version supported by the
target. */
@@ -868,6 +866,9 @@ int arm_arch_thumb2;
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
+/* Nonzero if chip disallows volatile memory access in IT block. */
+int arm_arch_no_volatile_ce;
+
/* Nonzero if we should use Neon to handle 64-bits operations rather
than core registers. */
int prefer_neon_for_64bits = 0;
@@ -878,15 +879,11 @@ bool arm_disable_literal_pool = false;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
we must report the mode of the memory reference from
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
-enum machine_mode output_memory_reference_mode;
+machine_mode output_memory_reference_mode;
/* The register number to be used for the PIC offset register. */
unsigned arm_pic_register = INVALID_REGNUM;
-/* Set to 1 after arm_reorg has started. Reset to start at the start of
- the next function. */
-static int after_arm_reorg = 0;
-
enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below. */
@@ -907,6 +904,9 @@ int arm_condexec_masklen = 0;
/* Nonzero if chip supports the ARMv8 CRC instructions. */
int arm_arch_crc = 0;
+/* Nonzero if the core has a very small, high-latency, multiply unit. */
+int arm_m_profile_small_mul = 0;
+
/* The condition codes of the ARM, and the inverse function. */
static const char * const arm_condition_codes[] =
{
@@ -985,6 +985,7 @@ const struct cpu_cost_table cortexa9_extra_costs =
COSTS_N_INSNS (1), /* bfi. */
COSTS_N_INSNS (1), /* bfx. */
0, /* clz. */
+ 0, /* rev. */
0, /* non_exec. */
true /* non_exec_costs_exec. */
},
@@ -1068,6 +1069,209 @@ const struct cpu_cost_table cortexa9_extra_costs =
}
};
+const struct cpu_cost_table cortexa8_extra_costs = /* Cost table referenced by arm_cortex_a8_tune. */
+{
+ /* ALU: one initializer per operation; the trailing comment names the entry. */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ COSTS_N_INSNS (1), /* shift. */
+ 0, /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ 0, /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ 0, /* log_shift_reg. */
+ 0, /* extend. */
+ 0, /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+ {
+ /* MULT SImode: simple, flag_setting, extend, add, extend_add, idiv. */
+ {
+ COSTS_N_INSNS (1), /* simple. */
+ COSTS_N_INSNS (1), /* flag_setting. */
+ COSTS_N_INSNS (1), /* extend. */
+ COSTS_N_INSNS (1), /* add. */
+ COSTS_N_INSNS (1), /* extend_add. */
+ COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
+ },
+ /* MULT DImode: same entry order as SImode; entries marked N/A are 0. */
+ {
+ 0, /* simple (N/A). */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (2), /* extend. */
+ 0, /* add (N/A). */
+ COSTS_N_INSNS (2), /* extend_add. */
+ 0 /* idiv (N/A). */
+ }
+ },
+ /* LD/ST: load entries first, then the matching store entries. */
+ {
+ COSTS_N_INSNS (1), /* load. */
+ COSTS_N_INSNS (1), /* load_sign_extend. */
+ COSTS_N_INSNS (1), /* ldrd. */
+ COSTS_N_INSNS (1), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. Plain integer, not COSTS_N_INSNS. */
+ 2, /* ldm_regs_per_insn_subsequent. Plain integer, not COSTS_N_INSNS. */
+ COSTS_N_INSNS (1), /* loadf. */
+ COSTS_N_INSNS (1), /* loadd. */
+ COSTS_N_INSNS (1), /* load_unaligned. */
+ COSTS_N_INSNS (1), /* store. */
+ COSTS_N_INSNS (1), /* strd. */
+ COSTS_N_INSNS (1), /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. Plain integer, not COSTS_N_INSNS. */
+ 2, /* stm_regs_per_insn_subsequent. Plain integer, not COSTS_N_INSNS. */
+ COSTS_N_INSNS (1), /* storef. */
+ COSTS_N_INSNS (1), /* stored. */
+ COSTS_N_INSNS (1) /* store_unaligned. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (36), /* div. */
+ COSTS_N_INSNS (11), /* mult. */
+ COSTS_N_INSNS (20), /* mult_addsub. */
+ COSTS_N_INSNS (30), /* fma. */
+ COSTS_N_INSNS (9), /* addsub. */
+ COSTS_N_INSNS (3), /* fpconst. */
+ COSTS_N_INSNS (3), /* neg. */
+ COSTS_N_INSNS (6), /* compare. */
+ COSTS_N_INSNS (4), /* widen. */
+ COSTS_N_INSNS (4), /* narrow. */
+ COSTS_N_INSNS (8), /* toint. */
+ COSTS_N_INSNS (8), /* fromint. */
+ COSTS_N_INSNS (8) /* roundint. */
+ },
+ /* FP DFmode: same entry order as SFmode. */
+ {
+ COSTS_N_INSNS (64), /* div. */
+ COSTS_N_INSNS (16), /* mult. */
+ COSTS_N_INSNS (25), /* mult_addsub. */
+ COSTS_N_INSNS (30), /* fma. */
+ COSTS_N_INSNS (9), /* addsub. */
+ COSTS_N_INSNS (3), /* fpconst. */
+ COSTS_N_INSNS (3), /* neg. */
+ COSTS_N_INSNS (6), /* compare. */
+ COSTS_N_INSNS (6), /* widen. */
+ COSTS_N_INSNS (6), /* narrow. */
+ COSTS_N_INSNS (8), /* toint. */
+ COSTS_N_INSNS (8), /* fromint. */
+ COSTS_N_INSNS (8) /* roundint. */
+ }
+ },
+ /* Vector: a single entry. */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
+const struct cpu_cost_table cortexa5_extra_costs = /* Presumably the Cortex-A5 cost table, going by the name; no user is visible in this chunk. */
+{
+ /* ALU: one initializer per operation; the trailing comment names the entry. */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ COSTS_N_INSNS (1), /* shift. */
+ COSTS_N_INSNS (1), /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ COSTS_N_INSNS (1), /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ COSTS_N_INSNS (1), /* bfi. */
+ COSTS_N_INSNS (1), /* bfx. */
+ COSTS_N_INSNS (1), /* clz. */
+ COSTS_N_INSNS (1), /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+
+ {
+ /* MULT SImode: simple, flag_setting, extend, add, extend_add, idiv. */
+ {
+ 0, /* simple. */
+ COSTS_N_INSNS (1), /* flag_setting. */
+ COSTS_N_INSNS (1), /* extend. */
+ COSTS_N_INSNS (1), /* add. */
+ COSTS_N_INSNS (1), /* extend_add. */
+ COSTS_N_INSNS (7) /* idiv. */
+ },
+ /* MULT DImode: same entry order as SImode; entries marked N/A are 0. */
+ {
+ 0, /* simple (N/A). */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (1), /* extend. */
+ 0, /* add. (Marked N/A in cortexa8_extra_costs.) */
+ COSTS_N_INSNS (2), /* extend_add. */
+ 0 /* idiv (N/A). */
+ }
+ },
+ /* LD/ST: load entries first, then the matching store entries. */
+ {
+ COSTS_N_INSNS (1), /* load. */
+ COSTS_N_INSNS (1), /* load_sign_extend. */
+ COSTS_N_INSNS (6), /* ldrd. */
+ COSTS_N_INSNS (1), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. Plain integer, not COSTS_N_INSNS. */
+ 2, /* ldm_regs_per_insn_subsequent. Plain integer, not COSTS_N_INSNS. */
+ COSTS_N_INSNS (2), /* loadf. */
+ COSTS_N_INSNS (4), /* loadd. */
+ COSTS_N_INSNS (1), /* load_unaligned. */
+ COSTS_N_INSNS (1), /* store. */
+ COSTS_N_INSNS (3), /* strd. */
+ COSTS_N_INSNS (1), /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. Plain integer, not COSTS_N_INSNS. */
+ 2, /* stm_regs_per_insn_subsequent. Plain integer, not COSTS_N_INSNS. */
+ COSTS_N_INSNS (2), /* storef. */
+ COSTS_N_INSNS (2), /* stored. */
+ COSTS_N_INSNS (1) /* store_unaligned. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (15), /* div. */
+ COSTS_N_INSNS (3), /* mult. */
+ COSTS_N_INSNS (7), /* mult_addsub. */
+ COSTS_N_INSNS (7), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ COSTS_N_INSNS (3), /* fpconst. */
+ COSTS_N_INSNS (3), /* neg. */
+ COSTS_N_INSNS (3), /* compare. */
+ COSTS_N_INSNS (3), /* widen. */
+ COSTS_N_INSNS (3), /* narrow. */
+ COSTS_N_INSNS (3), /* toint. */
+ COSTS_N_INSNS (3), /* fromint. */
+ COSTS_N_INSNS (3) /* roundint. */
+ },
+ /* FP DFmode: same entry order as SFmode. */
+ {
+ COSTS_N_INSNS (30), /* div. */
+ COSTS_N_INSNS (6), /* mult. */
+ COSTS_N_INSNS (10), /* mult_addsub. */
+ COSTS_N_INSNS (7), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ COSTS_N_INSNS (3), /* fpconst. */
+ COSTS_N_INSNS (3), /* neg. */
+ COSTS_N_INSNS (3), /* compare. */
+ COSTS_N_INSNS (3), /* widen. */
+ COSTS_N_INSNS (3), /* narrow. */
+ COSTS_N_INSNS (3), /* toint. */
+ COSTS_N_INSNS (3), /* fromint. */
+ COSTS_N_INSNS (3) /* roundint. */
+ }
+ },
+ /* Vector: a single entry. */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
const struct cpu_cost_table cortexa7_extra_costs =
{
@@ -1086,6 +1290,7 @@ const struct cpu_cost_table cortexa7_extra_costs =
COSTS_N_INSNS (1), /* bfi. */
COSTS_N_INSNS (1), /* bfx. */
COSTS_N_INSNS (1), /* clz. */
+ COSTS_N_INSNS (1), /* rev. */
0, /* non_exec. */
true /* non_exec_costs_exec. */
},
@@ -1187,6 +1392,7 @@ const struct cpu_cost_table cortexa12_extra_costs =
0, /* bfi. */
COSTS_N_INSNS (1), /* bfx. */
COSTS_N_INSNS (1), /* clz. */
+ COSTS_N_INSNS (1), /* rev. */
0, /* non_exec. */
true /* non_exec_costs_exec. */
},
@@ -1287,6 +1493,7 @@ const struct cpu_cost_table cortexa15_extra_costs =
COSTS_N_INSNS (1), /* bfi. */
0, /* bfx. */
0, /* clz. */
+ 0, /* rev. */
0, /* non_exec. */
true /* non_exec_costs_exec. */
},
@@ -1387,6 +1594,7 @@ const struct cpu_cost_table v7m_extra_costs =
0, /* bfi. */
0, /* bfx. */
0, /* clz. */
+ 0, /* rev. */
COSTS_N_INSNS (1), /* non_exec. */
false /* non_exec_costs_exec. */
},
@@ -1470,6 +1678,9 @@ const struct cpu_cost_table v7m_extra_costs =
}
};
+#define ARM_FUSE_NOTHING (0)
+#define ARM_FUSE_MOVW_MOVT (1 << 0)
+
const struct tune_params arm_slowmul_tune =
{
arm_slowmul_rtx_costs,
@@ -1483,7 +1694,12 @@ const struct tune_params arm_slowmul_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
const struct tune_params arm_fastmul_tune =
@@ -1499,7 +1715,12 @@ const struct tune_params arm_fastmul_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
/* StrongARM has early execution of branches, so a sequence that is worth
@@ -1518,7 +1739,12 @@ const struct tune_params arm_strongarm_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
const struct tune_params arm_xscale_tune =
@@ -1534,7 +1760,12 @@ const struct tune_params arm_xscale_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
const struct tune_params arm_9e_tune =
@@ -1550,7 +1781,12 @@ const struct tune_params arm_9e_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
const struct tune_params arm_v6t2_tune =
@@ -1566,7 +1802,12 @@ const struct tune_params arm_v6t2_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
@@ -1583,7 +1824,33 @@ const struct tune_params arm_cortex_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
+};
+
+const struct tune_params arm_cortex_a8_tune =
+{
+ arm_9e_rtx_costs,
+ &cortexa8_extra_costs,
+ NULL, /* Sched adj cost. */
+ 1, /* Constant limit. */
+ 5, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ false, /* Prefer constant pool. */
+ arm_default_branch_cost,
+ false, /* Prefer LDRD/STRD. */
+ {true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ true, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
const struct tune_params arm_cortex_a7_tune =
@@ -1599,7 +1866,12 @@ const struct tune_params arm_cortex_a7_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ true, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
const struct tune_params arm_cortex_a15_tune =
@@ -1615,7 +1887,12 @@ const struct tune_params arm_cortex_a15_tune =
true, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ true, true, /* Prefer 32-bit encodings. */
+ true, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
};
const struct tune_params arm_cortex_a53_tune =
@@ -1631,7 +1908,12 @@ const struct tune_params arm_cortex_a53_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ true, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
const struct tune_params arm_cortex_a57_tune =
@@ -1647,7 +1929,33 @@ const struct tune_params arm_cortex_a57_tune =
true, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ true, true, /* Prefer 32-bit encodings. */
+ true, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
+};
+
+const struct tune_params arm_xgene1_tune =
+{
+ arm_9e_rtx_costs,
+ &xgene1_extra_costs,
+ NULL, /* Scheduler cost adjustment. */
+ 1, /* Constant limit. */
+ 2, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ false, /* Prefer constant pool. */
+ arm_default_branch_cost,
+ true, /* Prefer LDRD/STRD. */
+ {true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ true, true, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 32, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -1656,7 +1964,7 @@ const struct tune_params arm_cortex_a57_tune =
const struct tune_params arm_cortex_a5_tune =
{
arm_9e_rtx_costs,
- NULL,
+ &cortexa5_extra_costs,
NULL, /* Sched adj cost. */
1, /* Constant limit. */
1, /* Max cond insns. */
@@ -1666,7 +1974,12 @@ const struct tune_params arm_cortex_a5_tune =
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ true, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
const struct tune_params arm_cortex_a9_tune =
@@ -1682,23 +1995,33 @@ const struct tune_params arm_cortex_a9_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
const struct tune_params arm_cortex_a12_tune =
{
arm_9e_rtx_costs,
&cortexa12_extra_costs,
- NULL,
+ NULL, /* Sched adj cost. */
1, /* Constant limit. */
- 5, /* Max cond insns. */
- ARM_PREFETCH_BENEFICIAL(4,32,32),
+ 2, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
arm_default_branch_cost,
true, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ true, true, /* Prefer 32-bit encodings. */
+ true, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
@@ -1721,7 +2044,35 @@ const struct tune_params arm_v7m_tune =
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
+};
+
+/* Cortex-M7 tuning. */
+
+const struct tune_params arm_cortex_m7_tune =
+{
+ arm_9e_rtx_costs,
+ &v7m_extra_costs,
+ NULL, /* Sched adj cost. */
+ 0, /* Constant limit. */
+ 1, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ true, /* Prefer constant pool. */
+ arm_cortex_m7_branch_cost,
+ false, /* Prefer LDRD/STRD. */
+ {true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
@@ -1739,7 +2090,12 @@ const struct tune_params arm_v6m_tune =
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
const struct tune_params arm_fa726te_tune =
@@ -1755,7 +2111,12 @@ const struct tune_params arm_fa726te_tune =
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
- false /* Prefer Neon for 64-bits bitops. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
};
@@ -1827,7 +2188,7 @@ arm_constant_limit (bool size_p)
/* Emit an insn that's a simple single-set. Both the operands must be known
to be valid. */
-inline static rtx
+inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
return emit_insn (gen_rtx_SET (VOIDmode, x, y));
@@ -1850,14 +2211,14 @@ bit_count (unsigned long value)
typedef struct
{
- enum machine_mode mode;
+ machine_mode mode;
const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs. */
static void
-arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
+arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
const char *funcname, const char *modename,
int num_suffix)
{
@@ -1872,8 +2233,8 @@ arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
}
static void
-arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
- enum machine_mode from, const char *funcname,
+arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
+ machine_mode from, const char *funcname,
const char *toname, const char *fromname)
{
char buffer[50];
@@ -1898,7 +2259,7 @@ arm_init_libfuncs (void)
{
/* For Linux, we have access to kernel support for atomic operations. */
if (arm_abi == ARM_ABI_AAPCS_LINUX)
- init_sync_libfuncs (2 * UNITS_PER_WORD);
+ init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
/* There are no special library functions unless we are using the
ARM BPABI. */
@@ -2269,6 +2630,10 @@ arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
static void
arm_option_override (void)
{
+ arm_selected_arch = NULL;
+ arm_selected_cpu = NULL;
+ arm_selected_tune = NULL;
+
if (global_options_set.x_arm_arch_option)
arm_selected_arch = &all_architectures[arm_arch_option];
@@ -2471,10 +2836,6 @@ arm_option_override (void)
if (TARGET_APCS_FLOAT)
warning (0, "passing floating point arguments in fp regs not yet supported");
- if (TARGET_LITTLE_WORDS)
- warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
- "will be removed in a future release");
-
/* Initialize boolean versions of the flags, for use in the arm.md file. */
arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
arm_arch4 = (insn_flags & FL_ARCH4) != 0;
@@ -2501,8 +2862,10 @@ arm_option_override (void)
arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
+ arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
arm_arch_crc = (insn_flags & FL_CRC32) != 0;
+ arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
if (arm_restrict_it == 2)
arm_restrict_it = arm_arch8 && TARGET_THUMB2;
@@ -2745,6 +3108,10 @@ arm_option_override (void)
/* If optimizing for size, bump the number of instructions that we
are prepared to conditionally execute (even on a StrongARM). */
max_insns_skipped = 6;
+
+ /* For THUMB2, we limit the conditional sequence to one IT block. */
+ if (TARGET_THUMB2)
+ max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
}
else
max_insns_skipped = current_tune->max_insns_skipped;
@@ -2806,7 +3173,23 @@ arm_option_override (void)
prefer_neon_for_64bits = true;
/* Use the alternative scheduling-pressure algorithm by default. */
- maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
+ maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+
+ /* Look through ready list and all of queue for instructions
+ relevant for L2 auto-prefetcher. */
+ int param_sched_autopref_queue_depth;
+ if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
+ param_sched_autopref_queue_depth = -1;
+ else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
+ param_sched_autopref_queue_depth = 0;
+ else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
+ param_sched_autopref_queue_depth = max_insn_queue_index + 1;
+ else
+ gcc_unreachable ();
+ maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
+ param_sched_autopref_queue_depth,
global_options.x_param_values,
global_options_set.x_param_values);
@@ -2828,6 +3211,28 @@ arm_option_override (void)
if (target_slow_flash_data)
arm_disable_literal_pool = true;
+ /* Thumb2 inline assembly code should always use unified syntax.
+ This will apply to ARM and Thumb1 eventually. */
+ if (TARGET_THUMB2)
+ inline_asm_unified = 1;
+
+ /* Disable scheduling fusion by default if it's not armv7 processor
+ or doesn't prefer ldrd/strd. */
+ if (flag_schedule_fusion == 2
+ && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
+ flag_schedule_fusion = 0;
+
+ /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
+ - epilogue_insns - does not accurately model the corresponding insns
+ emitted in the asm file. In particular, see the comment in thumb_exit
+ 'Find out how many of the (return) argument registers we can corrupt'.
+ As a consequence, the epilogue may clobber registers without fipa-ra
+ finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
+ TODO: Accurately model clobbers for epilogue_insns and reenable
+ fipa-ra. */
+ if (TARGET_THUMB1)
+ flag_ipa_ra = 0;
+
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
@@ -3352,7 +3757,7 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
/* ??? Tweak this for thumb2. */
int
-arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
+arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
rtx cond;
@@ -3375,7 +3780,7 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
*/
- if (!after_arm_reorg
+ if (!cfun->machine->after_arm_reorg
&& !cond
&& (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1, 0)
@@ -3652,7 +4057,7 @@ emit_constant_insn (rtx cond, rtx pattern)
RTL generation. */
static int
-arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
+arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
int generate)
{
@@ -4131,19 +4536,20 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
if ((remainder | shift_mask) != 0xffffffff)
{
+ HOST_WIDE_INT new_val
+ = ARM_SIGN_EXTEND (remainder | shift_mask);
+
if (generate)
{
rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
- insns = arm_gen_constant (AND, mode, cond,
- remainder | shift_mask,
+ insns = arm_gen_constant (AND, SImode, cond, new_val,
new_src, source, subtargets, 1);
source = new_src;
}
else
{
rtx targ = subtargets ? NULL_RTX : target;
- insns = arm_gen_constant (AND, mode, cond,
- remainder | shift_mask,
+ insns = arm_gen_constant (AND, mode, cond, new_val,
targ, source, subtargets, 0);
}
}
@@ -4166,12 +4572,13 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
if ((remainder | shift_mask) != 0xffffffff)
{
+ HOST_WIDE_INT new_val
+ = ARM_SIGN_EXTEND (remainder | shift_mask);
if (generate)
{
rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
- insns = arm_gen_constant (AND, mode, cond,
- remainder | shift_mask,
+ insns = arm_gen_constant (AND, mode, cond, new_val,
new_src, source, subtargets, 1);
source = new_src;
}
@@ -4179,8 +4586,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
{
rtx targ = subtargets ? NULL_RTX : target;
- insns = arm_gen_constant (AND, mode, cond,
- remainder | shift_mask,
+ insns = arm_gen_constant (AND, mode, cond, new_val,
targ, source, subtargets, 0);
}
}
@@ -4314,7 +4720,7 @@ static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
bool op0_preserve_value)
{
- enum machine_mode mode;
+ machine_mode mode;
unsigned HOST_WIDE_INT i, maxval;
mode = GET_MODE (*op0);
@@ -4329,7 +4735,6 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
for GTU/LEU in Thumb mode. */
if (mode == DImode)
{
- rtx tem;
if (*code == GT || *code == LE
|| (!TARGET_ARM && (*code == GTU || *code == LEU)))
@@ -4369,9 +4774,7 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
/* If that did not work, reverse the condition. */
if (!op0_preserve_value)
{
- tem = *op0;
- *op0 = *op1;
- *op1 = tem;
+ std::swap (*op0, *op1);
*code = (int)swap_condition ((enum rtx_code)*code);
}
}
@@ -4411,7 +4814,7 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
if (i != maxval
&& (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
{
- *op1 = GEN_INT (i + 1);
+ *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
*code = *code == GT ? GE : LT;
return;
}
@@ -4433,7 +4836,7 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
if (i != ~((unsigned HOST_WIDE_INT) 0)
&& (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
{
- *op1 = GEN_INT (i + 1);
+ *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
*code = *code == GTU ? GEU : LTU;
return;
}
@@ -4462,7 +4865,7 @@ static rtx
arm_function_value(const_tree type, const_tree func,
bool outgoing ATTRIBUTE_UNUSED)
{
- enum machine_mode mode;
+ machine_mode mode;
int unsignedp ATTRIBUTE_UNUSED;
rtx r ATTRIBUTE_UNUSED;
@@ -4513,25 +4916,25 @@ libcall_hasher::hash (const value_type *p1)
return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}
-typedef hash_table <libcall_hasher> libcall_table_type;
+typedef hash_table<libcall_hasher> libcall_table_type;
static void
-add_libcall (libcall_table_type htab, rtx libcall)
+add_libcall (libcall_table_type *htab, rtx libcall)
{
- *htab.find_slot (libcall, INSERT) = libcall;
+ *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
static bool init_done = false;
- static libcall_table_type libcall_htab;
+ static libcall_table_type *libcall_htab = NULL;
if (!init_done)
{
init_done = true;
- libcall_htab.create (31);
+ libcall_htab = new libcall_table_type (31);
add_libcall (libcall_htab,
convert_optab_libfunc (sfloat_optab, SFmode, SImode));
add_libcall (libcall_htab,
@@ -4590,11 +4993,11 @@ arm_libcall_uses_aapcs_base (const_rtx libcall)
DFmode));
}
- return libcall && libcall_htab.find (libcall) != NULL;
+ return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
-arm_libcall_value_1 (enum machine_mode mode)
+arm_libcall_value_1 (machine_mode mode)
{
if (TARGET_AAPCS_BASED)
return aapcs_libcall_value (mode);
@@ -4609,7 +5012,7 @@ arm_libcall_value_1 (enum machine_mode mode)
assuming the value has mode MODE. */
static rtx
-arm_libcall_value (enum machine_mode mode, const_rtx libcall)
+arm_libcall_value (machine_mode mode, const_rtx libcall)
{
if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
&& GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -4887,7 +5290,7 @@ arm_get_pcs_model (const_tree type, const_tree decl)
so we are free to use whatever conventions are
appropriate. */
/* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
- struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
+ cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
if (i && i->local)
return ARM_PCS_AAPCS_LOCAL;
}
@@ -4917,9 +5320,9 @@ aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
otherwise return the count in the sub-tree. */
static int
-aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
+aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
- enum machine_mode mode;
+ machine_mode mode;
HOST_WIDE_INT size;
switch (TREE_CODE (type))
@@ -4983,8 +5386,10 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
int count;
tree index = TYPE_DOMAIN (type);
- /* Can't handle incomplete types. */
- if (!COMPLETE_TYPE_P (type))
+ /* Can't handle incomplete types nor sizes that are not
+ fixed. */
+ if (!COMPLETE_TYPE_P (type)
+ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
return -1;
count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
@@ -5001,9 +5406,7 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
- tree_to_uhwi (TYPE_MIN_VALUE (index)));
/* There must be no padding. */
- if (!tree_fits_uhwi_p (TYPE_SIZE (type))
- || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep)))
+ if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -5015,8 +5418,10 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
int sub_count;
tree field;
- /* Can't handle incomplete types. */
- if (!COMPLETE_TYPE_P (type))
+ /* Can't handle incomplete types nor sizes that are not
+ fixed. */
+ if (!COMPLETE_TYPE_P (type)
+ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
return -1;
for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
@@ -5031,9 +5436,7 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
}
/* There must be no padding. */
- if (!tree_fits_uhwi_p (TYPE_SIZE (type))
- || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep)))
+ if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -5047,8 +5450,10 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
int sub_count;
tree field;
- /* Can't handle incomplete types. */
- if (!COMPLETE_TYPE_P (type))
+ /* Can't handle incomplete types nor sizes that are not
+ fixed. */
+ if (!COMPLETE_TYPE_P (type)
+ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
return -1;
for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
@@ -5063,9 +5468,7 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
}
/* There must be no padding. */
- if (!tree_fits_uhwi_p (TYPE_SIZE (type))
- || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep)))
+ if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -5110,10 +5513,10 @@ use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
*COUNT to hold the number of such elements. */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
- enum machine_mode mode, const_tree type,
- enum machine_mode *base_mode, int *count)
+ machine_mode mode, const_tree type,
+ machine_mode *base_mode, int *count)
{
- enum machine_mode new_mode = VOIDmode;
+ machine_mode new_mode = VOIDmode;
/* If we have the type information, prefer that to working things
out from the mode. */
@@ -5151,10 +5554,10 @@ aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
- enum machine_mode mode, const_tree type)
+ machine_mode mode, const_tree type)
{
int count ATTRIBUTE_UNUSED;
- enum machine_mode ag_mode ATTRIBUTE_UNUSED;
+ machine_mode ag_mode ATTRIBUTE_UNUSED;
if (!use_vfp_abi (pcs_variant, false))
return false;
@@ -5163,7 +5566,7 @@ aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
}
static bool
-aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
const_tree type)
{
if (!use_vfp_abi (pcum->pcs_variant, false))
@@ -5175,7 +5578,7 @@ aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
}
static bool
-aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
const_tree type ATTRIBUTE_UNUSED)
{
int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
@@ -5193,7 +5596,7 @@ aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
int i;
int rcount = pcum->aapcs_vfp_rcount;
int rshift = shift;
- enum machine_mode rmode = pcum->aapcs_vfp_rmode;
+ machine_mode rmode = pcum->aapcs_vfp_rmode;
rtx par;
if (!TARGET_NEON)
{
@@ -5229,7 +5632,7 @@ aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
- enum machine_mode mode,
+ machine_mode mode,
const_tree type ATTRIBUTE_UNUSED)
{
if (!use_vfp_abi (pcs_variant, false))
@@ -5238,7 +5641,7 @@ aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
{
int count;
- enum machine_mode ag_mode;
+ machine_mode ag_mode;
int i;
rtx par;
int shift;
@@ -5274,7 +5677,7 @@ aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
+ machine_mode mode ATTRIBUTE_UNUSED,
const_tree type ATTRIBUTE_UNUSED)
{
pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
@@ -5307,25 +5710,25 @@ static struct
BLKmode) is a candidate for this co-processor's registers; this
function should ignore any position-dependent state in
CUMULATIVE_ARGS and only use call-type dependent information. */
- bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+ bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
/* Return true if the argument does get a co-processor register; it
should set aapcs_reg to an RTX of the register allocated as is
required for a return from FUNCTION_ARG. */
- bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+ bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
/* Return true if a result of mode MODE (or type TYPE if MODE is
BLKmode) is can be returned in this co-processor's registers. */
- bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
+ bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
/* Allocate and return an RTX element to hold the return type of a
call, this routine must not fail and will only be called if
is_return_candidate returned true with the same parameters. */
- rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
+ rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
/* Finish processing this argument and prepare to start processing
the next one. */
- void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+ void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
{
AAPCS_CP(vfp)
@@ -5334,7 +5737,7 @@ static struct
#undef AAPCS_CP
static int
-aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
const_tree type)
{
int i;
@@ -5383,7 +5786,7 @@ aapcs_select_return_coproc (const_tree type, const_tree fntype)
}
static rtx
-aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
+aapcs_allocate_return_reg (machine_mode mode, const_tree type,
const_tree fntype)
{
/* We aren't passed a decl, so we can't check that a call is local.
@@ -5438,7 +5841,7 @@ aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
}
static rtx
-aapcs_libcall_value (enum machine_mode mode)
+aapcs_libcall_value (machine_mode mode)
{
if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
&& GET_MODE_SIZE (mode) <= 4)
@@ -5450,7 +5853,7 @@ aapcs_libcall_value (enum machine_mode mode)
/* Lay out a function argument using the AAPCS rules. The rule
numbers referred to here are those in the AAPCS. */
static void
-aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
const_tree type, bool named)
{
int nregs, nregs2;
@@ -5613,16 +6016,9 @@ arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
}
}
-/* Return true if we use LRA instead of reload pass. */
-static bool
-arm_lra_p (void)
-{
- return arm_lra_flag;
-}
-
/* Return true if mode/type need doubleword alignment. */
static bool
-arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
+arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
|| (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
@@ -5649,7 +6045,7 @@ arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
indeed make it pass in the stack if necessary). */
static rtx
-arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
+arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
const_tree type, bool named)
{
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
@@ -5702,7 +6098,7 @@ arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
}
static unsigned int
-arm_function_arg_boundary (enum machine_mode mode, const_tree type)
+arm_function_arg_boundary (machine_mode mode, const_tree type)
{
return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
? DOUBLEWORD_ALIGNMENT
@@ -5710,7 +6106,7 @@ arm_function_arg_boundary (enum machine_mode mode, const_tree type)
}
static int
-arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
+arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
tree type, bool named)
{
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
@@ -5738,7 +6134,7 @@ arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
(TYPE is null for libcalls where that information may not be available.) */
static void
-arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
+arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
const_tree type, bool named)
{
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
@@ -5777,7 +6173,7 @@ arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
+ machine_mode mode ATTRIBUTE_UNUSED,
const_tree type, bool named ATTRIBUTE_UNUSED)
{
return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
@@ -6001,21 +6397,15 @@ arm_set_default_type_attributes (tree type)
static bool
arm_function_in_section_p (tree decl, section *section)
{
- /* We can only be certain about functions defined in the same
- compilation unit. */
- if (!TREE_STATIC (decl))
- return false;
-
- /* Make sure that SYMBOL always binds to the definition in this
- compilation unit. */
- if (!targetm.binds_local_p (decl))
+ /* We can only be certain about the prevailing symbol definition. */
+ if (!decl_binds_to_current_def_p (decl))
return false;
/* If DECL_SECTION_NAME is set, assume it is trustworthy. */
if (!DECL_SECTION_NAME (decl))
{
/* Make sure that we will not create a unique section for DECL. */
- if (flag_function_sections || DECL_ONE_ONLY (decl))
+ if (flag_function_sections || DECL_COMDAT_GROUP (decl))
return false;
}
@@ -6079,11 +6469,6 @@ arm_function_ok_for_sibcall (tree decl, tree exp)
if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
return false;
- /* Cannot tail-call to long calls, since these are out of range of
- a branch instruction. */
- if (decl && arm_is_long_call_p (decl))
- return false;
-
/* If we are interworking and the function is not declared static
then we can't tail-call it unless we know that it exists in this
compilation unit (since it might be a Thumb routine). */
@@ -6173,7 +6558,7 @@ require_pic_register (void)
}
else
{
- rtx seq, insn;
+ rtx_insn *seq, *insn;
if (!cfun->machine->pic_reg)
cfun->machine->pic_reg = gen_reg_rtx (Pmode);
@@ -6212,7 +6597,7 @@ require_pic_register (void)
}
rtx
-legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
+legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
if (GET_CODE (orig) == SYMBOL_REF
|| GET_CODE (orig) == LABEL_REF)
@@ -6537,7 +6922,7 @@ will_be_in_index_register (const_rtx x)
/* Return nonzero if X is a valid ARM state address operand. */
int
-arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
+arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
int strict_p)
{
bool use_ldrd;
@@ -6624,7 +7009,7 @@ arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
/* Return nonzero if X is a valid Thumb-2 address operand. */
static int
-thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
bool use_ldrd;
enum rtx_code code = GET_CODE (x);
@@ -6719,7 +7104,7 @@ thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
/* Return nonzero if INDEX is valid for an address index operand in
ARM state. */
static int
-arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
+arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
int strict_p)
{
HOST_WIDE_INT range;
@@ -6840,7 +7225,7 @@ thumb2_index_mul_operand (rtx op)
/* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
static int
-thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
+thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
enum rtx_code code = GET_CODE (index);
@@ -6933,7 +7318,7 @@ thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
/* Return nonzero if X is valid as a 16-bit Thumb state base register. */
static int
-thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
+thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
int regno;
@@ -6981,7 +7366,7 @@ thumb1_index_register_rtx_p (rtx x, int strict_p)
reload pass starts. This is so that eliminating such addresses
into stack based ones won't produce impossible code. */
int
-thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
/* ??? Not clear if this is right. Experiment. */
if (GET_MODE_SIZE (mode) < 4
@@ -7080,7 +7465,7 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
instruction of mode MODE. */
int
-thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
+thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
switch (GET_MODE_SIZE (mode))
{
@@ -7098,7 +7483,7 @@ thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
}
bool
-arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
+arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
if (TARGET_ARM)
return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
@@ -7162,7 +7547,7 @@ arm_load_tp (rtx target)
emit_insn (gen_load_tp_soft ());
- tmp = gen_rtx_REG (SImode, 0);
+ tmp = gen_rtx_REG (SImode, R0_REGNUM);
emit_move_insn (target, tmp);
}
return target;
@@ -7226,13 +7611,13 @@ arm_tls_descseq_addr (rtx x, rtx reg)
gen_rtx_CONST (VOIDmode, label),
GEN_INT (!TARGET_ARM)),
UNSPEC_TLS);
- rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
+ rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
emit_insn (gen_tlscall (x, labelno));
if (!reg)
reg = gen_reg_rtx (SImode);
else
- gcc_assert (REGNO (reg) != 0);
+ gcc_assert (REGNO (reg) != R0_REGNUM);
emit_move_insn (reg, reg0);
@@ -7336,7 +7721,7 @@ legitimize_tls_address (rtx x, rtx reg)
/* Try machine-dependent ways of modifying an illegitimate address
to be legitimate. If we find one, return the new, valid address. */
rtx
-arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
+arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
if (arm_tls_referenced_p (x))
{
@@ -7482,7 +7867,7 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
/* Try machine-dependent ways of modifying an illegitimate Thumb address
to be legitimate. If we find one, return the new, valid address. */
rtx
-thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
+thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
if (GET_CODE (x) == PLUS
&& CONST_INT_P (XEXP (x, 1))
@@ -7548,7 +7933,7 @@ thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
bool
arm_legitimize_reload_address (rtx *p,
- enum machine_mode mode,
+ machine_mode mode,
int opnum, int type,
int ind_levels ATTRIBUTE_UNUSED)
{
@@ -7735,7 +8120,7 @@ arm_legitimize_reload_address (rtx *p,
rtx
thumb_legitimize_reload_address (rtx *x_p,
- enum machine_mode mode,
+ machine_mode mode,
int opnum, int type,
int ind_levels ATTRIBUTE_UNUSED)
{
@@ -7776,25 +8161,6 @@ thumb_legitimize_reload_address (rtx *x_p,
return NULL;
}
-/* Test for various thread-local symbols. */
-
-/* Helper for arm_tls_referenced_p. */
-
-static int
-arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
-{
- if (GET_CODE (*x) == SYMBOL_REF)
- return SYMBOL_REF_TLS_MODEL (*x) != 0;
-
- /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
- TLS offsets, not real symbol references. */
- if (GET_CODE (*x) == UNSPEC
- && XINT (*x, 1) == UNSPEC_TLS)
- return -1;
-
- return 0;
-}
-
/* Return TRUE if X contains any TLS symbol references. */
bool
@@ -7803,7 +8169,19 @@ arm_tls_referenced_p (rtx x)
if (! TARGET_HAVE_TLS)
return false;
- return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, x, ALL)
+ {
+ const_rtx x = *iter;
+ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
+ return true;
+
+ /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
+ TLS offsets, not real symbol references. */
+ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
+ iter.skip_subrtxes ();
+ }
+ return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.
@@ -7815,19 +8193,13 @@ arm_tls_referenced_p (rtx x)
When generating pic allow anything. */
static bool
-arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
+arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
- /* At present, we have no support for Neon structure constants, so forbid
- them here. It might be possible to handle simple cases like 0 and -1
- in future. */
- if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
- return false;
-
return flag_pic || !label_mentioned_p (x);
}
static bool
-thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
+thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
return (CONST_INT_P (x)
|| CONST_DOUBLE_P (x)
@@ -7836,7 +8208,7 @@ thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
}
static bool
-arm_legitimate_constant_p (enum machine_mode mode, rtx x)
+arm_legitimate_constant_p (machine_mode mode, rtx x)
{
return (!arm_cannot_force_const_mem (mode, x)
&& (TARGET_32BIT
@@ -7847,7 +8219,7 @@ arm_legitimate_constant_p (enum machine_mode mode, rtx x)
/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
static bool
-arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
+arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
rtx base, offset;
@@ -7871,7 +8243,7 @@ arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
int total, words;
switch (code)
@@ -7999,7 +8371,7 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
static inline bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
enum rtx_code subcode;
rtx operand;
enum rtx_code code = GET_CODE (x);
@@ -8461,7 +8833,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
if (GET_MODE_CLASS (mode) == MODE_INT)
{
rtx op = XEXP (x, 0);
- enum machine_mode opmode = GET_MODE (op);
+ machine_mode opmode = GET_MODE (op);
if (mode == DImode)
*total += COSTS_N_INSNS (1);
@@ -8604,7 +8976,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
int words;
switch (code)
@@ -8637,7 +9009,13 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
/* Thumb1 mul instruction can't operate on const. We must Load it
into a register first. */
int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
- return COSTS_N_INSNS (1) + const_size;
+ /* For the targets which have a very small and high-latency multiply
+ unit, we prefer to synthesize the mult with up to 5 instructions,
+ giving a good balance between size and performance. */
+ if (arm_arch6m && arm_m_profile_small_mul)
+ return COSTS_N_INSNS (5);
+ else
+ return COSTS_N_INSNS (1) + const_size;
}
return COSTS_N_INSNS (1);
@@ -8645,9 +9023,11 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
/* A SET doesn't have a mode, so let's look at the SET_DEST to get
the mode. */
words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
- return (COSTS_N_INSNS (words)
- + 4 * ((MEM_P (SET_SRC (x)))
- + MEM_P (SET_DEST (x))));
+ return COSTS_N_INSNS (words)
+ + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
+ || satisfies_constraint_K (SET_SRC (x))
+ /* thumb1_movdi_insn. */
+ || ((words > 1) && MEM_P (SET_SRC (x))));
case CONST_INT:
if (outer == SET)
@@ -8700,16 +9080,14 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
case AND:
case XOR:
case IOR:
- /* XXX guess. */
- return 8;
+ return COSTS_N_INSNS (1);
case MEM:
- /* XXX another guess. */
- /* Memory costs quite a lot for the first word, but subsequent words
- load at the equivalent of a single insn each. */
- return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
+ return (COSTS_N_INSNS (1)
+ + COSTS_N_INSNS (1)
+ * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
+ ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
- ? 4 : 0));
+ ? COSTS_N_INSNS (1) : 0));
case IF_THEN_ELSE:
/* XXX a guess. */
@@ -8746,7 +9124,7 @@ static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
int *total)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
if (TARGET_THUMB1)
{
*total = thumb1_size_rtx_costs (x, code, outer_code);
@@ -9101,7 +9479,7 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
const struct cpu_cost_table *extra_cost,
int *cost, bool speed_p)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
if (TARGET_THUMB1)
{
@@ -9337,6 +9715,47 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
*cost = LIBCALL_COST (2);
return false;
+ case BSWAP:
+ if (arm_arch6)
+ {
+ if (mode == SImode)
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.rev;
+
+ return false;
+ }
+ }
+ else
+ {
+ /* No rev instruction available. Look at arm_legacy_rev
+ and thumb_legacy_rev for the form of RTL used then. */
+ if (TARGET_THUMB)
+ {
+ *cost = COSTS_N_INSNS (10);
+
+ if (speed_p)
+ {
+ *cost += 6 * extra_cost->alu.shift;
+ *cost += 3 * extra_cost->alu.logical;
+ }
+ }
+ else
+ {
+ *cost = COSTS_N_INSNS (5);
+
+ if (speed_p)
+ {
+ *cost += 2 * extra_cost->alu.shift;
+ *cost += extra_cost->alu.arith_shift;
+ *cost += 2 * extra_cost->alu.logical;
+ }
+ }
+ return true;
+ }
+ return false;
+
case MINUS:
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
&& (mode == SFmode || !TARGET_VFP_SINGLE))
@@ -9436,6 +9855,8 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
*cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
return true;
}
+ else if (speed_p)
+ *cost += extra_cost->alu.arith;
return false;
}
@@ -9671,6 +10092,9 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
*cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
return true;
}
+ else if (speed_p)
+ *cost += extra_cost->alu.arith;
+
return false;
}
@@ -9719,8 +10143,17 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
/* Vector mode? */
*cost = LIBCALL_COST (2);
return false;
+ case IOR:
+ if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.rev;
- case AND: case XOR: case IOR:
+ return true;
+ }
+ /* Fall through. */
+ case AND: case XOR:
if (mode == SImode)
{
enum rtx_code subcode = GET_CODE (XEXP (x, 0));
@@ -10047,7 +10480,7 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
*cost = 0;
else
{
- enum machine_mode op0mode;
+ machine_mode op0mode;
/* We'll mostly assume that the cost of a compare is the cost of the
LHS. However, there are some notable exceptions. */
@@ -10619,6 +11052,36 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
*cost = LIBCALL_COST (1);
return false;
+ case FMA:
+ if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
+ {
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+ rtx op2 = XEXP (x, 2);
+
+ *cost = COSTS_N_INSNS (1);
+
+ /* vfms or vfnma. */
+ if (GET_CODE (op0) == NEG)
+ op0 = XEXP (op0, 0);
+
+ /* vfnms or vfnma. */
+ if (GET_CODE (op2) == NEG)
+ op2 = XEXP (op2, 0);
+
+ *cost += rtx_cost (op0, FMA, 0, speed_p);
+ *cost += rtx_cost (op1, FMA, 1, speed_p);
+ *cost += rtx_cost (op2, FMA, 2, speed_p);
+
+ if (speed_p)
+ *cost += extra_cost->fp[mode ==DFmode].fma;
+
+ return true;
+ }
+
+ *cost = LIBCALL_COST (3);
+ return false;
+
case FIX:
case UNSIGNED_FIX:
if (TARGET_HARD_FLOAT)
@@ -10669,10 +11132,16 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
return true;
case ASM_OPERANDS:
- /* Just a guess. Cost one insn per input. */
- *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
- return true;
+ {
+ /* Just a guess. Guess number of instructions in the asm
+ plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
+ though (see PR60663). */
+ int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
+ int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
+ *cost = COSTS_N_INSNS (asm_length + num_operands);
+ return true;
+ }
default:
if (mode != VOIDmode)
*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
@@ -10735,7 +11204,7 @@ static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
int *total, bool speed)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
if (TARGET_THUMB)
{
@@ -10789,7 +11258,7 @@ static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
int *total, bool speed)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
if (TARGET_THUMB1)
{
@@ -10873,7 +11342,7 @@ static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
int *total, bool speed)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
if (TARGET_THUMB)
{
@@ -10962,14 +11431,18 @@ static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
int *total, bool speed)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
if (TARGET_THUMB1)
{
switch (code)
{
case MULT:
- *total = COSTS_N_INSNS (3);
+ /* Small multiply: 32 cycles for an integer multiply inst. */
+ if (arm_arch6m && arm_m_profile_small_mul)
+ *total = COSTS_N_INSNS (32);
+ else
+ *total = COSTS_N_INSNS (3);
return true;
default:
@@ -11067,7 +11540,7 @@ arm_thumb_address_cost (rtx x)
}
static int
-arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
+arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
@@ -11075,7 +11548,7 @@ arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
/* Adjust cost hook for XScale. */
static bool
-xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
/* Some true dependencies can have a higher cost depending
on precisely how certain input operands are used. */
@@ -11115,7 +11588,7 @@ xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
that overlaps with SHIFTED_OPERAND, then we have increase the
cost of this dependency. */
extract_insn (dep);
- preprocess_constraints ();
+ preprocess_constraints (dep);
for (opno = 0; opno < recog_data.n_operands; opno++)
{
/* We can ignore strict inputs. */
@@ -11136,7 +11609,7 @@ xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
/* Adjust cost hook for Cortex A9. */
static bool
-cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
switch (REG_NOTE_KIND (link))
{
@@ -11209,7 +11682,7 @@ cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
/* Adjust cost hook for FA726TE. */
static bool
-fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
/* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
have penalty of 3. */
@@ -11245,7 +11718,7 @@ fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
point to integer conversion does not go through memory. */
int
-arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
reg_class_t from, reg_class_t to)
{
if (TARGET_32BIT)
@@ -11273,7 +11746,7 @@ arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
/* Implement TARGET_MEMORY_MOVE_COST. */
int
-arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
+arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
bool in ATTRIBUTE_UNUSED)
{
if (TARGET_32BIT)
@@ -11378,15 +11851,16 @@ arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
/* Return true if and only if this insn can dual-issue only as older. */
static bool
-cortexa7_older_only (rtx insn)
+cortexa7_older_only (rtx_insn *insn)
{
if (recog_memoized (insn) < 0)
return false;
switch (get_attr_type (insn))
{
- case TYPE_ALU_REG:
- case TYPE_ALUS_REG:
+ case TYPE_ALU_DSP_REG:
+ case TYPE_ALU_SREG:
+ case TYPE_ALUS_SREG:
case TYPE_LOGIC_REG:
case TYPE_LOGICS_REG:
case TYPE_ADC_REG:
@@ -11429,7 +11903,7 @@ cortexa7_older_only (rtx insn)
/* Return true if and only if this insn can dual-issue as younger. */
static bool
-cortexa7_younger (FILE *file, int verbose, rtx insn)
+cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
if (recog_memoized (insn) < 0)
{
@@ -11467,8 +11941,8 @@ cortexa7_younger (FILE *file, int verbose, rtx insn)
instructions. This heuristic may affect dual issue opportunities
in the current cycle. */
static void
-cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
- int clock)
+cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
+ int *n_readyp, int clock)
{
int i;
int first_older_only = -1, first_younger = -1;
@@ -11485,7 +11959,7 @@ cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
older. */
for (i = *n_readyp - 1; i >= 0; i--)
{
- rtx insn = ready[i];
+ rtx_insn *insn = ready[i];
if (cortexa7_older_only (insn))
{
first_older_only = i;
@@ -11520,7 +11994,7 @@ cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
INSN_UID(ready [first_older_only]),
INSN_UID(ready [first_younger]));
- rtx first_older_only_insn = ready [first_older_only];
+ rtx_insn *first_older_only_insn = ready [first_older_only];
for (i = first_older_only; i < first_younger; i++)
{
ready[i] = ready[i+1];
@@ -11532,7 +12006,7 @@ cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
/* Implement TARGET_SCHED_REORDER. */
static int
-arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
+arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
int clock)
{
switch (arm_tune)
@@ -11556,7 +12030,7 @@ arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
adjust_cost function. Only put bits of code into arm_adjust_cost that
are common across all cores. */
static int
-arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
+arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
{
rtx i_pat, d_pat;
@@ -11643,6 +12117,12 @@ arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
: arm_default_branch_cost (speed_p, predictable_p);
}
+static int
+arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
+{
+ return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
+}
+
static bool fp_consts_inited = false;
static REAL_VALUE_TYPE value_fp0;
@@ -11701,8 +12181,8 @@ vfp3_const_double_index (rtx x)
int sign, exponent;
unsigned HOST_WIDE_INT mantissa, mant_hi;
unsigned HOST_WIDE_INT mask;
- HOST_WIDE_INT m1, m2;
int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
+ bool fail;
if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
return -1;
@@ -11722,9 +12202,9 @@ vfp3_const_double_index (rtx x)
WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
bits for the mantissa, this may fail (low bits would be lost). */
real_ldexp (&m, &r, point_pos - exponent);
- REAL_VALUE_TO_INT (&m1, &m2, m);
- mantissa = m1;
- mant_hi = m2;
+ wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
+ mantissa = w.elt (0);
+ mant_hi = w.elt (1);
/* If there are bits set in the low part of the mantissa, we can't
represent this value. */
@@ -11831,7 +12311,7 @@ vfp3_const_double_rtx (rtx x)
-1 if the given value doesn't match any of the listed patterns.
*/
static int
-neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
+neon_valid_immediate (rtx op, machine_mode mode, int inverse,
rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
@@ -12036,7 +12516,7 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
VMOV) in *MODCONST. */
int
-neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
+neon_immediate_valid_for_move (rtx op, machine_mode mode,
rtx *modconst, int *elementwidth)
{
rtx tmpconst;
@@ -12061,7 +12541,7 @@ neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
*ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
int
-neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
+neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
rtx *modconst, int *elementwidth)
{
rtx tmpconst;
@@ -12087,7 +12567,7 @@ neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
because they have different limitations. */
int
-neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
+neon_immediate_valid_for_shift (rtx op, machine_mode mode,
rtx *modconst, int *elementwidth,
bool isleftshift)
{
@@ -12144,7 +12624,7 @@ neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
MNEM. */
char *
-neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
+neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
int inverse, int quad)
{
int width, is_valid;
@@ -12167,7 +12647,7 @@ neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
- enum machine_mode mode, int quad,
+ machine_mode mode, int quad,
bool isleftshift)
{
int width, is_valid;
@@ -12195,10 +12675,10 @@ neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
for no particular gain. */
void
-neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
+neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
rtx (*reduc) (rtx, rtx, rtx))
{
- enum machine_mode inner = GET_MODE_INNER (mode);
+ machine_mode inner = GET_MODE_INNER (mode);
unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
rtx tmpsum = op1;
@@ -12217,8 +12697,8 @@ neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
static rtx
neon_vdup_constant (rtx vals)
{
- enum machine_mode mode = GET_MODE (vals);
- enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ machine_mode mode = GET_MODE (vals);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
int n_elts = GET_MODE_NUNITS (mode);
bool all_same = true;
rtx x;
@@ -12257,7 +12737,7 @@ neon_vdup_constant (rtx vals)
rtx
neon_make_constant (rtx vals)
{
- enum machine_mode mode = GET_MODE (vals);
+ machine_mode mode = GET_MODE (vals);
rtx target;
rtx const_vec = NULL_RTX;
int n_elts = GET_MODE_NUNITS (mode);
@@ -12309,8 +12789,8 @@ neon_make_constant (rtx vals)
void
neon_expand_vector_init (rtx target, rtx vals)
{
- enum machine_mode mode = GET_MODE (target);
- enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ machine_mode mode = GET_MODE (target);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
int n_elts = GET_MODE_NUNITS (mode);
int n_var = 0, one_var = -1;
bool all_same = true;
@@ -12440,7 +12920,7 @@ neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
}
HOST_WIDE_INT
-neon_element_bits (enum machine_mode mode)
+neon_element_bits (machine_mode mode)
{
if (mode == DImode)
return GET_MODE_BITSIZE (mode);
@@ -12566,7 +13046,11 @@ neon_vector_mem_operand (rtx op, int type, bool strict)
|| (type == 0 && GET_CODE (ind) == PRE_DEC))
return arm_address_register_rtx_p (XEXP (ind, 0), 0);
- /* FIXME: vld1 allows register post-modify. */
+ /* Allow post-increment by register for VLDn */
+ if (type == 2 && GET_CODE (ind) == POST_MODIFY
+ && GET_CODE (XEXP (ind, 1)) == PLUS
+ && REG_P (XEXP (XEXP (ind, 1), 1)))
+ return true;
/* Match:
(plus (reg)
@@ -12645,7 +13129,7 @@ arm_eliminable_register (rtx x)
coprocessor registers. Otherwise return NO_REGS. */
enum reg_class
-coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
+coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
if (mode == HFmode)
{
@@ -12770,25 +13254,24 @@ tls_mentioned_p (rtx x)
/* Must not copy any rtx that uses a pc-relative address. */
-static int
-arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
-{
- if (GET_CODE (*x) == UNSPEC
- && (XINT (*x, 1) == UNSPEC_PIC_BASE
- || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
- return 1;
- return 0;
-}
-
static bool
-arm_cannot_copy_insn_p (rtx insn)
+arm_cannot_copy_insn_p (rtx_insn *insn)
{
/* The tls call insn cannot be copied, as it is paired with a data
word. */
if (recog_memoized (insn) == CODE_FOR_tlscall)
return true;
- return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
+ {
+ const_rtx x = *iter;
+ if (GET_CODE (x) == UNSPEC
+ && (XINT (x, 1) == UNSPEC_PIC_BASE
+ || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
+ return true;
+ }
+ return false;
}
enum rtx_code
@@ -12930,7 +13413,7 @@ adjacent_mem_locations (rtx a, rtx b)
REGNO (R_dk) = REGNO (R_d0) + k.
The pattern for store is similar. */
bool
-ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
+ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
bool consecutive, bool return_pc)
{
HOST_WIDE_INT count = XVECLEN (op, 0);
@@ -14156,7 +14639,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
unsigned int interleave_factor,
HOST_WIDE_INT bytes_per_iter)
{
- rtx label, src_reg, dest_reg, final_src, test;
+ rtx src_reg, dest_reg, final_src, test;
HOST_WIDE_INT leftover;
leftover = length % bytes_per_iter;
@@ -14172,7 +14655,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
0, 0, OPTAB_WIDEN);
/* Emit the start of the loop. */
- label = gen_label_rtx ();
+ rtx_code_label *label = gen_label_rtx ();
emit_label (label);
/* Emit the loop body. */
@@ -14292,7 +14775,7 @@ arm_gen_movmemqi (rtx *operands)
else
{
mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
- emit_move_insn (mem, gen_rtx_REG (SImode, 0));
+ emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
if (last_bytes != 0)
{
emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
@@ -14388,7 +14871,7 @@ by mode size. */
inline static rtx
next_consecutive_mem (rtx mem)
{
- enum machine_mode mode = GET_MODE (mem);
+ machine_mode mode = GET_MODE (mem);
HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
@@ -14528,7 +15011,7 @@ gen_movmem_ldrd_strd (rtx *operands)
here. If we are unable to support a dominance comparison we return
CC mode. This will then fail to match for the RTL expressions that
generate this call. */
-enum machine_mode
+machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
enum rtx_code cond1, cond2;
@@ -14557,11 +15040,7 @@ arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
return CCmode;
if (swapped)
- {
- enum rtx_code temp = cond1;
- cond1 = cond2;
- cond2 = temp;
- }
+ std::swap (cond1, cond2);
switch (cond1)
{
@@ -14670,7 +15149,7 @@ arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
}
}
-enum machine_mode
+machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
/* All floating point compares return CCFP if it is an equality
@@ -14853,7 +15332,7 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
- enum machine_mode mode;
+ machine_mode mode;
rtx cc_reg;
int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
@@ -15070,11 +15549,7 @@ arm_reload_out_hi (rtx *operands)
/* Updating base_plus might destroy outval, see if we can
swap the scratch and base_plus. */
if (!reg_overlap_mentioned_p (scratch, outval))
- {
- rtx tmp = scratch;
- scratch = base_plus;
- base_plus = tmp;
- }
+ std::swap (scratch, base_plus);
else
{
rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
@@ -15129,11 +15604,7 @@ arm_reload_out_hi (rtx *operands)
/* Updating base_plus might destroy outval, see if we
can swap the scratch and base_plus. */
if (!reg_overlap_mentioned_p (scratch, outval))
- {
- rtx tmp = scratch;
- scratch = base_plus;
- base_plus = tmp;
- }
+ std::swap (scratch, base_plus);
else
{
rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
@@ -15189,7 +15660,7 @@ arm_reload_out_hi (rtx *operands)
(padded to the size of a word) should be passed in a register. */
static bool
-arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
+arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
if (TARGET_AAPCS_BASED)
return must_pass_in_stack_var_size (mode, type);
@@ -15205,7 +15676,7 @@ arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
aggregate types are placed in the lowest memory address. */
bool
-arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
+arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
if (!TARGET_AAPCS_BASED)
return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
@@ -15223,7 +15694,7 @@ arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
significant byte does. */
bool
-arm_pad_reg_upward (enum machine_mode mode,
+arm_pad_reg_upward (machine_mode mode,
tree type, int first ATTRIBUTE_UNUSED)
{
if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
@@ -15366,8 +15837,6 @@ mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
return false;
}
-#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
-
/* Called from a peephole2 to replace two word-size accesses with a
single LDRD/STRD instruction. Returns true iff we can generate a
new instruction sequence. That is, both accesses use the same base
@@ -15507,10 +15976,10 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
offset = offsets[1];
/* Swap the instructions such that lower memory is accessed first. */
- SWAP_RTX (operands[0], operands[1]);
- SWAP_RTX (operands[2], operands[3]);
+ std::swap (operands[0], operands[1]);
+ std::swap (operands[2], operands[3]);
if (const_store)
- SWAP_RTX (operands[4], operands[5]);
+ std::swap (operands[4], operands[5]);
}
else
{
@@ -15535,7 +16004,7 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
if (load && commute)
{
/* Try reordering registers. */
- SWAP_RTX (operands[0], operands[1]);
+ std::swap (operands[0], operands[1]);
if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
false, load))
return true;
@@ -15564,7 +16033,7 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
false, false))
{
- SWAP_RTX (operands[0], operands[1]);
+ std::swap (operands[0], operands[1]);
return true;
}
@@ -15595,7 +16064,6 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
return false;
}
-#undef SWAP_RTX
@@ -15748,7 +16216,7 @@ struct minipool_node
/* The value in table. */
rtx value;
/* The mode of value. */
- enum machine_mode mode;
+ machine_mode mode;
/* The size of the value. With iWMMXt enabled
sizes > 4 also imply an alignment of 8-bytes. */
int fix_size;
@@ -15757,10 +16225,10 @@ struct minipool_node
struct minipool_fixup
{
Mfix * next;
- rtx insn;
+ rtx_insn * insn;
HOST_WIDE_INT address;
rtx * loc;
- enum machine_mode mode;
+ machine_mode mode;
int fix_size;
rtx value;
Mnode * minipool;
@@ -15774,7 +16242,7 @@ struct minipool_fixup
static Mnode * minipool_vector_head;
static Mnode * minipool_vector_tail;
-static rtx minipool_vector_label;
+static rtx_code_label *minipool_vector_label;
static int minipool_pad;
/* The linked list of all minipool fixes required for this function. */
@@ -15788,7 +16256,7 @@ Mfix * minipool_barrier;
#endif
static HOST_WIDE_INT
-get_jump_table_size (rtx insn)
+get_jump_table_size (rtx_jump_table_data *insn)
{
/* ADDR_VECs only take room if read-only data goes into the text
section. */
@@ -16237,7 +16705,7 @@ assign_minipool_offsets (Mfix *barrier)
/* Output the literal table */
static void
-dump_minipool (rtx scan)
+dump_minipool (rtx_insn *scan)
{
Mnode * mp;
Mnode * nmp;
@@ -16274,7 +16742,7 @@ dump_minipool (rtx scan)
fputc ('\n', dump_file);
}
- switch (mp->fix_size)
+ switch (GET_MODE_SIZE (mp->mode))
{
#ifdef HAVE_consttable_1
case 1:
@@ -16360,23 +16828,23 @@ static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
HOST_WIDE_INT count = 0;
- rtx barrier;
- rtx from = fix->insn;
+ rtx_barrier *barrier;
+ rtx_insn *from = fix->insn;
/* The instruction after which we will insert the jump. */
- rtx selected = NULL;
+ rtx_insn *selected = NULL;
int selected_cost;
/* The address at which the jump instruction will be placed. */
HOST_WIDE_INT selected_address;
Mfix * new_fix;
HOST_WIDE_INT max_count = max_address - fix->address;
- rtx label = gen_label_rtx ();
+ rtx_code_label *label = gen_label_rtx ();
selected_cost = arm_barrier_cost (from);
selected_address = fix->address;
while (from && count < max_count)
{
- rtx tmp;
+ rtx_jump_table_data *tmp;
int new_cost;
/* This code shouldn't have been called if there was a natural barrier
@@ -16433,7 +16901,7 @@ create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
CALL_ARG_LOCATION note. */
if (CALL_P (selected))
{
- rtx next = NEXT_INSN (selected);
+ rtx_insn *next = NEXT_INSN (selected);
if (next && NOTE_P (next)
&& NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
selected = next;
@@ -16458,7 +16926,7 @@ create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
/* Record that there is a natural barrier in the insn stream at
ADDRESS. */
static void
-push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
+push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
@@ -16480,8 +16948,8 @@ push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
fixing; VALUE is the constant that must be loaded, which is of type
MODE. */
static void
-push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
- enum machine_mode mode, rtx value)
+push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
+ machine_mode mode, rtx value)
{
Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
@@ -16548,7 +17016,7 @@ int
arm_const_double_inline_cost (rtx val)
{
rtx lowpart, highpart;
- enum machine_mode mode;
+ machine_mode mode;
mode = GET_MODE (val);
@@ -16569,6 +17037,14 @@ arm_const_double_inline_cost (rtx val)
NULL_RTX, NULL_RTX, 0, 0));
}
+/* Cost of loading SImode constant VAL for CODE, per arm_gen_constant. */
+static inline int
+arm_const_inline_cost (enum rtx_code code, rtx val)
+{
+ return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
+ NULL_RTX, NULL_RTX, 1, 0);
+}
+
/* Return true if it is worthwhile to split a 64-bit constant into two
32-bit operations. This is the case if optimizing for size, or
if we have load delay slots, or if one 32-bit part can be done with
@@ -16576,7 +17052,7 @@ arm_const_double_inline_cost (rtx val)
bool
arm_const_double_by_parts (rtx val)
{
- enum machine_mode mode = GET_MODE (val);
+ machine_mode mode = GET_MODE (val);
rtx part;
if (optimize_size || arm_ld_sched)
@@ -16609,7 +17085,7 @@ arm_const_double_by_parts (rtx val)
bool
arm_const_double_by_immediates (rtx val)
{
- enum machine_mode mode = GET_MODE (val);
+ machine_mode mode = GET_MODE (val);
rtx part;
if (mode == VOIDmode)
@@ -16636,22 +17112,20 @@ arm_const_double_by_immediates (rtx val)
If DO_PUSHES is false we do not actually push any of the fixups
needed. */
static void
-note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
+note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
int opno;
- extract_insn (insn);
-
- if (!constrain_operands (1))
- fatal_insn_not_found (insn);
+ extract_constrain_insn (insn);
if (recog_data.n_alternatives == 0)
return;
/* Fill in recog_op_alt with information about the constraints of
this insn. */
- preprocess_constraints ();
+ preprocess_constraints (insn);
+ const operand_alternative *op_alt = which_op_alt ();
for (opno = 0; opno < recog_data.n_operands; opno++)
{
/* Things we need to fix can only occur in inputs. */
@@ -16662,7 +17136,7 @@ note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
of constants in this alternative is really to fool reload
into allowing us to accept one there. We need to fix them up
now so that we output the right code. */
- if (recog_op_alt[opno][which_alternative].memory_ok)
+ if (op_alt[opno].memory_ok)
{
rtx op = recog_data.operand[opno];
@@ -16712,7 +17186,7 @@ thumb1_reorg (void)
{
rtx dest, src;
rtx pat, op0, set = NULL;
- rtx prev, insn = BB_END (bb);
+ rtx_insn *prev, *insn = BB_END (bb);
bool insn_clobbered = false;
while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
@@ -16787,9 +17261,20 @@ thumb2_reorg (void)
compute_bb_for_insn ();
df_analyze ();
+ enum Convert_Action {SKIP, CONV, SWAP_CONV};
+
FOR_EACH_BB_FN (bb, cfun)
{
- rtx insn;
+ if (current_tune->disparage_flag_setting_t16_encodings
+ && optimize_bb_for_speed_p (bb))
+ continue;
+
+ rtx_insn *insn;
+ Convert_Action action = SKIP;
+ Convert_Action action_for_partial_flag_setting
+ = (current_tune->disparage_partial_flag_setting_t16_encodings
+ && optimize_bb_for_speed_p (bb))
+ ? SKIP : CONV;
COPY_REG_SET (&live, DF_LR_OUT (bb));
df_simulate_initialize_backwards (bb, &live);
@@ -16799,13 +17284,13 @@ thumb2_reorg (void)
&& !REGNO_REG_SET_P (&live, CC_REGNUM)
&& GET_CODE (PATTERN (insn)) == SET)
{
- enum {SKIP, CONV, SWAP_CONV} action = SKIP;
+ action = SKIP;
rtx pat = PATTERN (insn);
rtx dst = XEXP (pat, 0);
rtx src = XEXP (pat, 1);
rtx op0 = NULL_RTX, op1 = NULL_RTX;
- if (!OBJECT_P (src))
+ if (UNARY_P (src) || BINARY_P (src))
op0 = XEXP (src, 0);
if (BINARY_P (src))
@@ -16880,10 +17365,11 @@ thumb2_reorg (void)
/* ANDS <Rdn>,<Rm> */
if (rtx_equal_p (dst, op0)
&& low_register_operand (op1, SImode))
- action = CONV;
+ action = action_for_partial_flag_setting;
else if (rtx_equal_p (dst, op1)
&& low_register_operand (op0, SImode))
- action = SWAP_CONV;
+ action = action_for_partial_flag_setting == SKIP
+ ? SKIP : SWAP_CONV;
break;
case ASHIFTRT:
@@ -16894,26 +17380,30 @@ thumb2_reorg (void)
/* LSLS <Rdn>,<Rm> */
if (rtx_equal_p (dst, op0)
&& low_register_operand (op1, SImode))
- action = CONV;
+ action = action_for_partial_flag_setting;
/* ASRS <Rd>,<Rm>,#<imm5> */
/* LSRS <Rd>,<Rm>,#<imm5> */
/* LSLS <Rd>,<Rm>,#<imm5> */
else if (low_register_operand (op0, SImode)
&& CONST_INT_P (op1)
&& IN_RANGE (INTVAL (op1), 0, 31))
- action = CONV;
+ action = action_for_partial_flag_setting;
break;
case ROTATERT:
/* RORS <Rdn>,<Rm> */
if (rtx_equal_p (dst, op0)
&& low_register_operand (op1, SImode))
- action = CONV;
+ action = action_for_partial_flag_setting;
break;
case NOT:
- case NEG:
/* MVNS <Rd>,<Rm> */
+ if (low_register_operand (op0, SImode))
+ action = action_for_partial_flag_setting;
+ break;
+
+ case NEG:
/* NEGS <Rd>,<Rm> (a.k.a RSBS) */
if (low_register_operand (op0, SImode))
action = CONV;
@@ -16923,7 +17413,7 @@ thumb2_reorg (void)
/* MOVS <Rd>,#<imm8> */
if (CONST_INT_P (src)
&& IN_RANGE (INTVAL (src), 0, 255))
- action = CONV;
+ action = action_for_partial_flag_setting;
break;
case REG:
@@ -16973,7 +17463,7 @@ thumb2_reorg (void)
static void
arm_reorg (void)
{
- rtx insn;
+ rtx_insn *insn;
HOST_WIDE_INT address = 0;
Mfix * fix;
@@ -17004,7 +17494,7 @@ arm_reorg (void)
push_minipool_barrier (insn, address);
else if (INSN_P (insn))
{
- rtx table;
+ rtx_jump_table_data *table;
note_invalid_constants (insn, address, true);
address += get_attr_length (insn);
@@ -17136,7 +17626,7 @@ arm_reorg (void)
/* From now on we must synthesize any constants that we can't handle
directly. This can happen if the RTL gets split during final
instruction generation. */
- after_arm_reorg = 1;
+ cfun->machine->after_arm_reorg = 1;
/* Free the minipool memory. */
obstack_free (&minipool_obstack, minipool_startobj);
@@ -17144,24 +17634,7 @@ arm_reorg (void)
/* Routines to output assembly language. */
-/* If the rtx is the correct value then return the string of the number.
- In this way we can ensure that valid double constants are generated even
- when cross compiling. */
-const char *
-fp_immediate_constant (rtx x)
-{
- REAL_VALUE_TYPE r;
-
- if (!fp_consts_inited)
- init_fp_table ();
-
- REAL_VALUE_FROM_CONST_DOUBLE (r, x);
-
- gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
- return "0";
-}
-
-/* As for fp_immediate_constant, but value is passed directly, not in rtx. */
+/* Return string representation of passed in real value. */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
@@ -17252,14 +17725,22 @@ arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
/* Output the assembly for a store multiple. */
const char *
-vfp_output_fstmd (rtx * operands)
+vfp_output_vstmd (rtx * operands)
{
char pattern[100];
int p;
int base;
int i;
+ rtx addr_reg = REG_P (XEXP (operands[0], 0))
+ ? XEXP (operands[0], 0)
+ : XEXP (XEXP (operands[0], 0), 0);
+ bool push_p = REGNO (addr_reg) == SP_REGNUM;
+
+ if (push_p)
+ strcpy (pattern, "vpush%?.64\t{%P1");
+ else
+ strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
- strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
p = strlen (pattern);
gcc_assert (REG_P (operands[1]));
@@ -17368,7 +17849,7 @@ vfp_emit_fstmd (int base_reg, int count)
the call target. */
void
-arm_emit_call_insn (rtx pat, rtx addr)
+arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
rtx insn;
@@ -17379,6 +17860,7 @@ arm_emit_call_insn (rtx pat, rtx addr)
to the instruction's CALL_INSN_FUNCTION_USAGE. */
if (TARGET_VXWORKS_RTP
&& flag_pic
+ && !sibcall
&& GET_CODE (addr) == SYMBOL_REF
&& (SYMBOL_REF_DECL (addr)
? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
@@ -17387,6 +17869,16 @@ arm_emit_call_insn (rtx pat, rtx addr)
require_pic_register ();
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
}
+
+ if (TARGET_AAPCS_BASED)
+ {
+ /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
+ linker. We need to add an IP clobber to allow setting
+ TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
+ is not needed since it's a fixed register. */
+ rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
+ clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
+ }
}
/* Output a 'call' insn. */
@@ -17717,12 +18209,9 @@ output_move_double (rtx *operands, bool emit, int *count)
if (reg_overlap_mentioned_p (operands[0],
otherops[2]))
{
- rtx tmp;
/* Swap base and index registers over to
avoid a conflict. */
- tmp = otherops[1];
- otherops[1] = otherops[2];
- otherops[2] = tmp;
+ std::swap (otherops[1], otherops[2]);
}
/* If both registers conflict, it will usually
have been fixed by a splitter. */
@@ -18045,7 +18534,7 @@ output_move_vfp (rtx *operands)
int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
const char *templ;
char buff[50];
- enum machine_mode mode;
+ machine_mode mode;
reg = operands[!load];
mem = operands[load];
@@ -18066,19 +18555,19 @@ output_move_vfp (rtx *operands)
switch (GET_CODE (addr))
{
case PRE_DEC:
- templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
+ templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
ops[0] = XEXP (addr, 0);
ops[1] = reg;
break;
case POST_INC:
- templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
+ templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
ops[0] = XEXP (addr, 0);
ops[1] = reg;
break;
default:
- templ = "f%s%c%%?\t%%%s0, %%1%s";
+ templ = "v%sr%%?.%s\t%%%s0, %%1%s";
ops[0] = reg;
ops[1] = mem;
break;
@@ -18086,7 +18575,7 @@ output_move_vfp (rtx *operands)
sprintf (buff, templ,
load ? "ld" : "st",
- dp ? 'd' : 's',
+ dp ? "64" : "32",
dp ? "P" : "",
integer_p ? "\t%@ int" : "");
output_asm_insn (buff, ops);
@@ -18141,7 +18630,7 @@ output_move_neon (rtx *operands)
int regno, nregs, load = REG_P (operands[0]);
const char *templ;
char buff[50];
- enum machine_mode mode;
+ machine_mode mode;
reg = operands[!load];
mem = operands[load];
@@ -18193,6 +18682,20 @@ output_move_neon (rtx *operands)
/* FIXME: Not currently enabled in neon_vector_mem_operand. */
gcc_unreachable ();
+ case REG:
+ /* We have to use vldm / vstm for too-large modes. */
+ if (nregs > 1)
+ {
+ if (nregs > 4)
+ templ = "v%smia%%?\t%%m0, %%h1";
+ else
+ templ = "v%s1.64\t%%h1, %%A0";
+
+ ops[0] = mem;
+ ops[1] = reg;
+ break;
+ }
+ /* Fall through. */
case LABEL_REF:
case PLUS:
{
@@ -18226,14 +18729,7 @@ output_move_neon (rtx *operands)
}
default:
- /* We have to use vldm / vstm for too-large modes. */
- if (nregs > 4)
- templ = "v%smia%%?\t%%m0, %%h1";
- else
- templ = "v%s1.64\t%%h1, %%A0";
-
- ops[0] = mem;
- ops[1] = reg;
+ gcc_unreachable ();
}
sprintf (buff, templ, load ? "ld" : "st");
@@ -18245,11 +18741,11 @@ output_move_neon (rtx *operands)
/* Compute and return the length of neon_mov<mode>, where <mode> is
one of VSTRUCT modes: EI, OI, CI or XI. */
int
-arm_attr_length_move_neon (rtx insn)
+arm_attr_length_move_neon (rtx_insn *insn)
{
rtx reg, mem, addr;
int load;
- enum machine_mode mode;
+ machine_mode mode;
extract_insn_cached (insn);
@@ -18296,7 +18792,7 @@ arm_attr_length_move_neon (rtx insn)
return zero. */
int
-arm_address_offset_is_imm (rtx insn)
+arm_address_offset_is_imm (rtx_insn *insn)
{
rtx mem, addr;
@@ -18601,6 +19097,14 @@ output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
fputs ("\"\n", stream);
}
+/* Whether register REG is callee saved. Needed because on Thumb-1, when
+ optimizing for size, high registers are marked caller saved (to avoid using
+ them) despite being callee saved. REG is evaluated more than once. */
+#define callee_saved_reg_p(reg) \
+ (!call_used_regs[reg] \
+ || (TARGET_THUMB1 && optimize_size \
+ && (reg) >= FIRST_HI_REGNUM && (reg) <= LAST_HI_REGNUM))
+
/* Compute the register save mask for registers 0 through 12
inclusive. This code is used by arm_compute_save_reg_mask. */
@@ -18661,7 +19165,7 @@ arm_compute_save_reg0_reg12_mask (void)
/* In the normal case we only need to save those registers
which are call saved and which are used by this function. */
for (reg = 0; reg <= 11; reg++)
- if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
save_reg_mask |= (1 << reg);
/* Handle the frame pointer as a special case. */
@@ -18766,6 +19270,7 @@ arm_compute_save_reg_mask (void)
|| (save_reg_mask
&& optimize_size
&& ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+ && !crtl->tail_call_emit
&& !crtl->calls_eh_return))
save_reg_mask |= 1 << LR_REGNUM;
@@ -18824,7 +19329,7 @@ thumb1_compute_save_reg_mask (void)
mask = 0;
for (reg = 0; reg < 12; reg ++)
- if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
mask |= 1 << reg;
if (flag_pic
@@ -18857,7 +19362,7 @@ thumb1_compute_save_reg_mask (void)
if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
reg = LAST_LO_REGNUM;
- if (! call_used_regs[reg])
+ if (callee_saved_reg_p (reg))
mask |= 1 << reg;
}
@@ -19285,9 +19790,6 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
|| (cfun->machine->return_used_this_function != 0)
|| offsets->saved_regs == offsets->outgoing_args
|| frame_pointer_needed);
-
- /* Reset the ARM-specific per-function variables. */
- after_arm_reorg = 0;
}
}
@@ -19768,11 +20270,10 @@ arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
rtx par;
rtx dwarf = NULL_RTX;
rtx tmp, reg;
- bool return_in_pc;
+ bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
int offset_adj;
int emit_update;
- return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
offset_adj = return_in_pc ? 1 : 0;
for (i = 0; i <= LAST_ARM_REGNUM; i++)
if (saved_regs_mask & (1 << i))
@@ -19788,10 +20289,7 @@ arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
if (return_in_pc)
- {
- tmp = ret_rtx;
- XVECEXP (par, 0, 0) = tmp;
- }
+ XVECEXP (par, 0, 0) = ret_rtx;
if (emit_update)
{
@@ -19941,9 +20439,8 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
rtx par = NULL_RTX;
rtx dwarf = NULL_RTX;
rtx tmp, reg, tmp1;
- bool return_in_pc;
+ bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
- return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
for (i = 0; i <= LAST_ARM_REGNUM; i++)
if (saved_regs_mask & (1 << i))
num_regs++;
@@ -20204,7 +20701,7 @@ arm_emit_ldrd_pop (unsigned long saved_regs_mask)
static unsigned
arm_size_return_regs (void)
{
- enum machine_mode mode;
+ machine_mode mode;
if (crtl->return_rtx != 0)
mode = GET_MODE (crtl->return_rtx);
@@ -20426,30 +20923,47 @@ arm_get_frame_offsets (void)
{
int reg = -1;
+ /* Register r3 is caller-saved. Normally it does not need to be
+ saved on entry by the prologue. However if we choose to save
+ it for padding then we may confuse the compiler into thinking
+ a prologue sequence is required when in fact it is not. This
+ will occur when shrink-wrapping if r3 is used as a scratch
+ register and there are no other callee-saved writes.
+
+ This situation can be avoided when other callee-saved registers
+ are available and r3 is not mandatory if we choose a callee-saved
+ register for padding. */
+ bool prefer_callee_reg_p = false;
+
/* If it is safe to use r3, then do so. This sometimes
generates better code on Thumb-2 by avoiding the need to
use 32-bit push/pop instructions. */
if (! any_sibcall_could_use_r3 ()
&& arm_size_return_regs () <= 12
&& (offsets->saved_regs_mask & (1 << 3)) == 0
- && (TARGET_THUMB2
+ && (TARGET_THUMB2
|| !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
{
reg = 3;
+ if (!TARGET_THUMB2)
+ prefer_callee_reg_p = true;
+ }
+ if (reg == -1
+ || prefer_callee_reg_p)
+ {
+ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
+ {
+ /* Avoid fixed registers; they may be changed at
+ arbitrary times so it's unsafe to restore them
+ during the epilogue. */
+ if (!fixed_regs[i]
+ && (offsets->saved_regs_mask & (1 << i)) == 0)
+ {
+ reg = i;
+ break;
+ }
+ }
}
- else
- for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
- {
- /* Avoid fixed registers; they may be changed at
- arbitrary times so it's unsafe to restore them
- during the epilogue. */
- if (!fixed_regs[i]
- && (offsets->saved_regs_mask & (1 << i)) == 0)
- {
- reg = i;
- break;
- }
- }
if (reg != -1)
{
@@ -20698,8 +21212,8 @@ arm_expand_prologue (void)
Just tell it we saved SP in r0. */
gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
- r0 = gen_rtx_REG (SImode, 0);
- r1 = gen_rtx_REG (SImode, 1);
+ r0 = gen_rtx_REG (SImode, R0_REGNUM);
+ r1 = gen_rtx_REG (SImode, R1_REGNUM);
insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
RTX_FRAME_RELATED_P (insn) = 1;
@@ -20949,7 +21463,7 @@ arm_expand_prologue (void)
{
/* This add can produce multiple insns for a large constant, so we
need to get tricky. */
- rtx last = get_last_insn ();
+ rtx_insn *last = get_last_insn ();
amount = GEN_INT (offsets->saved_args + saved_regs
- offsets->outgoing_args);
@@ -21039,7 +21553,15 @@ arm_print_condition (FILE *stream)
}
-/* If CODE is 'd', then the X is a condition operand and the instruction
+/* Globally reserved letters: acln
+ Punctuation letters currently used: @_|?().!#
+ Lower case letters currently used: bcdefhimpqtvwxyz
+ Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
+ Letters previously used, but now deprecated/obsolete: sVWXYZ.
+
+ Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
+
+ If CODE is 'd', then the X is a condition operand and the instruction
should only be executed if the condition is true.
if CODE is 'D', then the X is a condition operand and the instruction
should only be executed if the condition is false: however, if the mode
@@ -21179,6 +21701,19 @@ arm_print_operand (FILE *stream, rtx x, int code)
}
return;
+ case 'b':
+ /* Print the log2 of a CONST_INT. */
+ {
+ HOST_WIDE_INT val;
+
+ if (!CONST_INT_P (x)
+ || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
+ output_operand_lossage ("Unsupported operand for code '%c'", code);
+ else
+ fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
+ }
+ return;
+
case 'L':
/* The low 16 bits of an immediate constant. */
fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
@@ -21260,7 +21795,7 @@ arm_print_operand (FILE *stream, rtx x, int code)
case 'R':
if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
rtx part;
if (mode == VOIDmode)
@@ -21421,7 +21956,7 @@ arm_print_operand (FILE *stream, rtx x, int code)
register. */
case 'p':
{
- int mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
int regno;
if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
@@ -21445,7 +21980,7 @@ arm_print_operand (FILE *stream, rtx x, int code)
case 'P':
case 'q':
{
- int mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
int is_quad = (code == 'q');
int regno;
@@ -21481,7 +22016,7 @@ arm_print_operand (FILE *stream, rtx x, int code)
case 'e':
case 'f':
{
- int mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
int regno;
if ((GET_MODE_SIZE (mode) != 16
@@ -21563,6 +22098,7 @@ arm_print_operand (FILE *stream, rtx x, int code)
{
rtx addr;
bool postinc = FALSE;
+ rtx postinc_reg = NULL;
unsigned align, memsize, align_bits;
gcc_assert (MEM_P (x));
@@ -21572,6 +22108,11 @@ arm_print_operand (FILE *stream, rtx x, int code)
postinc = 1;
addr = XEXP (addr, 0);
}
+ if (GET_CODE (addr) == POST_MODIFY)
+ {
+ postinc_reg = XEXP( XEXP (addr, 1), 1);
+ addr = XEXP (addr, 0);
+ }
asm_fprintf (stream, "[%r", REGNO (addr));
/* We know the alignment of this access, so we can emit a hint in the
@@ -21597,6 +22138,8 @@ arm_print_operand (FILE *stream, rtx x, int code)
if (postinc)
fputs("!", stream);
+ if (postinc_reg)
+ asm_fprintf (stream, ", %r", REGNO (postinc_reg));
}
return;
@@ -21614,7 +22157,7 @@ arm_print_operand (FILE *stream, rtx x, int code)
/* Translate an S register number into a D register number and element index. */
case 'y':
{
- int mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
int regno;
if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
@@ -21648,7 +22191,7 @@ arm_print_operand (FILE *stream, rtx x, int code)
number into a D register number and element index. */
case 'z':
{
- int mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
int regno;
if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
@@ -21688,15 +22231,12 @@ arm_print_operand (FILE *stream, rtx x, int code)
break;
case CONST_DOUBLE:
- if (TARGET_NEON)
- {
- char fpstr[20];
- real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
- sizeof (fpstr), 0, 1);
- fprintf (stream, "#%s", fpstr);
- }
- else
- fprintf (stream, "#%s", fp_immediate_constant (x));
+ {
+ char fpstr[20];
+ real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
+ sizeof (fpstr), 0, 1);
+ fprintf (stream, "#%s", fpstr);
+ }
break;
default:
@@ -21735,9 +22275,7 @@ arm_print_operand_address (FILE *stream, rtx x)
/* Ensure that BASE is a register. */
/* (one of them must be). */
/* Also ensure the SP is not used as an index register. */
- rtx temp = base;
- base = index;
- index = temp;
+ std::swap (base, index);
}
switch (GET_CODE (index))
{
@@ -21776,7 +22314,7 @@ arm_print_operand_address (FILE *stream, rtx x)
else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
|| GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
{
- extern enum machine_mode output_memory_reference_mode;
+ extern machine_mode output_memory_reference_mode;
gcc_assert (REG_P (XEXP (x, 0)));
@@ -21853,7 +22391,7 @@ arm_print_operand_punct_valid_p (unsigned char code)
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
- enum machine_mode mode;
+ machine_mode mode;
if (size == UNITS_PER_WORD && aligned_p)
{
@@ -22009,7 +22547,7 @@ arm_elf_asm_destructor (rtx symbol, int priority)
enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
- enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
+ machine_mode mode = GET_MODE (XEXP (comparison, 0));
enum arm_cond_code code;
enum rtx_code comp_code = GET_CODE (comparison);
@@ -22164,9 +22702,9 @@ get_arm_condition_code (rtx comparison)
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
instructions. */
void
-thumb2_final_prescan_insn (rtx insn)
+thumb2_final_prescan_insn (rtx_insn *insn)
{
- rtx first_insn = insn;
+ rtx_insn *first_insn = insn;
rtx body = PATTERN (insn);
rtx predicate;
enum arm_cond_code code;
@@ -22248,7 +22786,7 @@ thumb2_final_prescan_insn (rtx insn)
}
void
-arm_final_prescan_insn (rtx insn)
+arm_final_prescan_insn (rtx_insn *insn)
{
/* BODY will hold the body of INSN. */
rtx body = PATTERN (insn);
@@ -22263,7 +22801,7 @@ arm_final_prescan_insn (rtx insn)
/* START_INSN will hold the insn from where we start looking. This is the
first insn after the following code_label if REVERSE is true. */
- rtx start_insn = insn;
+ rtx_insn *start_insn = insn;
/* If in state 4, check if the target branch is reached, in order to
change back to state 0. */
@@ -22335,7 +22873,8 @@ arm_final_prescan_insn (rtx insn)
int fail = FALSE, succeed = FALSE;
/* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
int then_not_else = TRUE;
- rtx this_insn = start_insn, label = 0;
+ rtx_insn *this_insn = start_insn;
+ rtx label = 0;
/* Register the insn jumped to. */
if (reverse)
@@ -22557,13 +23096,16 @@ thumb2_asm_output_opcode (FILE * stream)
/* Returns true if REGNO is a valid register
for holding a quantity of type MODE. */
int
-arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
+arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
if (GET_MODE_CLASS (mode) == MODE_CC)
return (regno == CC_REGNUM
|| (TARGET_HARD_FLOAT && TARGET_VFP
&& regno == VFPCC_REGNUM));
+ if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
+ return false;
+
if (TARGET_THUMB1)
/* For the Thumb we only allow values bigger than SImode in
registers 0 - 6, so that there is always a second low
@@ -22609,12 +23151,19 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
}
/* We allow almost any value to be stored in the general registers.
- Restrict doubleword quantities to even register pairs so that we can
- use ldrd. Do not allow very large Neon structure opaque modes in
- general registers; they would use too many. */
+ Restrict doubleword quantities to even register pairs in ARM state
+ so that we can use ldrd. Do not allow very large Neon structure
+ opaque modes in general registers; they would use too many. */
if (regno <= LAST_ARM_REGNUM)
- return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
- && ARM_NUM_REGS (mode) <= 4;
+ {
+ if (ARM_NUM_REGS (mode) > 4)
+ return FALSE;
+
+ if (TARGET_THUMB2)
+ return TRUE;
+
+ return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
+ }
if (regno == FRAME_POINTER_REGNUM
|| regno == ARG_POINTER_REGNUM)
@@ -22627,7 +23176,7 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
/* Implement MODES_TIEABLE_P. */
bool
-arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
return true;
@@ -22653,6 +23202,9 @@ arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
enum reg_class
arm_regno_class (int regno)
{
+ if (regno == PC_REGNUM)
+ return NO_REGS;
+
if (TARGET_THUMB1)
{
if (regno == STACK_POINTER_REGNUM)
@@ -22699,7 +23251,7 @@ arm_regno_class (int regno)
int
arm_debugger_arg_offset (int value, rtx addr)
{
- rtx insn;
+ rtx_insn *insn;
/* We are only interested if dbxout_parms() failed to compute the offset. */
if (value != 0)
@@ -22778,1769 +23330,6 @@ arm_debugger_arg_offset (int value, rtx addr)
return value;
}
-typedef enum {
- T_V8QI,
- T_V4HI,
- T_V4HF,
- T_V2SI,
- T_V2SF,
- T_DI,
- T_V16QI,
- T_V8HI,
- T_V4SI,
- T_V4SF,
- T_V2DI,
- T_TI,
- T_EI,
- T_OI,
- T_MAX /* Size of enum. Keep last. */
-} neon_builtin_type_mode;
-
-#define TYPE_MODE_BIT(X) (1 << (X))
-
-#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
- | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
- | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
-#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
- | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
- | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
-
-#define v8qi_UP T_V8QI
-#define v4hi_UP T_V4HI
-#define v4hf_UP T_V4HF
-#define v2si_UP T_V2SI
-#define v2sf_UP T_V2SF
-#define di_UP T_DI
-#define v16qi_UP T_V16QI
-#define v8hi_UP T_V8HI
-#define v4si_UP T_V4SI
-#define v4sf_UP T_V4SF
-#define v2di_UP T_V2DI
-#define ti_UP T_TI
-#define ei_UP T_EI
-#define oi_UP T_OI
-
-#define UP(X) X##_UP
-
-typedef enum {
- NEON_BINOP,
- NEON_TERNOP,
- NEON_UNOP,
- NEON_GETLANE,
- NEON_SETLANE,
- NEON_CREATE,
- NEON_RINT,
- NEON_DUP,
- NEON_DUPLANE,
- NEON_COMBINE,
- NEON_SPLIT,
- NEON_LANEMUL,
- NEON_LANEMULL,
- NEON_LANEMULH,
- NEON_LANEMAC,
- NEON_SCALARMUL,
- NEON_SCALARMULL,
- NEON_SCALARMULH,
- NEON_SCALARMAC,
- NEON_CONVERT,
- NEON_FLOAT_WIDEN,
- NEON_FLOAT_NARROW,
- NEON_FIXCONV,
- NEON_SELECT,
- NEON_RESULTPAIR,
- NEON_REINTERP,
- NEON_VTBL,
- NEON_VTBX,
- NEON_LOAD1,
- NEON_LOAD1LANE,
- NEON_STORE1,
- NEON_STORE1LANE,
- NEON_LOADSTRUCT,
- NEON_LOADSTRUCTLANE,
- NEON_STORESTRUCT,
- NEON_STORESTRUCTLANE,
- NEON_LOGICBINOP,
- NEON_SHIFTINSERT,
- NEON_SHIFTIMM,
- NEON_SHIFTACC
-} neon_itype;
-
-typedef struct {
- const char *name;
- const neon_itype itype;
- const neon_builtin_type_mode mode;
- const enum insn_code code;
- unsigned int fcode;
-} neon_builtin_datum;
-
-#define CF(N,X) CODE_FOR_neon_##N##X
-
-#define VAR1(T, N, A) \
- {#N, NEON_##T, UP (A), CF (N, A), 0}
-#define VAR2(T, N, A, B) \
- VAR1 (T, N, A), \
- {#N, NEON_##T, UP (B), CF (N, B), 0}
-#define VAR3(T, N, A, B, C) \
- VAR2 (T, N, A, B), \
- {#N, NEON_##T, UP (C), CF (N, C), 0}
-#define VAR4(T, N, A, B, C, D) \
- VAR3 (T, N, A, B, C), \
- {#N, NEON_##T, UP (D), CF (N, D), 0}
-#define VAR5(T, N, A, B, C, D, E) \
- VAR4 (T, N, A, B, C, D), \
- {#N, NEON_##T, UP (E), CF (N, E), 0}
-#define VAR6(T, N, A, B, C, D, E, F) \
- VAR5 (T, N, A, B, C, D, E), \
- {#N, NEON_##T, UP (F), CF (N, F), 0}
-#define VAR7(T, N, A, B, C, D, E, F, G) \
- VAR6 (T, N, A, B, C, D, E, F), \
- {#N, NEON_##T, UP (G), CF (N, G), 0}
-#define VAR8(T, N, A, B, C, D, E, F, G, H) \
- VAR7 (T, N, A, B, C, D, E, F, G), \
- {#N, NEON_##T, UP (H), CF (N, H), 0}
-#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
- VAR8 (T, N, A, B, C, D, E, F, G, H), \
- {#N, NEON_##T, UP (I), CF (N, I), 0}
-#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
- VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
- {#N, NEON_##T, UP (J), CF (N, J), 0}
-
-/* The NEON builtin data can be found in arm_neon_builtins.def.
- The mode entries in the following table correspond to the "key" type of the
- instruction variant, i.e. equivalent to that which would be specified after
- the assembler mnemonic, which usually refers to the last vector operand.
- (Signed/unsigned/polynomial types are not differentiated between though, and
- are all mapped onto the same mode for a given element size.) The modes
- listed per instruction should be the same as those defined for that
- instruction's pattern in neon.md. */
-
-static neon_builtin_datum neon_builtin_data[] =
-{
-#include "arm_neon_builtins.def"
-};
-
-#undef CF
-#undef VAR1
-#undef VAR2
-#undef VAR3
-#undef VAR4
-#undef VAR5
-#undef VAR6
-#undef VAR7
-#undef VAR8
-#undef VAR9
-#undef VAR10
-
-#define CF(N,X) ARM_BUILTIN_NEON_##N##X
-#define VAR1(T, N, A) \
- CF (N, A)
-#define VAR2(T, N, A, B) \
- VAR1 (T, N, A), \
- CF (N, B)
-#define VAR3(T, N, A, B, C) \
- VAR2 (T, N, A, B), \
- CF (N, C)
-#define VAR4(T, N, A, B, C, D) \
- VAR3 (T, N, A, B, C), \
- CF (N, D)
-#define VAR5(T, N, A, B, C, D, E) \
- VAR4 (T, N, A, B, C, D), \
- CF (N, E)
-#define VAR6(T, N, A, B, C, D, E, F) \
- VAR5 (T, N, A, B, C, D, E), \
- CF (N, F)
-#define VAR7(T, N, A, B, C, D, E, F, G) \
- VAR6 (T, N, A, B, C, D, E, F), \
- CF (N, G)
-#define VAR8(T, N, A, B, C, D, E, F, G, H) \
- VAR7 (T, N, A, B, C, D, E, F, G), \
- CF (N, H)
-#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
- VAR8 (T, N, A, B, C, D, E, F, G, H), \
- CF (N, I)
-#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
- VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
- CF (N, J)
-enum arm_builtins
-{
- ARM_BUILTIN_GETWCGR0,
- ARM_BUILTIN_GETWCGR1,
- ARM_BUILTIN_GETWCGR2,
- ARM_BUILTIN_GETWCGR3,
-
- ARM_BUILTIN_SETWCGR0,
- ARM_BUILTIN_SETWCGR1,
- ARM_BUILTIN_SETWCGR2,
- ARM_BUILTIN_SETWCGR3,
-
- ARM_BUILTIN_WZERO,
-
- ARM_BUILTIN_WAVG2BR,
- ARM_BUILTIN_WAVG2HR,
- ARM_BUILTIN_WAVG2B,
- ARM_BUILTIN_WAVG2H,
-
- ARM_BUILTIN_WACCB,
- ARM_BUILTIN_WACCH,
- ARM_BUILTIN_WACCW,
-
- ARM_BUILTIN_WMACS,
- ARM_BUILTIN_WMACSZ,
- ARM_BUILTIN_WMACU,
- ARM_BUILTIN_WMACUZ,
-
- ARM_BUILTIN_WSADB,
- ARM_BUILTIN_WSADBZ,
- ARM_BUILTIN_WSADH,
- ARM_BUILTIN_WSADHZ,
-
- ARM_BUILTIN_WALIGNI,
- ARM_BUILTIN_WALIGNR0,
- ARM_BUILTIN_WALIGNR1,
- ARM_BUILTIN_WALIGNR2,
- ARM_BUILTIN_WALIGNR3,
-
- ARM_BUILTIN_TMIA,
- ARM_BUILTIN_TMIAPH,
- ARM_BUILTIN_TMIABB,
- ARM_BUILTIN_TMIABT,
- ARM_BUILTIN_TMIATB,
- ARM_BUILTIN_TMIATT,
-
- ARM_BUILTIN_TMOVMSKB,
- ARM_BUILTIN_TMOVMSKH,
- ARM_BUILTIN_TMOVMSKW,
-
- ARM_BUILTIN_TBCSTB,
- ARM_BUILTIN_TBCSTH,
- ARM_BUILTIN_TBCSTW,
-
- ARM_BUILTIN_WMADDS,
- ARM_BUILTIN_WMADDU,
-
- ARM_BUILTIN_WPACKHSS,
- ARM_BUILTIN_WPACKWSS,
- ARM_BUILTIN_WPACKDSS,
- ARM_BUILTIN_WPACKHUS,
- ARM_BUILTIN_WPACKWUS,
- ARM_BUILTIN_WPACKDUS,
-
- ARM_BUILTIN_WADDB,
- ARM_BUILTIN_WADDH,
- ARM_BUILTIN_WADDW,
- ARM_BUILTIN_WADDSSB,
- ARM_BUILTIN_WADDSSH,
- ARM_BUILTIN_WADDSSW,
- ARM_BUILTIN_WADDUSB,
- ARM_BUILTIN_WADDUSH,
- ARM_BUILTIN_WADDUSW,
- ARM_BUILTIN_WSUBB,
- ARM_BUILTIN_WSUBH,
- ARM_BUILTIN_WSUBW,
- ARM_BUILTIN_WSUBSSB,
- ARM_BUILTIN_WSUBSSH,
- ARM_BUILTIN_WSUBSSW,
- ARM_BUILTIN_WSUBUSB,
- ARM_BUILTIN_WSUBUSH,
- ARM_BUILTIN_WSUBUSW,
-
- ARM_BUILTIN_WAND,
- ARM_BUILTIN_WANDN,
- ARM_BUILTIN_WOR,
- ARM_BUILTIN_WXOR,
-
- ARM_BUILTIN_WCMPEQB,
- ARM_BUILTIN_WCMPEQH,
- ARM_BUILTIN_WCMPEQW,
- ARM_BUILTIN_WCMPGTUB,
- ARM_BUILTIN_WCMPGTUH,
- ARM_BUILTIN_WCMPGTUW,
- ARM_BUILTIN_WCMPGTSB,
- ARM_BUILTIN_WCMPGTSH,
- ARM_BUILTIN_WCMPGTSW,
-
- ARM_BUILTIN_TEXTRMSB,
- ARM_BUILTIN_TEXTRMSH,
- ARM_BUILTIN_TEXTRMSW,
- ARM_BUILTIN_TEXTRMUB,
- ARM_BUILTIN_TEXTRMUH,
- ARM_BUILTIN_TEXTRMUW,
- ARM_BUILTIN_TINSRB,
- ARM_BUILTIN_TINSRH,
- ARM_BUILTIN_TINSRW,
-
- ARM_BUILTIN_WMAXSW,
- ARM_BUILTIN_WMAXSH,
- ARM_BUILTIN_WMAXSB,
- ARM_BUILTIN_WMAXUW,
- ARM_BUILTIN_WMAXUH,
- ARM_BUILTIN_WMAXUB,
- ARM_BUILTIN_WMINSW,
- ARM_BUILTIN_WMINSH,
- ARM_BUILTIN_WMINSB,
- ARM_BUILTIN_WMINUW,
- ARM_BUILTIN_WMINUH,
- ARM_BUILTIN_WMINUB,
-
- ARM_BUILTIN_WMULUM,
- ARM_BUILTIN_WMULSM,
- ARM_BUILTIN_WMULUL,
-
- ARM_BUILTIN_PSADBH,
- ARM_BUILTIN_WSHUFH,
-
- ARM_BUILTIN_WSLLH,
- ARM_BUILTIN_WSLLW,
- ARM_BUILTIN_WSLLD,
- ARM_BUILTIN_WSRAH,
- ARM_BUILTIN_WSRAW,
- ARM_BUILTIN_WSRAD,
- ARM_BUILTIN_WSRLH,
- ARM_BUILTIN_WSRLW,
- ARM_BUILTIN_WSRLD,
- ARM_BUILTIN_WRORH,
- ARM_BUILTIN_WRORW,
- ARM_BUILTIN_WRORD,
- ARM_BUILTIN_WSLLHI,
- ARM_BUILTIN_WSLLWI,
- ARM_BUILTIN_WSLLDI,
- ARM_BUILTIN_WSRAHI,
- ARM_BUILTIN_WSRAWI,
- ARM_BUILTIN_WSRADI,
- ARM_BUILTIN_WSRLHI,
- ARM_BUILTIN_WSRLWI,
- ARM_BUILTIN_WSRLDI,
- ARM_BUILTIN_WRORHI,
- ARM_BUILTIN_WRORWI,
- ARM_BUILTIN_WRORDI,
-
- ARM_BUILTIN_WUNPCKIHB,
- ARM_BUILTIN_WUNPCKIHH,
- ARM_BUILTIN_WUNPCKIHW,
- ARM_BUILTIN_WUNPCKILB,
- ARM_BUILTIN_WUNPCKILH,
- ARM_BUILTIN_WUNPCKILW,
-
- ARM_BUILTIN_WUNPCKEHSB,
- ARM_BUILTIN_WUNPCKEHSH,
- ARM_BUILTIN_WUNPCKEHSW,
- ARM_BUILTIN_WUNPCKEHUB,
- ARM_BUILTIN_WUNPCKEHUH,
- ARM_BUILTIN_WUNPCKEHUW,
- ARM_BUILTIN_WUNPCKELSB,
- ARM_BUILTIN_WUNPCKELSH,
- ARM_BUILTIN_WUNPCKELSW,
- ARM_BUILTIN_WUNPCKELUB,
- ARM_BUILTIN_WUNPCKELUH,
- ARM_BUILTIN_WUNPCKELUW,
-
- ARM_BUILTIN_WABSB,
- ARM_BUILTIN_WABSH,
- ARM_BUILTIN_WABSW,
-
- ARM_BUILTIN_WADDSUBHX,
- ARM_BUILTIN_WSUBADDHX,
-
- ARM_BUILTIN_WABSDIFFB,
- ARM_BUILTIN_WABSDIFFH,
- ARM_BUILTIN_WABSDIFFW,
-
- ARM_BUILTIN_WADDCH,
- ARM_BUILTIN_WADDCW,
-
- ARM_BUILTIN_WAVG4,
- ARM_BUILTIN_WAVG4R,
-
- ARM_BUILTIN_WMADDSX,
- ARM_BUILTIN_WMADDUX,
-
- ARM_BUILTIN_WMADDSN,
- ARM_BUILTIN_WMADDUN,
-
- ARM_BUILTIN_WMULWSM,
- ARM_BUILTIN_WMULWUM,
-
- ARM_BUILTIN_WMULWSMR,
- ARM_BUILTIN_WMULWUMR,
-
- ARM_BUILTIN_WMULWL,
-
- ARM_BUILTIN_WMULSMR,
- ARM_BUILTIN_WMULUMR,
-
- ARM_BUILTIN_WQMULM,
- ARM_BUILTIN_WQMULMR,
-
- ARM_BUILTIN_WQMULWM,
- ARM_BUILTIN_WQMULWMR,
-
- ARM_BUILTIN_WADDBHUSM,
- ARM_BUILTIN_WADDBHUSL,
-
- ARM_BUILTIN_WQMIABB,
- ARM_BUILTIN_WQMIABT,
- ARM_BUILTIN_WQMIATB,
- ARM_BUILTIN_WQMIATT,
-
- ARM_BUILTIN_WQMIABBN,
- ARM_BUILTIN_WQMIABTN,
- ARM_BUILTIN_WQMIATBN,
- ARM_BUILTIN_WQMIATTN,
-
- ARM_BUILTIN_WMIABB,
- ARM_BUILTIN_WMIABT,
- ARM_BUILTIN_WMIATB,
- ARM_BUILTIN_WMIATT,
-
- ARM_BUILTIN_WMIABBN,
- ARM_BUILTIN_WMIABTN,
- ARM_BUILTIN_WMIATBN,
- ARM_BUILTIN_WMIATTN,
-
- ARM_BUILTIN_WMIAWBB,
- ARM_BUILTIN_WMIAWBT,
- ARM_BUILTIN_WMIAWTB,
- ARM_BUILTIN_WMIAWTT,
-
- ARM_BUILTIN_WMIAWBBN,
- ARM_BUILTIN_WMIAWBTN,
- ARM_BUILTIN_WMIAWTBN,
- ARM_BUILTIN_WMIAWTTN,
-
- ARM_BUILTIN_WMERGE,
-
- ARM_BUILTIN_CRC32B,
- ARM_BUILTIN_CRC32H,
- ARM_BUILTIN_CRC32W,
- ARM_BUILTIN_CRC32CB,
- ARM_BUILTIN_CRC32CH,
- ARM_BUILTIN_CRC32CW,
-
-#undef CRYPTO1
-#undef CRYPTO2
-#undef CRYPTO3
-
-#define CRYPTO1(L, U, M1, M2) \
- ARM_BUILTIN_CRYPTO_##U,
-#define CRYPTO2(L, U, M1, M2, M3) \
- ARM_BUILTIN_CRYPTO_##U,
-#define CRYPTO3(L, U, M1, M2, M3, M4) \
- ARM_BUILTIN_CRYPTO_##U,
-
-#include "crypto.def"
-
-#undef CRYPTO1
-#undef CRYPTO2
-#undef CRYPTO3
-
-#include "arm_neon_builtins.def"
-
- ,ARM_BUILTIN_MAX
-};
-
-#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
-
-#undef CF
-#undef VAR1
-#undef VAR2
-#undef VAR3
-#undef VAR4
-#undef VAR5
-#undef VAR6
-#undef VAR7
-#undef VAR8
-#undef VAR9
-#undef VAR10
-
-static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
-
-#define NUM_DREG_TYPES 5
-#define NUM_QREG_TYPES 6
-
-static void
-arm_init_neon_builtins (void)
-{
- unsigned int i, fcode;
- tree decl;
-
- tree neon_intQI_type_node;
- tree neon_intHI_type_node;
- tree neon_floatHF_type_node;
- tree neon_polyQI_type_node;
- tree neon_polyHI_type_node;
- tree neon_intSI_type_node;
- tree neon_intDI_type_node;
- tree neon_intUTI_type_node;
- tree neon_float_type_node;
-
- tree intQI_pointer_node;
- tree intHI_pointer_node;
- tree intSI_pointer_node;
- tree intDI_pointer_node;
- tree float_pointer_node;
-
- tree const_intQI_node;
- tree const_intHI_node;
- tree const_intSI_node;
- tree const_intDI_node;
- tree const_float_node;
-
- tree const_intQI_pointer_node;
- tree const_intHI_pointer_node;
- tree const_intSI_pointer_node;
- tree const_intDI_pointer_node;
- tree const_float_pointer_node;
-
- tree V8QI_type_node;
- tree V4HI_type_node;
- tree V4HF_type_node;
- tree V2SI_type_node;
- tree V2SF_type_node;
- tree V16QI_type_node;
- tree V8HI_type_node;
- tree V4SI_type_node;
- tree V4SF_type_node;
- tree V2DI_type_node;
-
- tree intUQI_type_node;
- tree intUHI_type_node;
- tree intUSI_type_node;
- tree intUDI_type_node;
-
- tree intEI_type_node;
- tree intOI_type_node;
- tree intCI_type_node;
- tree intXI_type_node;
-
- tree V8QI_pointer_node;
- tree V4HI_pointer_node;
- tree V2SI_pointer_node;
- tree V2SF_pointer_node;
- tree V16QI_pointer_node;
- tree V8HI_pointer_node;
- tree V4SI_pointer_node;
- tree V4SF_pointer_node;
- tree V2DI_pointer_node;
-
- tree void_ftype_pv8qi_v8qi_v8qi;
- tree void_ftype_pv4hi_v4hi_v4hi;
- tree void_ftype_pv2si_v2si_v2si;
- tree void_ftype_pv2sf_v2sf_v2sf;
- tree void_ftype_pdi_di_di;
- tree void_ftype_pv16qi_v16qi_v16qi;
- tree void_ftype_pv8hi_v8hi_v8hi;
- tree void_ftype_pv4si_v4si_v4si;
- tree void_ftype_pv4sf_v4sf_v4sf;
- tree void_ftype_pv2di_v2di_v2di;
-
- tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
- tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
- tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
-
- /* Create distinguished type nodes for NEON vector element types,
- and pointers to values of such types, so we can detect them later. */
- neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
- neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
- neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
- neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
- neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
- neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
- neon_float_type_node = make_node (REAL_TYPE);
- TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
- layout_type (neon_float_type_node);
- neon_floatHF_type_node = make_node (REAL_TYPE);
- TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
- layout_type (neon_floatHF_type_node);
-
- /* Define typedefs which exactly correspond to the modes we are basing vector
- types on. If you change these names you'll need to change
- the table used by arm_mangle_type too. */
- (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
- "__builtin_neon_qi");
- (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
- "__builtin_neon_hi");
- (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
- "__builtin_neon_hf");
- (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
- "__builtin_neon_si");
- (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
- "__builtin_neon_sf");
- (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
- "__builtin_neon_di");
- (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
- "__builtin_neon_poly8");
- (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
- "__builtin_neon_poly16");
-
- intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
- intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
- intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
- intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
- float_pointer_node = build_pointer_type (neon_float_type_node);
-
- /* Next create constant-qualified versions of the above types. */
- const_intQI_node = build_qualified_type (neon_intQI_type_node,
- TYPE_QUAL_CONST);
- const_intHI_node = build_qualified_type (neon_intHI_type_node,
- TYPE_QUAL_CONST);
- const_intSI_node = build_qualified_type (neon_intSI_type_node,
- TYPE_QUAL_CONST);
- const_intDI_node = build_qualified_type (neon_intDI_type_node,
- TYPE_QUAL_CONST);
- const_float_node = build_qualified_type (neon_float_type_node,
- TYPE_QUAL_CONST);
-
- const_intQI_pointer_node = build_pointer_type (const_intQI_node);
- const_intHI_pointer_node = build_pointer_type (const_intHI_node);
- const_intSI_pointer_node = build_pointer_type (const_intSI_node);
- const_intDI_pointer_node = build_pointer_type (const_intDI_node);
- const_float_pointer_node = build_pointer_type (const_float_node);
-
- /* Now create vector types based on our NEON element types. */
- /* 64-bit vectors. */
- V8QI_type_node =
- build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
- V4HI_type_node =
- build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
- V4HF_type_node =
- build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
- V2SI_type_node =
- build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
- V2SF_type_node =
- build_vector_type_for_mode (neon_float_type_node, V2SFmode);
- /* 128-bit vectors. */
- V16QI_type_node =
- build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
- V8HI_type_node =
- build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
- V4SI_type_node =
- build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
- V4SF_type_node =
- build_vector_type_for_mode (neon_float_type_node, V4SFmode);
- V2DI_type_node =
- build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
-
- /* Unsigned integer types for various mode sizes. */
- intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
- intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
- intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
- intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
- neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
-
-
- (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
- "__builtin_neon_uqi");
- (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
- "__builtin_neon_uhi");
- (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
- "__builtin_neon_usi");
- (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
- "__builtin_neon_udi");
- (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
- "__builtin_neon_poly64");
- (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
- "__builtin_neon_poly128");
-
- /* Opaque integer types for structures of vectors. */
- intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
- intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
- intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
- intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
-
- (*lang_hooks.types.register_builtin_type) (intTI_type_node,
- "__builtin_neon_ti");
- (*lang_hooks.types.register_builtin_type) (intEI_type_node,
- "__builtin_neon_ei");
- (*lang_hooks.types.register_builtin_type) (intOI_type_node,
- "__builtin_neon_oi");
- (*lang_hooks.types.register_builtin_type) (intCI_type_node,
- "__builtin_neon_ci");
- (*lang_hooks.types.register_builtin_type) (intXI_type_node,
- "__builtin_neon_xi");
-
- /* Pointers to vector types. */
- V8QI_pointer_node = build_pointer_type (V8QI_type_node);
- V4HI_pointer_node = build_pointer_type (V4HI_type_node);
- V2SI_pointer_node = build_pointer_type (V2SI_type_node);
- V2SF_pointer_node = build_pointer_type (V2SF_type_node);
- V16QI_pointer_node = build_pointer_type (V16QI_type_node);
- V8HI_pointer_node = build_pointer_type (V8HI_type_node);
- V4SI_pointer_node = build_pointer_type (V4SI_type_node);
- V4SF_pointer_node = build_pointer_type (V4SF_type_node);
- V2DI_pointer_node = build_pointer_type (V2DI_type_node);
-
- /* Operations which return results as pairs. */
- void_ftype_pv8qi_v8qi_v8qi =
- build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
- V8QI_type_node, NULL);
- void_ftype_pv4hi_v4hi_v4hi =
- build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
- V4HI_type_node, NULL);
- void_ftype_pv2si_v2si_v2si =
- build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
- V2SI_type_node, NULL);
- void_ftype_pv2sf_v2sf_v2sf =
- build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
- V2SF_type_node, NULL);
- void_ftype_pdi_di_di =
- build_function_type_list (void_type_node, intDI_pointer_node,
- neon_intDI_type_node, neon_intDI_type_node, NULL);
- void_ftype_pv16qi_v16qi_v16qi =
- build_function_type_list (void_type_node, V16QI_pointer_node,
- V16QI_type_node, V16QI_type_node, NULL);
- void_ftype_pv8hi_v8hi_v8hi =
- build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
- V8HI_type_node, NULL);
- void_ftype_pv4si_v4si_v4si =
- build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
- V4SI_type_node, NULL);
- void_ftype_pv4sf_v4sf_v4sf =
- build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
- V4SF_type_node, NULL);
- void_ftype_pv2di_v2di_v2di =
- build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
- V2DI_type_node, NULL);
-
- if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
- {
- tree V4USI_type_node =
- build_vector_type_for_mode (intUSI_type_node, V4SImode);
-
- tree V16UQI_type_node =
- build_vector_type_for_mode (intUQI_type_node, V16QImode);
-
- tree v16uqi_ftype_v16uqi
- = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
-
- tree v16uqi_ftype_v16uqi_v16uqi
- = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
- V16UQI_type_node, NULL_TREE);
-
- tree v4usi_ftype_v4usi
- = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
-
- tree v4usi_ftype_v4usi_v4usi
- = build_function_type_list (V4USI_type_node, V4USI_type_node,
- V4USI_type_node, NULL_TREE);
-
- tree v4usi_ftype_v4usi_v4usi_v4usi
- = build_function_type_list (V4USI_type_node, V4USI_type_node,
- V4USI_type_node, V4USI_type_node, NULL_TREE);
-
- tree uti_ftype_udi_udi
- = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
- intUDI_type_node, NULL_TREE);
-
- #undef CRYPTO1
- #undef CRYPTO2
- #undef CRYPTO3
- #undef C
- #undef N
- #undef CF
- #undef FT1
- #undef FT2
- #undef FT3
-
- #define C(U) \
- ARM_BUILTIN_CRYPTO_##U
- #define N(L) \
- "__builtin_arm_crypto_"#L
- #define FT1(R, A) \
- R##_ftype_##A
- #define FT2(R, A1, A2) \
- R##_ftype_##A1##_##A2
- #define FT3(R, A1, A2, A3) \
- R##_ftype_##A1##_##A2##_##A3
- #define CRYPTO1(L, U, R, A) \
- arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
- C (U), BUILT_IN_MD, \
- NULL, NULL_TREE);
- #define CRYPTO2(L, U, R, A1, A2) \
- arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
- C (U), BUILT_IN_MD, \
- NULL, NULL_TREE);
-
- #define CRYPTO3(L, U, R, A1, A2, A3) \
- arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
- C (U), BUILT_IN_MD, \
- NULL, NULL_TREE);
- #include "crypto.def"
-
- #undef CRYPTO1
- #undef CRYPTO2
- #undef CRYPTO3
- #undef C
- #undef N
- #undef FT1
- #undef FT2
- #undef FT3
- }
- dreg_types[0] = V8QI_type_node;
- dreg_types[1] = V4HI_type_node;
- dreg_types[2] = V2SI_type_node;
- dreg_types[3] = V2SF_type_node;
- dreg_types[4] = neon_intDI_type_node;
-
- qreg_types[0] = V16QI_type_node;
- qreg_types[1] = V8HI_type_node;
- qreg_types[2] = V4SI_type_node;
- qreg_types[3] = V4SF_type_node;
- qreg_types[4] = V2DI_type_node;
- qreg_types[5] = neon_intUTI_type_node;
-
- for (i = 0; i < NUM_QREG_TYPES; i++)
- {
- int j;
- for (j = 0; j < NUM_QREG_TYPES; j++)
- {
- if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
- reinterp_ftype_dreg[i][j]
- = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
-
- reinterp_ftype_qreg[i][j]
- = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
- }
- }
-
- for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
- i < ARRAY_SIZE (neon_builtin_data);
- i++, fcode++)
- {
- neon_builtin_datum *d = &neon_builtin_data[i];
-
- const char* const modenames[] = {
- "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
- "v16qi", "v8hi", "v4si", "v4sf", "v2di",
- "ti", "ei", "oi"
- };
- char namebuf[60];
- tree ftype = NULL;
- int is_load = 0, is_store = 0;
-
- gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
-
- d->fcode = fcode;
-
- switch (d->itype)
- {
- case NEON_LOAD1:
- case NEON_LOAD1LANE:
- case NEON_LOADSTRUCT:
- case NEON_LOADSTRUCTLANE:
- is_load = 1;
- /* Fall through. */
- case NEON_STORE1:
- case NEON_STORE1LANE:
- case NEON_STORESTRUCT:
- case NEON_STORESTRUCTLANE:
- if (!is_load)
- is_store = 1;
- /* Fall through. */
- case NEON_UNOP:
- case NEON_RINT:
- case NEON_BINOP:
- case NEON_LOGICBINOP:
- case NEON_SHIFTINSERT:
- case NEON_TERNOP:
- case NEON_GETLANE:
- case NEON_SETLANE:
- case NEON_CREATE:
- case NEON_DUP:
- case NEON_DUPLANE:
- case NEON_SHIFTIMM:
- case NEON_SHIFTACC:
- case NEON_COMBINE:
- case NEON_SPLIT:
- case NEON_CONVERT:
- case NEON_FIXCONV:
- case NEON_LANEMUL:
- case NEON_LANEMULL:
- case NEON_LANEMULH:
- case NEON_LANEMAC:
- case NEON_SCALARMUL:
- case NEON_SCALARMULL:
- case NEON_SCALARMULH:
- case NEON_SCALARMAC:
- case NEON_SELECT:
- case NEON_VTBL:
- case NEON_VTBX:
- {
- int k;
- tree return_type = void_type_node, args = void_list_node;
-
- /* Build a function type directly from the insn_data for
- this builtin. The build_function_type() function takes
- care of removing duplicates for us. */
- for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
- {
- tree eltype;
-
- if (is_load && k == 1)
- {
- /* Neon load patterns always have the memory
- operand in the operand 1 position. */
- gcc_assert (insn_data[d->code].operand[k].predicate
- == neon_struct_operand);
-
- switch (d->mode)
- {
- case T_V8QI:
- case T_V16QI:
- eltype = const_intQI_pointer_node;
- break;
-
- case T_V4HI:
- case T_V8HI:
- eltype = const_intHI_pointer_node;
- break;
-
- case T_V2SI:
- case T_V4SI:
- eltype = const_intSI_pointer_node;
- break;
-
- case T_V2SF:
- case T_V4SF:
- eltype = const_float_pointer_node;
- break;
-
- case T_DI:
- case T_V2DI:
- eltype = const_intDI_pointer_node;
- break;
-
- default: gcc_unreachable ();
- }
- }
- else if (is_store && k == 0)
- {
- /* Similarly, Neon store patterns use operand 0 as
- the memory location to store to. */
- gcc_assert (insn_data[d->code].operand[k].predicate
- == neon_struct_operand);
-
- switch (d->mode)
- {
- case T_V8QI:
- case T_V16QI:
- eltype = intQI_pointer_node;
- break;
-
- case T_V4HI:
- case T_V8HI:
- eltype = intHI_pointer_node;
- break;
-
- case T_V2SI:
- case T_V4SI:
- eltype = intSI_pointer_node;
- break;
-
- case T_V2SF:
- case T_V4SF:
- eltype = float_pointer_node;
- break;
-
- case T_DI:
- case T_V2DI:
- eltype = intDI_pointer_node;
- break;
-
- default: gcc_unreachable ();
- }
- }
- else
- {
- switch (insn_data[d->code].operand[k].mode)
- {
- case VOIDmode: eltype = void_type_node; break;
- /* Scalars. */
- case QImode: eltype = neon_intQI_type_node; break;
- case HImode: eltype = neon_intHI_type_node; break;
- case SImode: eltype = neon_intSI_type_node; break;
- case SFmode: eltype = neon_float_type_node; break;
- case DImode: eltype = neon_intDI_type_node; break;
- case TImode: eltype = intTI_type_node; break;
- case EImode: eltype = intEI_type_node; break;
- case OImode: eltype = intOI_type_node; break;
- case CImode: eltype = intCI_type_node; break;
- case XImode: eltype = intXI_type_node; break;
- /* 64-bit vectors. */
- case V8QImode: eltype = V8QI_type_node; break;
- case V4HImode: eltype = V4HI_type_node; break;
- case V2SImode: eltype = V2SI_type_node; break;
- case V2SFmode: eltype = V2SF_type_node; break;
- /* 128-bit vectors. */
- case V16QImode: eltype = V16QI_type_node; break;
- case V8HImode: eltype = V8HI_type_node; break;
- case V4SImode: eltype = V4SI_type_node; break;
- case V4SFmode: eltype = V4SF_type_node; break;
- case V2DImode: eltype = V2DI_type_node; break;
- default: gcc_unreachable ();
- }
- }
-
- if (k == 0 && !is_store)
- return_type = eltype;
- else
- args = tree_cons (NULL_TREE, eltype, args);
- }
-
- ftype = build_function_type (return_type, args);
- }
- break;
-
- case NEON_RESULTPAIR:
- {
- switch (insn_data[d->code].operand[1].mode)
- {
- case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
- case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
- case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
- case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
- case DImode: ftype = void_ftype_pdi_di_di; break;
- case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
- case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
- case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
- case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
- case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
- default: gcc_unreachable ();
- }
- }
- break;
-
- case NEON_REINTERP:
- {
- /* We iterate over NUM_DREG_TYPES doubleword types,
- then NUM_QREG_TYPES quadword types.
- V4HF is not a type used in reinterpret, so we translate
- d->mode to the correct index in reinterp_ftype_dreg. */
- bool qreg_p
- = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
- int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
- % NUM_QREG_TYPES;
- switch (insn_data[d->code].operand[0].mode)
- {
- case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
- case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
- case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
- case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
- case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
- case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
- case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
- case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
- case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
- case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
- case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
- default: gcc_unreachable ();
- }
- }
- break;
- case NEON_FLOAT_WIDEN:
- {
- tree eltype = NULL_TREE;
- tree return_type = NULL_TREE;
-
- switch (insn_data[d->code].operand[1].mode)
- {
- case V4HFmode:
- eltype = V4HF_type_node;
- return_type = V4SF_type_node;
- break;
- default: gcc_unreachable ();
- }
- ftype = build_function_type_list (return_type, eltype, NULL);
- break;
- }
- case NEON_FLOAT_NARROW:
- {
- tree eltype = NULL_TREE;
- tree return_type = NULL_TREE;
-
- switch (insn_data[d->code].operand[1].mode)
- {
- case V4SFmode:
- eltype = V4SF_type_node;
- return_type = V4HF_type_node;
- break;
- default: gcc_unreachable ();
- }
- ftype = build_function_type_list (return_type, eltype, NULL);
- break;
- }
- default:
- gcc_unreachable ();
- }
-
- gcc_assert (ftype != NULL);
-
- sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
-
- decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
- NULL_TREE);
- arm_builtin_decls[fcode] = decl;
- }
-}
-
-#undef NUM_DREG_TYPES
-#undef NUM_QREG_TYPES
-
-#define def_mbuiltin(MASK, NAME, TYPE, CODE) \
- do \
- { \
- if ((MASK) & insn_flags) \
- { \
- tree bdecl; \
- bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
- BUILT_IN_MD, NULL, NULL_TREE); \
- arm_builtin_decls[CODE] = bdecl; \
- } \
- } \
- while (0)
-
-struct builtin_description
-{
- const unsigned int mask;
- const enum insn_code icode;
- const char * const name;
- const enum arm_builtins code;
- const enum rtx_code comparison;
- const unsigned int flag;
-};
-
-static const struct builtin_description bdesc_2arg[] =
-{
-#define IWMMXT_BUILTIN(code, string, builtin) \
- { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
- ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
-#define IWMMXT2_BUILTIN(code, string, builtin) \
- { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
- ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
- IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
- IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
- IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
- IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
- IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
- IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
- IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
- IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
- IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
- IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
- IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
- IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
- IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
- IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
- IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
- IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
- IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
- IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
- IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
- IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
- IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
- IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
- IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
- IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
- IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
- IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
- IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
- IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
- IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
- IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
- IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
- IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
- IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
- IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
- IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
- IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
- IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
- IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
- IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
- IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
- IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
- IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
- IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
- IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
- IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
- IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
- IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
- IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
- IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
- IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
- IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
- IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
- IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
- IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
- IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
- IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
- IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
- IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
- IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
- IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
- IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
- IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
- IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
- IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
- IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
- IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
- IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
- IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
- IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
- IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
- IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
- IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
- IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
- IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
- IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
- IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
- IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
- IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
-
-#define IWMMXT_BUILTIN2(code, builtin) \
- { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
-#define IWMMXT2_BUILTIN2(code, builtin) \
- { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
- IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
- IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
- IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
- IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
- IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
-
-#define CRC32_BUILTIN(L, U) \
- {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
- UNKNOWN, 0},
- CRC32_BUILTIN (crc32b, CRC32B)
- CRC32_BUILTIN (crc32h, CRC32H)
- CRC32_BUILTIN (crc32w, CRC32W)
- CRC32_BUILTIN (crc32cb, CRC32CB)
- CRC32_BUILTIN (crc32ch, CRC32CH)
- CRC32_BUILTIN (crc32cw, CRC32CW)
-#undef CRC32_BUILTIN
-
-
-#define CRYPTO_BUILTIN(L, U) \
- {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
- UNKNOWN, 0},
-#undef CRYPTO1
-#undef CRYPTO2
-#undef CRYPTO3
-#define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
-#define CRYPTO1(L, U, R, A)
-#define CRYPTO3(L, U, R, A1, A2, A3)
-#include "crypto.def"
-#undef CRYPTO1
-#undef CRYPTO2
-#undef CRYPTO3
-
-};
-
-static const struct builtin_description bdesc_1arg[] =
-{
- IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
- IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
- IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
- IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
- IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
- IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
- IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
- IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
- IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
- IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
- IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
- IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
- IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
- IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
- IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
- IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
- IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
- IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
- IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
- IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
- IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
- IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
- IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
- IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
-
-#define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
-#define CRYPTO2(L, U, R, A1, A2)
-#define CRYPTO3(L, U, R, A1, A2, A3)
-#include "crypto.def"
-#undef CRYPTO1
-#undef CRYPTO2
-#undef CRYPTO3
-};
-
-static const struct builtin_description bdesc_3arg[] =
-{
-#define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
-#define CRYPTO1(L, U, R, A)
-#define CRYPTO2(L, U, R, A1, A2)
-#include "crypto.def"
-#undef CRYPTO1
-#undef CRYPTO2
-#undef CRYPTO3
- };
-#undef CRYPTO_BUILTIN
-
-/* Set up all the iWMMXt builtins. This is not called if
- TARGET_IWMMXT is zero. */
-
-static void
-arm_init_iwmmxt_builtins (void)
-{
- const struct builtin_description * d;
- size_t i;
-
- tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
- tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
- tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
-
- tree v8qi_ftype_v8qi_v8qi_int
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, V8QI_type_node,
- integer_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_int
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, integer_type_node, NULL_TREE);
- tree v2si_ftype_v2si_int
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, integer_type_node, NULL_TREE);
- tree v2si_ftype_di_di
- = build_function_type_list (V2SI_type_node,
- long_long_integer_type_node,
- long_long_integer_type_node,
- NULL_TREE);
- tree di_ftype_di_int
- = build_function_type_list (long_long_integer_type_node,
- long_long_integer_type_node,
- integer_type_node, NULL_TREE);
- tree di_ftype_di_int_int
- = build_function_type_list (long_long_integer_type_node,
- long_long_integer_type_node,
- integer_type_node,
- integer_type_node, NULL_TREE);
- tree int_ftype_v8qi
- = build_function_type_list (integer_type_node,
- V8QI_type_node, NULL_TREE);
- tree int_ftype_v4hi
- = build_function_type_list (integer_type_node,
- V4HI_type_node, NULL_TREE);
- tree int_ftype_v2si
- = build_function_type_list (integer_type_node,
- V2SI_type_node, NULL_TREE);
- tree int_ftype_v8qi_int
- = build_function_type_list (integer_type_node,
- V8QI_type_node, integer_type_node, NULL_TREE);
- tree int_ftype_v4hi_int
- = build_function_type_list (integer_type_node,
- V4HI_type_node, integer_type_node, NULL_TREE);
- tree int_ftype_v2si_int
- = build_function_type_list (integer_type_node,
- V2SI_type_node, integer_type_node, NULL_TREE);
- tree v8qi_ftype_v8qi_int_int
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, integer_type_node,
- integer_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_int_int
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, integer_type_node,
- integer_type_node, NULL_TREE);
- tree v2si_ftype_v2si_int_int
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, integer_type_node,
- integer_type_node, NULL_TREE);
- /* Miscellaneous. */
- tree v8qi_ftype_v4hi_v4hi
- = build_function_type_list (V8QI_type_node,
- V4HI_type_node, V4HI_type_node, NULL_TREE);
- tree v4hi_ftype_v2si_v2si
- = build_function_type_list (V4HI_type_node,
- V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree v8qi_ftype_v4hi_v8qi
- = build_function_type_list (V8QI_type_node,
- V4HI_type_node, V8QI_type_node, NULL_TREE);
- tree v2si_ftype_v4hi_v4hi
- = build_function_type_list (V2SI_type_node,
- V4HI_type_node, V4HI_type_node, NULL_TREE);
- tree v2si_ftype_v8qi_v8qi
- = build_function_type_list (V2SI_type_node,
- V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_di
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, long_long_integer_type_node,
- NULL_TREE);
- tree v2si_ftype_v2si_di
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, long_long_integer_type_node,
- NULL_TREE);
- tree di_ftype_void
- = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
- tree int_ftype_void
- = build_function_type_list (integer_type_node, NULL_TREE);
- tree di_ftype_v8qi
- = build_function_type_list (long_long_integer_type_node,
- V8QI_type_node, NULL_TREE);
- tree di_ftype_v4hi
- = build_function_type_list (long_long_integer_type_node,
- V4HI_type_node, NULL_TREE);
- tree di_ftype_v2si
- = build_function_type_list (long_long_integer_type_node,
- V2SI_type_node, NULL_TREE);
- tree v2si_ftype_v4hi
- = build_function_type_list (V2SI_type_node,
- V4HI_type_node, NULL_TREE);
- tree v4hi_ftype_v8qi
- = build_function_type_list (V4HI_type_node,
- V8QI_type_node, NULL_TREE);
- tree v8qi_ftype_v8qi
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, NULL_TREE);
- tree v2si_ftype_v2si
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, NULL_TREE);
-
- tree di_ftype_di_v4hi_v4hi
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- V4HI_type_node, V4HI_type_node,
- NULL_TREE);
-
- tree di_ftype_v4hi_v4hi
- = build_function_type_list (long_long_unsigned_type_node,
- V4HI_type_node,V4HI_type_node,
- NULL_TREE);
-
- tree v2si_ftype_v2si_v4hi_v4hi
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V4HI_type_node,
- V4HI_type_node, NULL_TREE);
-
- tree v2si_ftype_v2si_v8qi_v8qi
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V8QI_type_node,
- V8QI_type_node, NULL_TREE);
-
- tree di_ftype_di_v2si_v2si
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- V2SI_type_node, V2SI_type_node,
- NULL_TREE);
-
- tree di_ftype_di_di_int
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- integer_type_node, NULL_TREE);
-
- tree void_ftype_int
- = build_function_type_list (void_type_node,
- integer_type_node, NULL_TREE);
-
- tree v8qi_ftype_char
- = build_function_type_list (V8QI_type_node,
- signed_char_type_node, NULL_TREE);
-
- tree v4hi_ftype_short
- = build_function_type_list (V4HI_type_node,
- short_integer_type_node, NULL_TREE);
-
- tree v2si_ftype_int
- = build_function_type_list (V2SI_type_node,
- integer_type_node, NULL_TREE);
-
- /* Normal vector binops. */
- tree v8qi_ftype_v8qi_v8qi
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_v4hi
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node,V4HI_type_node, NULL_TREE);
- tree v2si_ftype_v2si_v2si
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree di_ftype_di_di
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- NULL_TREE);
-
- /* Add all builtins that are more or less simple operations on two
- operands. */
- for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- {
- /* Use one of the operands; the target can have a different mode for
- mask-generating compares. */
- enum machine_mode mode;
- tree type;
-
- if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
- continue;
-
- mode = insn_data[d->icode].operand[1].mode;
-
- switch (mode)
- {
- case V8QImode:
- type = v8qi_ftype_v8qi_v8qi;
- break;
- case V4HImode:
- type = v4hi_ftype_v4hi_v4hi;
- break;
- case V2SImode:
- type = v2si_ftype_v2si_v2si;
- break;
- case DImode:
- type = di_ftype_di_di;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- def_mbuiltin (d->mask, d->name, type, d->code);
- }
-
- /* Add the remaining MMX insns with somewhat more complicated types. */
-#define iwmmx_mbuiltin(NAME, TYPE, CODE) \
- def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
- ARM_BUILTIN_ ## CODE)
-
-#define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
- def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
- ARM_BUILTIN_ ## CODE)
-
- iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
- iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
- iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
- iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
- iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
- iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
- iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
- iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
- iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
-
- iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
- iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
- iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
- iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
- iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
- iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
-
- iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
- iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
- iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
- iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
- iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
- iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
-
- iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
- iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
- iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
- iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
- iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
- iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
-
- iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
- iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
- iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
- iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
- iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
- iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
-
- iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
-
- iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
- iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
- iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
- iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
- iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
- iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
- iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
- iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
- iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
- iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
-
- iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
- iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
- iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
- iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
- iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
- iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
- iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
- iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
- iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
-
- iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
- iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
- iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
-
- iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
- iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
- iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
-
- iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
- iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
-
- iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
- iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
- iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
- iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
- iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
- iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
-
- iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
- iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
- iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
- iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
- iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
- iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
- iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
- iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
- iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
- iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
- iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
- iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
-
- iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
- iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
- iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
- iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
-
- iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
- iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
- iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
- iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
- iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
- iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
- iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
-
- iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
- iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
- iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
-
- iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
- iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
- iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
- iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
-
- iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
- iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
- iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
- iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
-
- iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
- iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
- iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
- iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
-
- iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
- iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
- iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
- iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
-
- iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
- iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
- iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
- iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
-
- iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
- iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
- iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
- iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
-
- iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
-
- iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
- iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
- iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
-
-#undef iwmmx_mbuiltin
-#undef iwmmx2_mbuiltin
-}
-
-static void
-arm_init_fp16_builtins (void)
-{
- tree fp16_type = make_node (REAL_TYPE);
- TYPE_PRECISION (fp16_type) = 16;
- layout_type (fp16_type);
- (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
-}
-
-static void
-arm_init_crc32_builtins ()
-{
- tree si_ftype_si_qi
- = build_function_type_list (unsigned_intSI_type_node,
- unsigned_intSI_type_node,
- unsigned_intQI_type_node, NULL_TREE);
- tree si_ftype_si_hi
- = build_function_type_list (unsigned_intSI_type_node,
- unsigned_intSI_type_node,
- unsigned_intHI_type_node, NULL_TREE);
- tree si_ftype_si_si
- = build_function_type_list (unsigned_intSI_type_node,
- unsigned_intSI_type_node,
- unsigned_intSI_type_node, NULL_TREE);
-
- arm_builtin_decls[ARM_BUILTIN_CRC32B]
- = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
- ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
- arm_builtin_decls[ARM_BUILTIN_CRC32H]
- = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
- ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
- arm_builtin_decls[ARM_BUILTIN_CRC32W]
- = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
- ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
- arm_builtin_decls[ARM_BUILTIN_CRC32CB]
- = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
- ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
- arm_builtin_decls[ARM_BUILTIN_CRC32CH]
- = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
- ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
- arm_builtin_decls[ARM_BUILTIN_CRC32CW]
- = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
- ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
-}
-
-static void
-arm_init_builtins (void)
-{
- if (TARGET_REALLY_IWMMXT)
- arm_init_iwmmxt_builtins ();
-
- if (TARGET_NEON)
- arm_init_neon_builtins ();
-
- if (arm_fp16_format)
- arm_init_fp16_builtins ();
-
- if (TARGET_CRC32)
- arm_init_crc32_builtins ();
-}
-
-/* Return the ARM builtin for CODE. */
-
-static tree
-arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
-{
- if (code >= ARM_BUILTIN_MAX)
- return error_mark_node;
-
- return arm_builtin_decls[code];
-}
-
/* Implement TARGET_INVALID_PARAMETER_TYPE. */
static const char *
@@ -24595,7 +23384,7 @@ arm_convert_to_type (tree type, tree expr)
special-cased in the default hook. */
static bool
-arm_scalar_mode_supported_p (enum machine_mode mode)
+arm_scalar_mode_supported_p (machine_mode mode)
{
if (mode == HFmode)
return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
@@ -24605,511 +23394,6 @@ arm_scalar_mode_supported_p (enum machine_mode mode)
return default_scalar_mode_supported_p (mode);
}
-/* Errors in the source file can cause expand_expr to return const0_rtx
- where we expect a vector. To avoid crashing, use one of the vector
- clear instructions. */
-
-static rtx
-safe_vector_operand (rtx x, enum machine_mode mode)
-{
- if (x != const0_rtx)
- return x;
- x = gen_reg_rtx (mode);
-
- emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
- : gen_rtx_SUBREG (DImode, x, 0)));
- return x;
-}
-
-/* Function to expand ternary builtins. */
-static rtx
-arm_expand_ternop_builtin (enum insn_code icode,
- tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- tree arg2 = CALL_EXPR_ARG (exp, 2);
-
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- rtx op3 = NULL_RTX;
-
- /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
- lane operand depending on endianness. */
- bool builtin_sha1cpm_p = false;
-
- if (insn_data[icode].n_operands == 5)
- {
- gcc_assert (icode == CODE_FOR_crypto_sha1c
- || icode == CODE_FOR_crypto_sha1p
- || icode == CODE_FOR_crypto_sha1m);
- builtin_sha1cpm_p = true;
- }
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
- enum machine_mode mode2 = insn_data[icode].operand[3].mode;
-
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
- if (VECTOR_MODE_P (mode2))
- op2 = safe_vector_operand (op2, mode2);
-
- if (! target
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
-
- gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
- && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
- && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
- if (builtin_sha1cpm_p)
- op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
-
- if (builtin_sha1cpm_p)
- pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
- else
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of arm_expand_builtin to take care of binop insns. */
-
-static rtx
-arm_expand_binop_builtin (enum insn_code icode,
- tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- if (! target
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
-
- gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
- && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of arm_expand_builtin to take care of unop insns. */
-
-static rtx
-arm_expand_unop_builtin (enum insn_code icode,
- tree exp, rtx target, int do_load)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- rtx op0 = expand_normal (arg0);
- rtx op1 = NULL_RTX;
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- bool builtin_sha1h_p = false;
-
- if (insn_data[icode].n_operands == 3)
- {
- gcc_assert (icode == CODE_FOR_crypto_sha1h);
- builtin_sha1h_p = true;
- }
-
- if (! target
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- if (do_load)
- op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- else
- {
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- }
- if (builtin_sha1h_p)
- op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
-
- if (builtin_sha1h_p)
- pat = GEN_FCN (icode) (target, op0, op1);
- else
- pat = GEN_FCN (icode) (target, op0);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-typedef enum {
- NEON_ARG_COPY_TO_REG,
- NEON_ARG_CONSTANT,
- NEON_ARG_MEMORY,
- NEON_ARG_STOP
-} builtin_arg;
-
-#define NEON_MAX_BUILTIN_ARGS 5
-
-/* EXP is a pointer argument to a Neon load or store intrinsic. Derive
- and return an expression for the accessed memory.
-
- The intrinsic function operates on a block of registers that has
- mode REG_MODE. This block contains vectors of type TYPE_MODE. The
- function references the memory at EXP of type TYPE and in mode
- MEM_MODE; this mode may be BLKmode if no more suitable mode is
- available. */
-
-static tree
-neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
- enum machine_mode reg_mode,
- neon_builtin_type_mode type_mode)
-{
- HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
- tree elem_type, upper_bound, array_type;
-
- /* Work out the size of the register block in bytes. */
- reg_size = GET_MODE_SIZE (reg_mode);
-
- /* Work out the size of each vector in bytes. */
- gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
- vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
-
- /* Work out how many vectors there are. */
- gcc_assert (reg_size % vector_size == 0);
- nvectors = reg_size / vector_size;
-
- /* Work out the type of each element. */
- gcc_assert (POINTER_TYPE_P (type));
- elem_type = TREE_TYPE (type);
-
- /* Work out how many elements are being loaded or stored.
- MEM_MODE == REG_MODE implies a one-to-one mapping between register
- and memory elements; anything else implies a lane load or store. */
- if (mem_mode == reg_mode)
- nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
- else
- nelems = nvectors;
-
- /* Create a type that describes the full access. */
- upper_bound = build_int_cst (size_type_node, nelems - 1);
- array_type = build_array_type (elem_type, build_index_type (upper_bound));
-
- /* Dereference EXP using that type. */
- return fold_build2 (MEM_REF, array_type, exp,
- build_int_cst (build_pointer_type (array_type), 0));
-}
-
-/* Expand a Neon builtin. */
-static rtx
-arm_expand_neon_args (rtx target, int icode, int have_retval,
- neon_builtin_type_mode type_mode,
- tree exp, int fcode, ...)
-{
- va_list ap;
- rtx pat;
- tree arg[NEON_MAX_BUILTIN_ARGS];
- rtx op[NEON_MAX_BUILTIN_ARGS];
- tree arg_type;
- tree formals;
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
- enum machine_mode other_mode;
- int argc = 0;
- int opno;
-
- if (have_retval
- && (!target
- || GET_MODE (target) != tmode
- || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
- target = gen_reg_rtx (tmode);
-
- va_start (ap, fcode);
-
- formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
-
- for (;;)
- {
- builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
-
- if (thisarg == NEON_ARG_STOP)
- break;
- else
- {
- opno = argc + have_retval;
- mode[argc] = insn_data[icode].operand[opno].mode;
- arg[argc] = CALL_EXPR_ARG (exp, argc);
- arg_type = TREE_VALUE (formals);
- if (thisarg == NEON_ARG_MEMORY)
- {
- other_mode = insn_data[icode].operand[1 - opno].mode;
- arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
- mode[argc], other_mode,
- type_mode);
- }
-
- /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
- be returned. */
- op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
- (thisarg == NEON_ARG_MEMORY
- ? EXPAND_MEMORY : EXPAND_NORMAL));
-
- switch (thisarg)
- {
- case NEON_ARG_COPY_TO_REG:
- /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
- if (!(*insn_data[icode].operand[opno].predicate)
- (op[argc], mode[argc]))
- op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
- break;
-
- case NEON_ARG_CONSTANT:
- /* FIXME: This error message is somewhat unhelpful. */
- if (!(*insn_data[icode].operand[opno].predicate)
- (op[argc], mode[argc]))
- error ("argument must be a constant");
- break;
-
- case NEON_ARG_MEMORY:
- /* Check if expand failed. */
- if (op[argc] == const0_rtx)
- return 0;
- gcc_assert (MEM_P (op[argc]));
- PUT_MODE (op[argc], mode[argc]);
- /* ??? arm_neon.h uses the same built-in functions for signed
- and unsigned accesses, casting where necessary. This isn't
- alias safe. */
- set_mem_alias_set (op[argc], 0);
- if (!(*insn_data[icode].operand[opno].predicate)
- (op[argc], mode[argc]))
- op[argc] = (replace_equiv_address
- (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
- break;
-
- case NEON_ARG_STOP:
- gcc_unreachable ();
- }
-
- argc++;
- formals = TREE_CHAIN (formals);
- }
- }
-
- va_end (ap);
-
- if (have_retval)
- switch (argc)
- {
- case 1:
- pat = GEN_FCN (icode) (target, op[0]);
- break;
-
- case 2:
- pat = GEN_FCN (icode) (target, op[0], op[1]);
- break;
-
- case 3:
- pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
- break;
-
- case 4:
- pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
- break;
-
- case 5:
- pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
- break;
-
- default:
- gcc_unreachable ();
- }
- else
- switch (argc)
- {
- case 1:
- pat = GEN_FCN (icode) (op[0]);
- break;
-
- case 2:
- pat = GEN_FCN (icode) (op[0], op[1]);
- break;
-
- case 3:
- pat = GEN_FCN (icode) (op[0], op[1], op[2]);
- break;
-
- case 4:
- pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
- break;
-
- case 5:
- pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
- break;
-
- default:
- gcc_unreachable ();
- }
-
- if (!pat)
- return 0;
-
- emit_insn (pat);
-
- return target;
-}
-
-/* Expand a Neon builtin. These are "special" because they don't have symbolic
- constants defined per-instruction or per instruction-variant. Instead, the
- required info is looked up in the table neon_builtin_data. */
-static rtx
-arm_expand_neon_builtin (int fcode, tree exp, rtx target)
-{
- neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
- neon_itype itype = d->itype;
- enum insn_code icode = d->code;
- neon_builtin_type_mode type_mode = d->mode;
-
- switch (itype)
- {
- case NEON_UNOP:
- case NEON_CONVERT:
- case NEON_DUPLANE:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
-
- case NEON_BINOP:
- case NEON_SETLANE:
- case NEON_SCALARMUL:
- case NEON_SCALARMULL:
- case NEON_SCALARMULH:
- case NEON_SHIFTINSERT:
- case NEON_LOGICBINOP:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
- NEON_ARG_STOP);
-
- case NEON_TERNOP:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
- NEON_ARG_CONSTANT, NEON_ARG_STOP);
-
- case NEON_GETLANE:
- case NEON_FIXCONV:
- case NEON_SHIFTIMM:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
- NEON_ARG_STOP);
-
- case NEON_CREATE:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
-
- case NEON_DUP:
- case NEON_RINT:
- case NEON_SPLIT:
- case NEON_FLOAT_WIDEN:
- case NEON_FLOAT_NARROW:
- case NEON_REINTERP:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
-
- case NEON_COMBINE:
- case NEON_VTBL:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
-
- case NEON_RESULTPAIR:
- return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
- NEON_ARG_STOP);
-
- case NEON_LANEMUL:
- case NEON_LANEMULL:
- case NEON_LANEMULH:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
- NEON_ARG_CONSTANT, NEON_ARG_STOP);
-
- case NEON_LANEMAC:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
- NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
-
- case NEON_SHIFTACC:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
- NEON_ARG_CONSTANT, NEON_ARG_STOP);
-
- case NEON_SCALARMAC:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
- NEON_ARG_CONSTANT, NEON_ARG_STOP);
-
- case NEON_SELECT:
- case NEON_VTBX:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
- NEON_ARG_STOP);
-
- case NEON_LOAD1:
- case NEON_LOADSTRUCT:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_MEMORY, NEON_ARG_STOP);
-
- case NEON_LOAD1LANE:
- case NEON_LOADSTRUCTLANE:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
- NEON_ARG_STOP);
-
- case NEON_STORE1:
- case NEON_STORESTRUCT:
- return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
- NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
-
- case NEON_STORE1LANE:
- case NEON_STORESTRUCTLANE:
- return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
- NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
- NEON_ARG_STOP);
- }
-
- gcc_unreachable ();
-}
-
/* Emit code to reinterpret one Neon type as another, without altering bits. */
void
neon_reinterpret (rtx dest, rtx src)
@@ -25117,24 +23401,6 @@ neon_reinterpret (rtx dest, rtx src)
emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
-/* Emit code to place a Neon pair result in memory locations (with equal
- registers). */
-void
-neon_emit_pair_result_insn (enum machine_mode mode,
- rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
- rtx op1, rtx op2)
-{
- rtx mem = gen_rtx_MEM (mode, destaddr);
- rtx tmp1 = gen_reg_rtx (mode);
- rtx tmp2 = gen_reg_rtx (mode);
-
- emit_insn (intfn (tmp1, op1, op2, tmp2));
-
- emit_move_insn (mem, tmp1);
- mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
- emit_move_insn (mem, tmp2);
-}
-
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
not to early-clobber SRC registers in the process.
@@ -25173,7 +23439,7 @@ neon_split_vcombine (rtx operands[3])
unsigned int dest = REGNO (operands[0]);
unsigned int src1 = REGNO (operands[1]);
unsigned int src2 = REGNO (operands[2]);
- enum machine_mode halfmode = GET_MODE (operands[1]);
+ machine_mode halfmode = GET_MODE (operands[1]);
unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
rtx destlo, desthi;
@@ -25215,531 +23481,6 @@ neon_split_vcombine (rtx operands[3])
emit_move_insn (destlo, operands[1]);
}
}
-
-/* Expand an expression EXP that calls a built-in function,
- with result going to TARGET if that's convenient
- (and in mode MODE if that's convenient).
- SUBTARGET may be used as the target for computing one of EXP's operands.
- IGNORE is nonzero if the value is to be ignored. */
-
-static rtx
-arm_expand_builtin (tree exp,
- rtx target,
- rtx subtarget ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
- int ignore ATTRIBUTE_UNUSED)
-{
- const struct builtin_description * d;
- enum insn_code icode;
- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- tree arg0;
- tree arg1;
- tree arg2;
- rtx op0;
- rtx op1;
- rtx op2;
- rtx pat;
- unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
- size_t i;
- enum machine_mode tmode;
- enum machine_mode mode0;
- enum machine_mode mode1;
- enum machine_mode mode2;
- int opint;
- int selector;
- int mask;
- int imm;
-
- if (fcode >= ARM_BUILTIN_NEON_BASE)
- return arm_expand_neon_builtin (fcode, exp, target);
-
- switch (fcode)
- {
- case ARM_BUILTIN_TEXTRMSB:
- case ARM_BUILTIN_TEXTRMUB:
- case ARM_BUILTIN_TEXTRMSH:
- case ARM_BUILTIN_TEXTRMUH:
- case ARM_BUILTIN_TEXTRMSW:
- case ARM_BUILTIN_TEXTRMUW:
- icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
- : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
- : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
- : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
- : CODE_FOR_iwmmxt_textrmw);
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- {
- /* @@@ better error message */
- error ("selector must be an immediate");
- return gen_reg_rtx (tmode);
- }
-
- opint = INTVAL (op1);
- if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
- {
- if (opint > 7 || opint < 0)
- error ("the range of selector should be in 0 to 7");
- }
- else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
- {
- if (opint > 3 || opint < 0)
- error ("the range of selector should be in 0 to 3");
- }
- else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
- {
- if (opint > 1 || opint < 0)
- error ("the range of selector should be in 0 to 1");
- }
-
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WALIGNI:
- /* If op2 is immediate, call walighi, else call walighr. */
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- if (CONST_INT_P (op2))
- {
- icode = CODE_FOR_iwmmxt_waligni;
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
- if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
- selector = INTVAL (op2);
- if (selector > 7 || selector < 0)
- error ("the range of selector should be in 0 to 7");
- }
- else
- {
- icode = CODE_FOR_iwmmxt_walignr;
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
- if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
- }
- if (target == 0
- || GET_MODE (target) != tmode
- || !(*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (!pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_TINSRB:
- case ARM_BUILTIN_TINSRH:
- case ARM_BUILTIN_TINSRW:
- case ARM_BUILTIN_WMERGE:
- icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
- : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
- : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
- : CODE_FOR_iwmmxt_tinsrw);
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
- {
- error ("selector must be an immediate");
- return const0_rtx;
- }
- if (icode == CODE_FOR_iwmmxt_wmerge)
- {
- selector = INTVAL (op2);
- if (selector > 7 || selector < 0)
- error ("the range of selector should be in 0 to 7");
- }
- if ((icode == CODE_FOR_iwmmxt_tinsrb)
- || (icode == CODE_FOR_iwmmxt_tinsrh)
- || (icode == CODE_FOR_iwmmxt_tinsrw))
- {
- mask = 0x01;
- selector= INTVAL (op2);
- if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
- error ("the range of selector should be in 0 to 7");
- else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
- error ("the range of selector should be in 0 to 3");
- else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
- error ("the range of selector should be in 0 to 1");
- mask <<= selector;
- op2 = GEN_INT (mask);
- }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_SETWCGR0:
- case ARM_BUILTIN_SETWCGR1:
- case ARM_BUILTIN_SETWCGR2:
- case ARM_BUILTIN_SETWCGR3:
- icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
- : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
- : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
- : CODE_FOR_iwmmxt_setwcgr3);
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- mode0 = insn_data[icode].operand[0].mode;
- if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- pat = GEN_FCN (icode) (op0);
- if (!pat)
- return 0;
- emit_insn (pat);
- return 0;
-
- case ARM_BUILTIN_GETWCGR0:
- case ARM_BUILTIN_GETWCGR1:
- case ARM_BUILTIN_GETWCGR2:
- case ARM_BUILTIN_GETWCGR3:
- icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
- : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
- : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
- : CODE_FOR_iwmmxt_getwcgr3);
- tmode = insn_data[icode].operand[0].mode;
- if (target == 0
- || GET_MODE (target) != tmode
- || !(*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target);
- if (!pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WSHUFH:
- icode = CODE_FOR_iwmmxt_wshufh;
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_mode_reg (mode1, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
- {
- error ("mask must be an immediate");
- return const0_rtx;
- }
- selector = INTVAL (op1);
- if (selector < 0 || selector > 255)
- error ("the range of mask should be in 0 to 255");
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WMADDS:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
- case ARM_BUILTIN_WMADDSX:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
- case ARM_BUILTIN_WMADDSN:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
- case ARM_BUILTIN_WMADDU:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
- case ARM_BUILTIN_WMADDUX:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
- case ARM_BUILTIN_WMADDUN:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
- case ARM_BUILTIN_WSADBZ:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
- case ARM_BUILTIN_WSADHZ:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
-
- /* Several three-argument builtins. */
- case ARM_BUILTIN_WMACS:
- case ARM_BUILTIN_WMACU:
- case ARM_BUILTIN_TMIA:
- case ARM_BUILTIN_TMIAPH:
- case ARM_BUILTIN_TMIATT:
- case ARM_BUILTIN_TMIATB:
- case ARM_BUILTIN_TMIABT:
- case ARM_BUILTIN_TMIABB:
- case ARM_BUILTIN_WQMIABB:
- case ARM_BUILTIN_WQMIABT:
- case ARM_BUILTIN_WQMIATB:
- case ARM_BUILTIN_WQMIATT:
- case ARM_BUILTIN_WQMIABBN:
- case ARM_BUILTIN_WQMIABTN:
- case ARM_BUILTIN_WQMIATBN:
- case ARM_BUILTIN_WQMIATTN:
- case ARM_BUILTIN_WMIABB:
- case ARM_BUILTIN_WMIABT:
- case ARM_BUILTIN_WMIATB:
- case ARM_BUILTIN_WMIATT:
- case ARM_BUILTIN_WMIABBN:
- case ARM_BUILTIN_WMIABTN:
- case ARM_BUILTIN_WMIATBN:
- case ARM_BUILTIN_WMIATTN:
- case ARM_BUILTIN_WMIAWBB:
- case ARM_BUILTIN_WMIAWBT:
- case ARM_BUILTIN_WMIAWTB:
- case ARM_BUILTIN_WMIAWTT:
- case ARM_BUILTIN_WMIAWBBN:
- case ARM_BUILTIN_WMIAWBTN:
- case ARM_BUILTIN_WMIAWTBN:
- case ARM_BUILTIN_WMIAWTTN:
- case ARM_BUILTIN_WSADB:
- case ARM_BUILTIN_WSADH:
- icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
- : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
- : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
- : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
- : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
- : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
- : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
- : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
- : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
- : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
- : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
- : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
- : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
- : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
- : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
- : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
- : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
- : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
- : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
- : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
- : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
- : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
- : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
- : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
- : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
- : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
- : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
- : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
- : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
- : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
- : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
- : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
- : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
- : CODE_FOR_iwmmxt_wsadh);
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WZERO:
- target = gen_reg_rtx (DImode);
- emit_insn (gen_iwmmxt_clrdi (target));
- return target;
-
- case ARM_BUILTIN_WSRLHI:
- case ARM_BUILTIN_WSRLWI:
- case ARM_BUILTIN_WSRLDI:
- case ARM_BUILTIN_WSLLHI:
- case ARM_BUILTIN_WSLLWI:
- case ARM_BUILTIN_WSLLDI:
- case ARM_BUILTIN_WSRAHI:
- case ARM_BUILTIN_WSRAWI:
- case ARM_BUILTIN_WSRADI:
- case ARM_BUILTIN_WRORHI:
- case ARM_BUILTIN_WRORWI:
- case ARM_BUILTIN_WRORDI:
- case ARM_BUILTIN_WSRLH:
- case ARM_BUILTIN_WSRLW:
- case ARM_BUILTIN_WSRLD:
- case ARM_BUILTIN_WSLLH:
- case ARM_BUILTIN_WSLLW:
- case ARM_BUILTIN_WSLLD:
- case ARM_BUILTIN_WSRAH:
- case ARM_BUILTIN_WSRAW:
- case ARM_BUILTIN_WSRAD:
- case ARM_BUILTIN_WRORH:
- case ARM_BUILTIN_WRORW:
- case ARM_BUILTIN_WRORD:
- icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
- : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
- : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
- : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
- : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
- : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
- : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
- : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
- : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
- : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
- : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
- : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
- : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
- : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
- : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
- : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
- : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
- : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
- : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
- : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
- : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
- : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
- : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
- : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
- : CODE_FOR_nothing);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op1 = expand_normal (arg1);
- if (GET_MODE (op1) == VOIDmode)
- {
- imm = INTVAL (op1);
- if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
- || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
- && (imm < 0 || imm > 32))
- {
- if (fcode == ARM_BUILTIN_WRORHI)
- error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
- else if (fcode == ARM_BUILTIN_WRORWI)
- error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
- else if (fcode == ARM_BUILTIN_WRORH)
- error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
- else
- error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
- }
- else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
- && (imm < 0 || imm > 64))
- {
- if (fcode == ARM_BUILTIN_WRORDI)
- error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
- else
- error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
- }
- else if (imm < 0)
- {
- if (fcode == ARM_BUILTIN_WSRLHI)
- error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
- else if (fcode == ARM_BUILTIN_WSRLWI)
- error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
- else if (fcode == ARM_BUILTIN_WSRLDI)
- error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
- else if (fcode == ARM_BUILTIN_WSLLHI)
- error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
- else if (fcode == ARM_BUILTIN_WSLLWI)
- error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
- else if (fcode == ARM_BUILTIN_WSLLDI)
- error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
- else if (fcode == ARM_BUILTIN_WSRAHI)
- error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
- else if (fcode == ARM_BUILTIN_WSRAWI)
- error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
- else if (fcode == ARM_BUILTIN_WSRADI)
- error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
- else if (fcode == ARM_BUILTIN_WSRLH)
- error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
- else if (fcode == ARM_BUILTIN_WSRLW)
- error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
- else if (fcode == ARM_BUILTIN_WSRLD)
- error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
- else if (fcode == ARM_BUILTIN_WSLLH)
- error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
- else if (fcode == ARM_BUILTIN_WSLLW)
- error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
- else if (fcode == ARM_BUILTIN_WSLLD)
- error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
- else if (fcode == ARM_BUILTIN_WSRAH)
- error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
- else if (fcode == ARM_BUILTIN_WSRAW)
- error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
- else
- error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
- }
- }
- return arm_expand_binop_builtin (icode, exp, target);
-
- default:
- break;
- }
-
- for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == (const enum arm_builtins) fcode)
- return arm_expand_binop_builtin (d->icode, exp, target);
-
- for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == (const enum arm_builtins) fcode)
- return arm_expand_unop_builtin (d->icode, exp, target, 0);
-
- for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == (const enum arm_builtins) fcode)
- return arm_expand_ternop_builtin (d->icode, exp, target);
-
- /* @@@ Should really do something sensible here. */
- return NULL_RTX;
-}
/* Return the number (counting from 0) of
the least significant set bit in MASK. */
@@ -25755,11 +23496,12 @@ number_of_first_bit_set (unsigned mask)
to be saved; REAL_REGS is the set of registers to be described as
saved. If REAL_REGS is 0, only describe the stack adjustment. */
-static rtx
+static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
unsigned long regno;
- rtx par[10], tmp, reg, insn;
+ rtx par[10], tmp, reg;
+ rtx_insn *insn;
int i, j;
/* Build the parallel of the registers actually being stored. */
@@ -25888,7 +23630,7 @@ thumb_exit (FILE *f, int reg_containing_return_addr)
int pops_needed;
unsigned available;
unsigned required;
- int mode;
+ machine_mode mode;
int size;
int restore_a4 = FALSE;
@@ -26155,7 +23897,7 @@ thumb_exit (FILE *f, int reg_containing_return_addr)
For Thumb-1, we track the status of the condition codes; this
information is used in the cbranchsi4_insn pattern. */
void
-thumb1_final_prescan_insn (rtx insn)
+thumb1_final_prescan_insn (rtx_insn *insn)
{
if (flag_print_asm_name)
asm_fprintf (asm_out_file, "%@ 0x%04x\n",
@@ -26224,7 +23966,7 @@ thumb_shiftable_const (unsigned HOST_WIDE_INT val)
static int
thumb_far_jump_used_p (void)
{
- rtx insn;
+ rtx_insn *insn;
bool far_jump = false;
unsigned int func_size = 0;
@@ -26578,7 +24320,7 @@ static struct machine_function *
arm_init_machine_status (void)
{
struct machine_function *machine;
- machine = ggc_alloc_cleared_machine_function ();
+ machine = ggc_cleared_alloc<machine_function> ();
#if ARM_FT_UNKNOWN != 0
machine->func_type = ARM_FT_UNKNOWN;
@@ -26673,7 +24415,7 @@ thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
void
thumb1_expand_prologue (void)
{
- rtx insn;
+ rtx_insn *insn;
HOST_WIDE_INT amount;
arm_stack_offsets *offsets;
@@ -27125,7 +24867,7 @@ arm_expand_epilogue_apcs_frame (bool really_return)
int saved_size = arm_get_vfp_saved_size ();
if (saved_size > 0)
{
- rtx insn;
+ rtx_insn *insn;
floats_from_frame += saved_size;
insn = emit_insn (gen_addsi3 (ip_rtx,
hard_frame_pointer_rtx,
@@ -27164,7 +24906,7 @@ arm_expand_epilogue_apcs_frame (bool really_return)
{
/* The frame pointer is guaranteed to be non-double-word aligned, as
it is set to double-word-aligned old_stack_pointer - 4. */
- rtx insn;
+ rtx_insn *insn;
int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
@@ -27204,7 +24946,7 @@ arm_expand_epilogue_apcs_frame (bool really_return)
num_regs = bit_count (saved_regs_mask);
if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
{
- rtx insn;
+ rtx_insn *insn;
emit_insn (gen_blockage ());
/* Unwind the stack to just below the saved registers. */
insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
@@ -27221,7 +24963,7 @@ arm_expand_epilogue_apcs_frame (bool really_return)
{
/* Interrupt handlers will have pushed the
IP onto the stack, so restore it now. */
- rtx insn;
+ rtx_insn *insn;
rtx addr = gen_rtx_MEM (SImode,
gen_rtx_POST_INC (SImode,
stack_pointer_rtx));
@@ -27244,7 +24986,7 @@ arm_expand_epilogue_apcs_frame (bool really_return)
/* Restore the original stack pointer. Before prologue, the stack was
realigned and the original stack pointer saved in r0. For details,
see comment in arm_expand_prologue. */
- emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
+ emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
emit_jump_insn (simple_return_rtx);
}
@@ -27290,7 +25032,7 @@ arm_expand_epilogue (bool really_return)
if (frame_pointer_needed)
{
- rtx insn;
+ rtx_insn *insn;
/* Restore stack pointer if necessary. */
if (TARGET_ARM)
{
@@ -27347,7 +25089,7 @@ arm_expand_epilogue (bool really_return)
amount = offsets->outgoing_args - offsets->saved_regs;
if (amount)
{
- rtx tmp;
+ rtx_insn *tmp;
/* Force out any pending memory operations that reference stacked data
before stack de-allocation occurs. */
emit_insn (gen_blockage ());
@@ -27399,7 +25141,7 @@ arm_expand_epilogue (bool really_return)
for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
if (df_regs_ever_live_p (i) && !call_used_regs[i])
{
- rtx insn;
+ rtx_insn *insn;
rtx addr = gen_rtx_MEM (V2SImode,
gen_rtx_POST_INC (SImode,
stack_pointer_rtx));
@@ -27480,7 +25222,7 @@ arm_expand_epilogue (bool really_return)
arm_emit_multi_reg_pop (saved_regs_mask);
}
- if (return_in_pc == true)
+ if (return_in_pc)
return;
}
@@ -27488,9 +25230,10 @@ arm_expand_epilogue (bool really_return)
{
int i, j;
rtx dwarf = NULL_RTX;
- rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (crtl->args.pretend_args_size)));
+ rtx_insn *tmp =
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (crtl->args.pretend_args_size)));
RTX_FRAME_RELATED_P (tmp) = 1;
@@ -27525,7 +25268,7 @@ arm_expand_epilogue (bool really_return)
/* Restore the original stack pointer. Before prologue, the stack was
realigned and the original stack pointer saved in r0. For details,
see comment in arm_expand_prologue. */
- emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
+ emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
emit_jump_insn (simple_return_rtx);
}
@@ -27705,23 +25448,11 @@ thumb_output_move_mem_multiple (int n, rtx *operands)
case 3:
if (REGNO (operands[4]) > REGNO (operands[5]))
- {
- tmp = operands[4];
- operands[4] = operands[5];
- operands[5] = tmp;
- }
+ std::swap (operands[4], operands[5]);
if (REGNO (operands[5]) > REGNO (operands[6]))
- {
- tmp = operands[5];
- operands[5] = operands[6];
- operands[6] = tmp;
- }
+ std::swap (operands[5], operands[6]);
if (REGNO (operands[4]) > REGNO (operands[5]))
- {
- tmp = operands[4];
- operands[4] = operands[5];
- operands[5] = tmp;
- }
+ std::swap (operands[4], operands[5]);
output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
@@ -27896,6 +25627,59 @@ arm_emit_eabi_attribute (const char *name, int num, int val)
asm_fprintf (asm_out_file, "\n");
}
+/* Print the tuning parameters of CURRENT_TUNE to ASM_OUT_FILE as
+ comments in the assembler file. Every emitted line carries an '@'
+ after its leading tabs, including the rows of the branch_cost
+ table, so that no part of the dump can be read by the assembler as
+ anything but a comment. Boolean-like fields are cast to int and
+ branch_cost is evaluated for all four (speed, predictable) argument
+ combinations. Pointers are not printed for now. */
+
+void
+arm_print_tune_info (void)
+{
+ asm_fprintf (asm_out_file, "\t@.tune parameters\n");
+ asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
+ current_tune->constant_limit);
+ asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
+ current_tune->max_insns_skipped);
+ asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
+ current_tune->num_prefetch_slots);
+ asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
+ current_tune->l1_cache_size);
+ asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
+ current_tune->l1_cache_line_size);
+ asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
+ (int) current_tune->prefer_constant_pool);
+ asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
+ /* The table header and its four rows need their own '@': each one is
+ a separate line of output, so the '@' on the branch_cost line above
+ does not cover them. */
+ asm_fprintf (asm_out_file, "\t\t@\t\ts&p\tcost\n");
+ asm_fprintf (asm_out_file, "\t\t@\t\t00\t%d\n",
+ current_tune->branch_cost (false, false));
+ asm_fprintf (asm_out_file, "\t\t@\t\t01\t%d\n",
+ current_tune->branch_cost (false, true));
+ asm_fprintf (asm_out_file, "\t\t@\t\t10\t%d\n",
+ current_tune->branch_cost (true, false));
+ asm_fprintf (asm_out_file, "\t\t@\t\t11\t%d\n",
+ current_tune->branch_cost (true, true));
+ asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
+ (int) current_tune->prefer_ldrd_strd);
+ asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
+ (int) current_tune->logical_op_non_short_circuit[0],
+ (int) current_tune->logical_op_non_short_circuit[1]);
+ asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
+ (int) current_tune->prefer_neon_for_64bits);
+ asm_fprintf (asm_out_file,
+ "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
+ (int) current_tune->disparage_flag_setting_t16_encodings);
+ asm_fprintf (asm_out_file,
+ "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
+ (int) current_tune
+ ->disparage_partial_flag_setting_t16_encodings);
+ asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
+ (int) current_tune->string_ops_prefer_neon);
+ asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
+ current_tune->max_insns_inline_memset);
+ asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
+ current_tune->fuseable_ops);
+ asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
+ (int) current_tune->sched_autopref);
+}
+
static void
arm_file_start (void)
{
@@ -27947,6 +25731,9 @@ arm_file_start (void)
asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
}
+ if (print_tune_info)
+ arm_print_tune_info ();
+
if (TARGET_SOFT_FLOAT)
{
fpu_name = "softvfp";
@@ -27956,8 +25743,9 @@ arm_file_start (void)
fpu_name = arm_fpu_desc->name;
if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
{
- if (TARGET_HARD_FLOAT)
- arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
+ if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
+ arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
+
if (TARGET_HARD_FLOAT_ABI)
arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
}
@@ -28145,12 +25933,14 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
fputs ("\tldr\tr3, ", file);
assemble_name (file, label);
fputs ("+4\n", file);
- asm_fprintf (file, "\t%s\t%r, %r, r3\n",
+ asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
mi_op, this_regno, this_regno);
}
else if (mi_delta != 0)
{
- asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
+ /* Thumb1 unified syntax requires the s suffix in the instruction
+ name when one of the operands is an immediate. */
+ asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
mi_op, this_regno, this_regno,
mi_delta);
}
@@ -28298,7 +26088,7 @@ arm_output_load_gr (rtx *operands)
static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
- enum machine_mode mode,
+ machine_mode mode,
tree type,
int *pretend_size,
int second_time ATTRIBUTE_UNUSED)
@@ -28329,9 +26119,9 @@ arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
return !TARGET_AAPCS_BASED;
}
-static enum machine_mode
+static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
- enum machine_mode mode,
+ machine_mode mode,
int *punsignedp ATTRIBUTE_UNUSED,
const_tree fntype ATTRIBUTE_UNUSED,
int for_return ATTRIBUTE_UNUSED)
@@ -28492,7 +26282,11 @@ arm_set_return_address (rtx source, rtx scratch)
addr = plus_constant (Pmode, addr, delta);
}
- emit_move_insn (gen_frame_mem (Pmode, addr), source);
+ /* The store needs to be marked as frame related in order to prevent
+ DSE from deleting it as dead if it is based on fp. */
+ rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
}
}
@@ -28544,7 +26338,11 @@ thumb_set_return_address (rtx source, rtx scratch)
else
addr = plus_constant (Pmode, addr, delta);
- emit_move_insn (gen_frame_mem (Pmode, addr), source);
+ /* The store needs to be marked as frame related in order to prevent
+ DSE from deleting it as dead if it is based on fp. */
+ rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
}
else
emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
@@ -28552,7 +26350,7 @@ thumb_set_return_address (rtx source, rtx scratch)
/* Implements target hook vector_mode_supported_p. */
bool
-arm_vector_mode_supported_p (enum machine_mode mode)
+arm_vector_mode_supported_p (machine_mode mode)
{
/* Neon also supports V2SImode, etc. listed in the clause below. */
if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
@@ -28576,7 +26374,7 @@ arm_vector_mode_supported_p (enum machine_mode mode)
/* Implements target hook array_mode_supported_p. */
static bool
-arm_array_mode_supported_p (enum machine_mode mode,
+arm_array_mode_supported_p (machine_mode mode,
unsigned HOST_WIDE_INT nelems)
{
if (TARGET_NEON
@@ -28591,8 +26389,8 @@ arm_array_mode_supported_p (enum machine_mode mode,
registers when autovectorizing for Neon, at least until multiple vector
widths are supported properly by the middle-end. */
-static enum machine_mode
-arm_preferred_simd_mode (enum machine_mode mode)
+static machine_mode
+arm_preferred_simd_mode (machine_mode mode)
{
if (TARGET_NEON)
switch (mode)
@@ -28649,7 +26447,7 @@ arm_class_likely_spilled_p (reg_class_t rclass)
/* Implements target hook small_register_classes_for_mode_p. */
bool
-arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
+arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
return TARGET_THUMB1;
}
@@ -28660,7 +26458,7 @@ arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
guarantee no particular behavior for out-of-range counts. */
static unsigned HOST_WIDE_INT
-arm_shift_truncation_mask (enum machine_mode mode)
+arm_shift_truncation_mask (machine_mode mode)
{
return mode == SImode ? 255 : 0;
}
@@ -28698,7 +26496,7 @@ arm_dbx_register_number (unsigned int regno)
static rtx
arm_dwarf_register_span (rtx rtl)
{
- enum machine_mode mode;
+ machine_mode mode;
unsigned regno;
rtx parts[16];
int nregs;
@@ -28954,7 +26752,7 @@ arm_unwind_emit_set (FILE * asm_out_file, rtx p)
/* Emit unwind directives for the given insn. */
static void
-arm_unwind_emit (FILE * asm_out_file, rtx insn)
+arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
rtx note, pat;
bool handled_one = false;
@@ -29253,7 +27051,7 @@ arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wr
{
int shift = INTVAL (operands[2]);
char templ[50];
- enum machine_mode opmode = GET_MODE (operands[0]);
+ machine_mode opmode = GET_MODE (operands[0]);
gcc_assert (shift >= 0);
@@ -29340,7 +27138,7 @@ arm_output_iwmmxt_tinsr (rtx *operands)
const char *
thumb1_output_casesi (rtx *operands)
{
- rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
+ rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
@@ -29363,7 +27161,7 @@ thumb1_output_casesi (rtx *operands)
const char *
thumb2_output_casesi (rtx *operands)
{
- rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
+ rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
@@ -29400,10 +27198,15 @@ arm_issue_rate (void)
{
switch (arm_tune)
{
+ case xgene1:
+ return 4;
+
case cortexa15:
case cortexa57:
+ case exynosm1:
return 3;
+ case cortexm7:
case cortexr4:
case cortexr4f:
case cortexr5:
@@ -29413,6 +27216,7 @@ arm_issue_rate (void)
case cortexa8:
case cortexa9:
case cortexa12:
+ case cortexa17:
case cortexa53:
case fa726te:
case marvell_pj4:
@@ -29423,50 +27227,26 @@ arm_issue_rate (void)
}
}
-/* A table and a function to perform ARM-specific name mangling for
- NEON vector types in order to conform to the AAPCS (see "Procedure
- Call Standard for the ARM Architecture", Appendix A). To qualify
- for emission with the mangled names defined in that document, a
- vector type must not only be of the correct mode but also be
- composed of NEON vector element types (e.g. __builtin_neon_qi). */
-typedef struct
+/* Return how many instructions the scheduler should look ahead to
+ choose the best one. */
+static int
+arm_first_cycle_multipass_dfa_lookahead (void)
{
- enum machine_mode mode;
- const char *element_type_name;
- const char *aapcs_name;
-} arm_mangle_map_entry;
-
-static arm_mangle_map_entry arm_mangle_map[] = {
- /* 64-bit containerized types. */
- { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
- { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
- { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
- { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
- { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
- { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
- { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
- { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
- { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
- { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
-
- /* 128-bit containerized types. */
- { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
- { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
- { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
- { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
- { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
- { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
- { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
- { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
- { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
- { VOIDmode, NULL, NULL }
-};
+ int issue_rate = arm_issue_rate ();
+
+ return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
+}
+
+/* Enable modeling of L2 auto-prefetcher. This is a plain forwarder:
+ INSN and READY_INDEX are passed unchanged to
+ autopref_multipass_dfa_lookahead_guard and its result is returned
+ as is. */
+static int
+arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
+{
+ return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
+}
const char *
arm_mangle_type (const_tree type)
{
- arm_mangle_map_entry *pos = arm_mangle_map;
-
/* The ARM ABI documents (10th October 2008) say that "__va_list"
has to be managled as if it is in the "std" namespace. */
if (TARGET_AAPCS_BASED
@@ -29477,26 +27257,12 @@ arm_mangle_type (const_tree type)
if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
return "Dh";
- if (TREE_CODE (type) != VECTOR_TYPE)
- return NULL;
-
- /* Check the mode of the vector type, and the name of the vector
- element type, against the table. */
- while (pos->mode != VOIDmode)
- {
- tree elt_type = TREE_TYPE (type);
-
- if (pos->mode == TYPE_MODE (type)
- && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
- && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
- pos->element_type_name))
- return pos->aapcs_name;
-
- pos++;
- }
+ /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
+ builtin type. */
+ if (TYPE_NAME (type) != NULL)
+ return arm_mangle_builtin_type (type);
- /* Use the default mangling for unrecognized (possibly user-defined)
- vector types. */
+ /* Use the default mangling. */
return NULL;
}
@@ -29542,60 +27308,6 @@ arm_have_conditional_execution (void)
return !TARGET_THUMB1;
}
-tree
-arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
-{
- enum machine_mode in_mode, out_mode;
- int in_n, out_n;
-
- if (TREE_CODE (type_out) != VECTOR_TYPE
- || TREE_CODE (type_in) != VECTOR_TYPE
- || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
- return NULL_TREE;
-
- out_mode = TYPE_MODE (TREE_TYPE (type_out));
- out_n = TYPE_VECTOR_SUBPARTS (type_out);
- in_mode = TYPE_MODE (TREE_TYPE (type_in));
- in_n = TYPE_VECTOR_SUBPARTS (type_in);
-
-/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
- decl of the vectorized builtin for the appropriate vector mode.
- NULL_TREE is returned if no such builtin is available. */
-#undef ARM_CHECK_BUILTIN_MODE
-#define ARM_CHECK_BUILTIN_MODE(C) \
- (out_mode == SFmode && out_n == C \
- && in_mode == SFmode && in_n == C)
-
-#undef ARM_FIND_VRINT_VARIANT
-#define ARM_FIND_VRINT_VARIANT(N) \
- (ARM_CHECK_BUILTIN_MODE (2) \
- ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
- : (ARM_CHECK_BUILTIN_MODE (4) \
- ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
- : NULL_TREE))
-
- if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
- {
- enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
- switch (fn)
- {
- case BUILT_IN_FLOORF:
- return ARM_FIND_VRINT_VARIANT (vrintm);
- case BUILT_IN_CEILF:
- return ARM_FIND_VRINT_VARIANT (vrintp);
- case BUILT_IN_TRUNCF:
- return ARM_FIND_VRINT_VARIANT (vrintz);
- case BUILT_IN_ROUNDF:
- return ARM_FIND_VRINT_VARIANT (vrinta);
- default:
- return NULL_TREE;
- }
- }
- return NULL_TREE;
-}
-#undef ARM_CHECK_BUILTIN_MODE
-#undef ARM_FIND_VRINT_VARIANT
-
/* The AAPCS sets the maximum alignment of a vector to 64 bits. */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
@@ -29626,7 +27338,7 @@ arm_vector_alignment_reachable (const_tree type, bool is_packed)
}
static bool
-arm_builtin_support_vector_misalignment (enum machine_mode mode,
+arm_builtin_support_vector_misalignment (machine_mode mode,
const_tree type, int misalignment,
bool is_packed)
{
@@ -29663,8 +27375,7 @@ arm_conditional_register_usage (void)
/* When optimizing for size on Thumb-1, it's better not
to use the HI regs, because of the overhead of
stacking them. */
- for (regno = FIRST_HI_REGNUM;
- regno <= LAST_HI_REGNUM; ++regno)
+ for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
fixed_regs[regno] = call_used_regs[regno] = 1;
}
@@ -29850,7 +27561,7 @@ arm_post_atomic_barrier (enum memmodel model)
Use acquire and release versions if necessary. */
static void
-arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
+arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
rtx (*gen) (rtx, rtx);
@@ -29883,7 +27594,7 @@ arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
}
static void
-arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
+arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
rtx mem, bool rel)
{
rtx (*gen) (rtx, rtx, rtx);
@@ -29933,7 +27644,7 @@ void
arm_expand_compare_and_swap (rtx operands[])
{
rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
- enum machine_mode mode;
+ machine_mode mode;
rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
bval = operands[0];
@@ -30014,10 +27725,11 @@ void
arm_split_compare_and_swap (rtx operands[])
{
rtx rval, mem, oldval, newval, scratch;
- enum machine_mode mode;
+ machine_mode mode;
enum memmodel mod_s, mod_f;
bool is_weak;
- rtx label1, label2, x, cond;
+ rtx_code_label *label1, *label2;
+ rtx x, cond;
rval = operands[0];
mem = operands[1];
@@ -30043,7 +27755,7 @@ arm_split_compare_and_swap (rtx operands[])
if (!(use_acquire || use_release))
arm_pre_atomic_barrier (mod_s);
- label1 = NULL_RTX;
+ label1 = NULL;
if (!is_weak)
{
label1 = gen_label_rtx ();
@@ -30091,9 +27803,10 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
rtx value, rtx model_rtx, rtx cond)
{
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
- enum machine_mode mode = GET_MODE (mem);
- enum machine_mode wmode = (mode == DImode ? DImode : SImode);
- rtx label, x;
+ machine_mode mode = GET_MODE (mem);
+ machine_mode wmode = (mode == DImode ? DImode : SImode);
+ rtx_code_label *label;
+ rtx x;
bool use_acquire = TARGET_HAVE_LDACQ
&& !(model == MEMMODEL_RELAXED
@@ -30185,7 +27898,7 @@ struct expand_vec_perm_d
{
rtx target, op0, op1;
unsigned char perm[MAX_VECT_LEN];
- enum machine_mode vmode;
+ machine_mode vmode;
unsigned char nelt;
bool one_vector_p;
bool testing_p;
@@ -30196,7 +27909,7 @@ struct expand_vec_perm_d
static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
- enum machine_mode vmode = GET_MODE (target);
+ machine_mode vmode = GET_MODE (target);
bool one_vector_p = rtx_equal_p (op0, op1);
gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
@@ -30235,7 +27948,7 @@ arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
- enum machine_mode vmode = GET_MODE (target);
+ machine_mode vmode = GET_MODE (target);
unsigned int i, nelt = GET_MODE_NUNITS (vmode);
bool one_vector_p = rtx_equal_p (op0, op1);
rtx rmask[MAX_VECT_LEN], mask;
@@ -30393,7 +28106,7 @@ static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
unsigned int i, j, diff, nelt = d->nelt;
- rtx (*gen)(rtx, rtx, rtx);
+ rtx (*gen)(rtx, rtx);
if (!d->one_vector_p)
return false;
@@ -30457,9 +28170,7 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
if (d->testing_p)
return true;
- /* ??? The third operand is an artifact of the builtin infrastructure
- and is ignored by the actual instruction. */
- emit_insn (gen (d->target, d->op0, const0_rtx));
+ emit_insn (gen (d->target, d->op0));
return true;
}
@@ -30603,7 +28314,7 @@ static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
rtx rperm[MAX_VECT_LEN], sel;
- enum machine_mode vmode = d->vmode;
+ machine_mode vmode = d->vmode;
unsigned int i, nelt = d->nelt;
/* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
@@ -30728,7 +28439,7 @@ arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
static bool
-arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+arm_vectorize_vec_perm_const_ok (machine_mode vmode,
const unsigned char *sel)
{
struct expand_vec_perm_d d;
@@ -30769,7 +28480,7 @@ arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
}
bool
-arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
+arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
/* If we are soft float and we do not have ldrd
then all auto increment forms are ok. */
@@ -31019,7 +28730,7 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
if (code == ASHIFTRT)
{
- rtx done_label = gen_label_rtx ();
+ rtx_code_label *done_label = gen_label_rtx ();
emit_jump_insn (BRANCH (LT, done_label));
emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
out_down)));
@@ -31038,7 +28749,7 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
if (code == ASHIFTRT)
{
- rtx done_label = gen_label_rtx ();
+ rtx_code_label *done_label = gen_label_rtx ();
emit_jump_insn (BRANCH (LT, done_label));
emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
emit_insn (SET (out_down, ORR (out_down, scratch2)));
@@ -31073,7 +28784,7 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
enum rtx_code code = GET_CODE (*comparison);
int code_int;
- enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
+ machine_mode mode = (GET_MODE (*op1) == VOIDmode)
? GET_MODE (*op2) : GET_MODE (*op1);
gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
@@ -31116,6 +28827,586 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
}
+/* Return the maximum number of instructions to use for setting a
+ block of memory: 4 when the current function is optimized for
+ size, otherwise the max_insns_inline_memset field of CURRENT_TUNE. */
+static int
+arm_block_set_max_insns (void)
+{
+ if (optimize_function_for_size_p (cfun))
+ return 4;
+ else
+ return current_tune->max_insns_inline_memset;
+}
+
+/* Return TRUE if it's profitable to set a block of memory for the
+ non-vectorized case. VAL is the value to set the memory
+ with. LENGTH is the number of bytes to set. ALIGN is the
+ alignment of the destination memory in bytes. UNALIGNED_P
+ is TRUE if we can only set the memory with instructions
+ meeting alignment requirements. USE_STRD_P is TRUE if we
+ can use strd to set the memory.
+
+ The estimate is the cost of materializing VAL plus the number of
+ store instructions; the result is TRUE when that estimate does not
+ exceed arm_block_set_max_insns (). */
+static bool
+arm_block_set_non_vect_profit_p (rtx val,
+ unsigned HOST_WIDE_INT length,
+ unsigned HOST_WIDE_INT align,
+ bool unaligned_p, bool use_strd_p)
+{
+ int num = 0;
+ /* LEFTOVER[N] is the minimum number of strb/strh/str instructions
+ needed to set a leftover of N bytes, for N in 0-7. */
+ const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
+
+ if (unaligned_p)
+ {
+ num = arm_const_inline_cost (SET, val);
+ num += length / align + length % align; /* ALIGN-sized stores plus
+ the LENGTH % ALIGN remainder. */
+ }
+ else if (use_strd_p)
+ {
+ num = arm_const_double_inline_cost (val);
+ num += (length >> 3) + leftover[length & 7]; /* 8 bytes per store. */
+ }
+ else
+ {
+ num = arm_const_inline_cost (SET, val);
+ num += (length >> 2) + leftover[length & 3]; /* 4 bytes per store. */
+ }
+
+ /* We may be able to combine the last STRH/STRB pair into a single
+ STR by shifting one byte back. */
+ if (unaligned_access && length > 3 && (length & 3) == 3)
+ num--;
+
+ return (num <= arm_block_set_max_insns ());
+}
+
+/* Return TRUE if it's profitable to set a block of memory for the
+ vectorized case. LENGTH is the number of bytes to set.
+ ALIGN is the alignment of destination memory in bytes.
+ MODE is the vector mode used to set the memory.
+
+ The destination counts as unaligned when ALIGN is not a multiple
+ of 4. The result is TRUE when the estimated instruction count
+ does not exceed arm_block_set_max_insns (). */
+static bool
+arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
+ unsigned HOST_WIDE_INT align,
+ machine_mode mode)
+{
+ int num;
+ bool unaligned_p = ((align & 3) != 0);
+ unsigned int nelt = GET_MODE_NUNITS (mode);
+
+ /* Instruction loading constant value. */
+ num = 1;
+ /* Instructions storing the memory: LENGTH / NELT rounded up. */
+ num += (length + nelt - 1) / nelt;
+ /* Instructions adjusting the address expression. We only need to
+ adjust the address expression if it's 4 bytes aligned and the
+ bytes left over can only be stored by a mis-aligned store
+ instruction. */
+ if (!unaligned_p && (length & 3) != 0)
+ num++;
+
+ /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
+ if (!unaligned_p && mode == V16QImode)
+ num--;
+
+ return (num <= arm_block_set_max_insns ());
+}
+
+/* Set a block of memory using vectorization instructions for the
+ unaligned case. We fill the first LENGTH bytes of the memory
+ area starting from DSTBASE with byte constant VALUE. ALIGN is
+ the alignment requirement of memory and must not be a multiple of
+ 4 here. Return TRUE if succeeded; return FALSE, before anything
+ has been emitted, if arm_block_set_vect_profit_p rejects the
+ operation. */
+static bool
+arm_block_set_unaligned_vect (rtx dstbase,
+ unsigned HOST_WIDE_INT length,
+ unsigned HOST_WIDE_INT value,
+ unsigned HOST_WIDE_INT align)
+{
+ unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
+ rtx dst, mem;
+ rtx val_elt, val_vec, reg;
+ rtx rval[MAX_VECT_LEN];
+ rtx (*gen_func) (rtx, rtx);
+ machine_mode mode;
+ unsigned HOST_WIDE_INT v = value;
+
+ gcc_assert ((align & 0x3) != 0);
+ nelt_v8 = GET_MODE_NUNITS (V8QImode);
+ nelt_v16 = GET_MODE_NUNITS (V16QImode);
+ if (length >= nelt_v16) /* Use the wider mode whenever one full
+ V16QI vector fits in LENGTH. */
+ {
+ mode = V16QImode;
+ gen_func = gen_movmisalignv16qi;
+ }
+ else
+ {
+ mode = V8QImode;
+ gen_func = gen_movmisalignv8qi;
+ }
+ nelt_mode = GET_MODE_NUNITS (mode);
+ gcc_assert (length >= nelt_mode);
+ /* Skip if it isn't profitable. */
+ if (!arm_block_set_vect_profit_p (length, align, mode))
+ return false;
+
+ dst = copy_addr_to_reg (XEXP (dstbase, 0));
+ mem = adjust_automodify_address (dstbase, mode, dst, 0);
+
+ v = sext_hwi (v, BITS_PER_WORD);
+ val_elt = GEN_INT (v);
+ for (j = 0; j < nelt_mode; j++)
+ rval[j] = val_elt;
+
+ reg = gen_reg_rtx (mode);
+ val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
+ /* Emit instruction loading the constant value. */
+ emit_move_insn (reg, val_vec);
+
+ /* Handle nelt_mode bytes in a vector. DST is only advanced when
+ another full vector store follows, so after the loop it still
+ holds the address of the last vector stored. */
+ for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
+ {
+ emit_insn ((*gen_func) (mem, reg));
+ if (i + 2 * nelt_mode <= length)
+ emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
+ }
+
+ /* If at least nelt_v8 bytes are left over, we must be in
+ V16QI mode. */
+ gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
+
+ /* Handle (8, 16) bytes leftover with one more V16QI store. DST is
+ advanced by the leftover size only, so this store overlaps bytes
+ that were already set. */
+ if (i + nelt_v8 < length)
+ {
+ emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
+ /* We are shifting bytes back, set the alignment accordingly. */
+ if ((length & 1) != 0 && align >= 2)
+ set_mem_align (mem, BITS_PER_UNIT);
+
+ emit_insn (gen_movmisalignv16qi (mem, reg));
+ }
+ /* Handle (0, 8] bytes leftover with one V8QI store. */
+ else if (i < length && i + nelt_v8 >= length)
+ {
+ if (mode == V16QImode)
+ {
+ reg = gen_lowpart (V8QImode, reg);
+ mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
+ }
+ emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
+ + (nelt_mode - nelt_v8))));
+ /* We are shifting bytes back, set the alignment accordingly. */
+ if ((length & 1) != 0 && align >= 2)
+ set_mem_align (mem, BITS_PER_UNIT);
+
+ emit_insn (gen_movmisalignv8qi (mem, reg));
+ }
+
+ return true;
+}
+
+/* Set a block of memory using vectorization instructions for the
+ aligned case. We fill the first LENGTH bytes of the memory area
+ starting from DSTBASE with byte constant VALUE. ALIGN is the
+ alignment requirement of memory and must be a multiple of 4 here.
+ Return TRUE if succeeded; return FALSE, before anything has been
+ emitted, if arm_block_set_vect_profit_p rejects the operation. */
+static bool
+arm_block_set_aligned_vect (rtx dstbase,
+ unsigned HOST_WIDE_INT length,
+ unsigned HOST_WIDE_INT value,
+ unsigned HOST_WIDE_INT align)
+{
+ unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
+ rtx dst, addr, mem;
+ rtx val_elt, val_vec, reg;
+ rtx rval[MAX_VECT_LEN];
+ machine_mode mode;
+ unsigned HOST_WIDE_INT v = value;
+
+ gcc_assert ((align & 0x3) == 0);
+ nelt_v8 = GET_MODE_NUNITS (V8QImode);
+ nelt_v16 = GET_MODE_NUNITS (V16QImode);
+ /* V16QImode is chosen only when a full V16QI vector fits in LENGTH,
+ unaligned_access is set and the target is not big-endian. */
+ if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
+ mode = V16QImode;
+ else
+ mode = V8QImode;
+
+ nelt_mode = GET_MODE_NUNITS (mode);
+ gcc_assert (length >= nelt_mode);
+ /* Skip if it isn't profitable. */
+ if (!arm_block_set_vect_profit_p (length, align, mode))
+ return false;
+
+ dst = copy_addr_to_reg (XEXP (dstbase, 0));
+
+ v = sext_hwi (v, BITS_PER_WORD);
+ val_elt = GEN_INT (v);
+ for (j = 0; j < nelt_mode; j++)
+ rval[j] = val_elt;
+
+ reg = gen_reg_rtx (mode);
+ val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
+ /* Emit instruction loading the constant value. */
+ emit_move_insn (reg, val_vec);
+
+ i = 0;
+ /* Handle first 16 bytes specially using vst1:v16qi instruction. */
+ if (mode == V16QImode)
+ {
+ mem = adjust_automodify_address (dstbase, mode, dst, 0);
+ emit_insn (gen_movmisalignv16qi (mem, reg));
+ i += nelt_mode;
+ /* Handle (8, 16) bytes leftover using vst1:v16qi again. DST
+ still holds the start address here, so the second store is
+ placed at offset LENGTH - 16. */
+ if (i + nelt_v8 < length && i + nelt_v16 > length)
+ {
+ emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
+ mem = adjust_automodify_address (dstbase, mode, dst, 0);
+ /* We are shifting bytes back, set the alignment accordingly. */
+ if ((length & 0x3) == 0)
+ set_mem_align (mem, BITS_PER_UNIT * 4);
+ else if ((length & 0x1) == 0)
+ set_mem_align (mem, BITS_PER_UNIT * 2);
+ else
+ set_mem_align (mem, BITS_PER_UNIT);
+
+ emit_insn (gen_movmisalignv16qi (mem, reg));
+ return true;
+ }
+ /* Fall through for bytes leftover: everything below works in
+ V8QImode on the low part of REG. */
+ mode = V8QImode;
+ nelt_mode = GET_MODE_NUNITS (mode);
+ reg = gen_lowpart (V8QImode, reg);
+ }
+
+ /* Handle 8 bytes in a vector. The address is formed as DST plus
+ the constant offset I; DST itself is not modified by this loop. */
+ for (; (i + nelt_mode <= length); i += nelt_mode)
+ {
+ addr = plus_constant (Pmode, dst, i);
+ mem = adjust_automodify_address (dstbase, mode, addr, i);
+ emit_move_insn (mem, reg);
+ }
+
+ /* Handle single word leftover by shifting 4 bytes back. We can
+ use aligned access for this case. */
+ if (i + UNITS_PER_WORD == length)
+ {
+ addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
+ mem = adjust_automodify_address (dstbase, mode,
+ addr, i - UNITS_PER_WORD);
+ /* We are shifting 4 bytes back, set the alignment accordingly. */
+ if (align > UNITS_PER_WORD)
+ set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
+
+ emit_move_insn (mem, reg);
+ }
+ /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
+ We have to use unaligned access for this case. */
+ else if (i < length)
+ {
+ emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
+ mem = adjust_automodify_address (dstbase, mode, dst, 0);
+ /* We are shifting bytes back, set the alignment accordingly. */
+ if ((length & 1) == 0)
+ set_mem_align (mem, BITS_PER_UNIT * 2);
+ else
+ set_mem_align (mem, BITS_PER_UNIT);
+
+ emit_insn (gen_movmisalignv8qi (mem, reg));
+ }
+
+ return true;
+}
+
+/* Set a block of memory using plain strh/strb instructions, only
+ using instructions allowed by ALIGN on the processor. We fill the
+ first LENGTH bytes of the memory area starting from DSTBASE
+ with byte constant VALUE. ALIGN is the alignment requirement
+ of memory and must be 1 or 2 here. Return TRUE if succeeded;
+ return FALSE, before anything has been emitted, if
+ arm_block_set_non_vect_profit_p rejects the operation. */
+static bool
+arm_block_set_unaligned_non_vect (rtx dstbase,
+ unsigned HOST_WIDE_INT length,
+ unsigned HOST_WIDE_INT value,
+ unsigned HOST_WIDE_INT align)
+{
+ unsigned int i;
+ rtx dst, addr, mem;
+ rtx val_exp, val_reg, reg;
+ machine_mode mode;
+ HOST_WIDE_INT v = value;
+
+ gcc_assert (align == 1 || align == 2);
+
+ if (align == 2)
+ v |= (value << BITS_PER_UNIT); /* Replicate VALUE into the second
+ byte for the HImode stores. */
+
+ v = sext_hwi (v, BITS_PER_WORD);
+ val_exp = GEN_INT (v);
+ /* Skip if it isn't profitable. */
+ if (!arm_block_set_non_vect_profit_p (val_exp, length,
+ align, true, false))
+ return false;
+
+ dst = copy_addr_to_reg (XEXP (dstbase, 0));
+ mode = (align == 2 ? HImode : QImode);
+ val_reg = force_reg (SImode, val_exp);
+ reg = gen_lowpart (mode, val_reg);
+
+ for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
+ {
+ addr = plus_constant (Pmode, dst, i);
+ mem = adjust_automodify_address (dstbase, mode, addr, i);
+ emit_move_insn (mem, reg);
+ }
+
+ /* Handle single byte leftover. */
+ if (i + 1 == length)
+ {
+ reg = gen_lowpart (QImode, val_reg);
+ addr = plus_constant (Pmode, dst, i);
+ mem = adjust_automodify_address (dstbase, QImode, addr, i);
+ emit_move_insn (mem, reg);
+ i++;
+ }
+
+ gcc_assert (i == length);
+ return true;
+}
+
+/* Set a block of memory using plain strd/str/strh/strb instructions,
+ to permit unaligned copies on processors which support unaligned
+ semantics for those instructions. We fill the first LENGTH bytes
+ of the memory area starting from DSTBASE with byte constant VALUE.
+ ALIGN is the alignment requirement of memory.
+
+ Return FALSE, before any store is emitted, if
+ arm_block_set_non_vect_profit_p rejects both the strd variant (when
+ it was considered) and the plain word variant; otherwise emit the
+ stores and return TRUE. */
+static bool
+arm_block_set_aligned_non_vect (rtx dstbase,
+ unsigned HOST_WIDE_INT length,
+ unsigned HOST_WIDE_INT value,
+ unsigned HOST_WIDE_INT align)
+{
+ unsigned int i;
+ rtx dst, addr, mem;
+ rtx val_exp, val_reg, reg;
+ unsigned HOST_WIDE_INT v;
+ bool use_strd_p;
+
+ use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
+ && TARGET_LDRD && current_tune->prefer_ldrd_strd); /* strd needs two words of data, ALIGN a multiple of 4, TARGET_LDRD and a tuning that prefers ldrd/strd. */
+
+ v = (value | (value << 8) | (value << 16) | (value << 24)); /* Replicate VALUE into four consecutive bytes. */
+ if (length < UNITS_PER_WORD)
+ v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT); /* Keep only the low LENGTH bytes. NOTE(review): for LENGTH == 0 the shift count would be UNITS_PER_WORD * BITS_PER_UNIT; presumably callers never pass 0 -- confirm. */
+
+ if (use_strd_p)
+ v |= (v << BITS_PER_WORD); /* Copy the word pattern into the next word up for the DImode stores. */
+ else
+ v = sext_hwi (v, BITS_PER_WORD);
+
+ val_exp = GEN_INT (v);
+ /* Skip if it isn't profitable. */
+ if (!arm_block_set_non_vect_profit_p (val_exp, length,
+ align, false, use_strd_p))
+ {
+ if (!use_strd_p)
+ return false;
+
+ /* Try without strd. */
+ v = (v >> BITS_PER_WORD); /* Drop back to a single-word pattern (undoes the duplication above). */
+ v = sext_hwi (v, BITS_PER_WORD);
+ val_exp = GEN_INT (v);
+ use_strd_p = false;
+ if (!arm_block_set_non_vect_profit_p (val_exp, length,
+ align, false, use_strd_p))
+ return false;
+ }
+
+ i = 0; /* I is the byte offset of the next byte still to be written. */
+ dst = copy_addr_to_reg (XEXP (dstbase, 0));
+ /* Handle double words using strd if possible. */
+ if (use_strd_p)
+ {
+ val_reg = force_reg (DImode, val_exp);
+ reg = val_reg;
+ for (; (i + 8 <= length); i += 8)
+ {
+ addr = plus_constant (Pmode, dst, i);
+ mem = adjust_automodify_address (dstbase, DImode, addr, i);
+ emit_move_insn (mem, reg);
+ }
+ }
+ else
+ val_reg = force_reg (SImode, val_exp);
+
+ /* Handle words. */
+ reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg); /* With strd the value lives in a DImode register, so take its SImode low part. */
+ for (; (i + 4 <= length); i += 4)
+ {
+ addr = plus_constant (Pmode, dst, i);
+ mem = adjust_automodify_address (dstbase, SImode, addr, i);
+ if ((align & 3) == 0)
+ emit_move_insn (mem, reg);
+ else
+ emit_insn (gen_unaligned_storesi (mem, reg));
+ }
+
+ /* Merge last pair of STRH and STRB into a STR if possible. The word
+ is stored at offset I - 1, so it rewrites one byte already written
+ above (I > 0 guarantees such a byte exists) and ends exactly at
+ LENGTH. */
+ if (unaligned_access && i > 0 && (i + 3) == length)
+ {
+ addr = plus_constant (Pmode, dst, i - 1);
+ mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
+ /* We are shifting one byte back, set the alignment accordingly. */
+ if ((align & 1) == 0)
+ set_mem_align (mem, BITS_PER_UNIT);
+
+ /* Most likely this is an unaligned access, and we can't tell at
+ compilation time. */
+ emit_insn (gen_unaligned_storesi (mem, reg));
+ return true;
+ }
+
+ /* Handle half word leftover. */
+ if (i + 2 <= length)
+ {
+ reg = gen_lowpart (HImode, val_reg);
+ addr = plus_constant (Pmode, dst, i);
+ mem = adjust_automodify_address (dstbase, HImode, addr, i);
+ if ((align & 1) == 0)
+ emit_move_insn (mem, reg);
+ else
+ emit_insn (gen_unaligned_storehi (mem, reg));
+
+ i += 2;
+ }
+
+ /* Handle single byte leftover. */
+ if (i + 1 == length)
+ {
+ reg = gen_lowpart (QImode, val_reg);
+ addr = plus_constant (Pmode, dst, i);
+ mem = adjust_automodify_address (dstbase, QImode, addr, i);
+ emit_move_insn (mem, reg);
+ }
+
+ return true;
+}
+
+/* Set a block of memory using vectorization instructions for both
+ aligned and unaligned cases. We fill the first LENGTH bytes of
+ the memory area starting from DSTBASE with byte constant VALUE.
+ ALIGN is the alignment requirement of memory.
+
+ Return FALSE without emitting anything when an unaligned store
+ would be needed (ALIGN or LENGTH is not a multiple of 4) but
+ unaligned_access is off or BYTES_BIG_ENDIAN is set. Otherwise
+ return whatever the aligned or unaligned vector helper returns. */
+static bool
+arm_block_set_vect (rtx dstbase,
+ unsigned HOST_WIDE_INT length,
+ unsigned HOST_WIDE_INT value,
+ unsigned HOST_WIDE_INT align)
+{
+ /* Check whether we need to use unaligned store instruction. */
+ if (((align & 3) != 0 || (length & 3) != 0)
+ /* Check whether unaligned store instruction is available. */
+ && (!unaligned_access || BYTES_BIG_ENDIAN))
+ return false;
+
+ if ((align & 3) == 0) /* The helper is chosen on ALIGN alone; LENGTH does not affect the choice. */
+ return arm_block_set_aligned_vect (dstbase, length, value, align);
+ else
+ return arm_block_set_unaligned_vect (dstbase, length, value, align);
+}
+
+/* Expand string store operation. Firstly we try to do that by using
+ vectorization instructions, then try with ARM unaligned access and
+ double-word store if profitable. OPERANDS[0] is the destination,
+ OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
+ initialize the memory, OPERANDS[3] is the known alignment of the
+ destination.
+
+ Return TRUE if the stores were emitted. Return FALSE if the length
+ or the value is not a CONST_INT, if the length exceeds 64 bytes, or
+ if the selected non-vector helper declines. */
+bool
+arm_gen_setmem (rtx *operands)
+{
+ rtx dstbase = operands[0];
+ unsigned HOST_WIDE_INT length;
+ unsigned HOST_WIDE_INT value;
+ unsigned HOST_WIDE_INT align;
+
+ if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1])) /* Only a constant value and a constant length are expanded here. */
+ return false;
+
+ length = UINTVAL (operands[1]);
+ if (length > 64) /* Blocks longer than 64 bytes are not expanded inline. */
+ return false;
+
+ value = (UINTVAL (operands[2]) & 0xFF); /* Only the low byte of the value is used. */
+ align = UINTVAL (operands[3]); /* NOTE(review): OPERANDS[3] is read with UINTVAL without a CONST_INT_P check -- presumably always constant; confirm against the expander. */
+ if (TARGET_NEON && length >= 8
+ && current_tune->string_ops_prefer_neon
+ && arm_block_set_vect (dstbase, length, value, align))
+ return true; /* A failed vector attempt falls through to the non-vector helpers below. */
+
+ if (!unaligned_access && (align & 3) != 0)
+ return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
+
+ return arm_block_set_aligned_non_vect (dstbase, length, value, align);
+}
+
+/* Return TRUE if the current tuning lists at least one fuseable
+ operation, i.e. current_tune->fuseable_ops is not ARM_FUSE_NOTHING. */
+static bool
+arm_macro_fusion_p (void)
+{
+ return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
+}
+
+/* Return TRUE if PREV followed by CURR is a pair that should be fused.
+ Both insns must be single sets, CURR must not be a conditional jump,
+ and arm_macro_fusion_p must hold. The only pair recognized is
+ movw/movt, and only when the tuning has ARM_FUSE_MOVW_MOVT set;
+ every other case returns FALSE. */
+static bool
+aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
+{
+ rtx set_dest;
+ rtx prev_set = single_set (prev);
+ rtx curr_set = single_set (curr);
+
+ if (!prev_set
+ || !curr_set)
+ return false;
+
+ if (any_condjump_p (curr))
+ return false;
+
+ if (!arm_macro_fusion_p ())
+ return false;
+
+ if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
+ {
+ /* We are trying to fuse
+ movw imm / movt imm
+ instructions as a group that gets scheduled together. */
+
+ set_dest = SET_DEST (curr_set);
+
+ if (GET_MODE (set_dest) != SImode) /* Both matched forms need an SImode destination in CURR. */
+ return false;
+
+ /* We are trying to match:
+ prev (movw) == (set (reg r0) (const_int imm16))
+ curr (movt) == (set (zero_extract (reg r0)
+ (const_int 16)
+ (const_int 16))
+ (const_int imm16_1))
+ or
+ prev (movw) == (set (reg r1)
+ (high (symbol_ref ("SYM"))))
+ curr (movt) == (set (reg r0)
+ (lo_sum (reg r1)
+ (symbol_ref ("SYM")))) */
+ if (GET_CODE (set_dest) == ZERO_EXTRACT)
+ { /* A ZERO_EXTRACT destination that fails the test below is never tried against the LO_SUM form. */
+ if (CONST_INT_P (SET_SRC (curr_set))
+ && CONST_INT_P (SET_SRC (prev_set))
+ && REG_P (XEXP (set_dest, 0))
+ && REG_P (SET_DEST (prev_set))
+ && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
+ return true;
+ }
+ else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
+ && REG_P (SET_DEST (curr_set))
+ && REG_P (SET_DEST (prev_set))
+ && GET_CODE (SET_SRC (prev_set)) == HIGH
+ && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) /* The code compares the two destination registers; the register inside the LO_SUM is not inspected. */
+ return true;
+ }
+ return false;
+}
+
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
static unsigned HOST_WIDE_INT
@@ -31175,4 +29466,124 @@ arm_is_constant_pool_ref (rtx x)
&& CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
+/* If MEM is in the form of [base+offset], extract the two parts
+ of address and set to BASE and OFFSET, otherwise return false
+ after clearing BASE and OFFSET. Return true on success.
+
+ Two address shapes are accepted, each optionally wrapped in a CONST:
+ a bare REG (OFFSET becomes const0_rtx), and (plus REG CONST_INT).
+ MEM must satisfy MEM_P; this is asserted. */
+
+static bool
+extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
+{
+ rtx addr;
+
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ /* Strip off const from addresses like (const (addr)). */
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+
+ if (GET_CODE (addr) == REG) /* Bare register address: the offset is zero. */
+ {
+ *base = addr;
+ *offset = const0_rtx;
+ return true;
+ }
+
+ if (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && CONST_INT_P (XEXP (addr, 1))) /* Register plus constant; the register must be the first operand. */
+ {
+ *base = XEXP (addr, 0);
+ *offset = XEXP (addr, 1);
+ return true;
+ }
+
+ *base = NULL_RTX; /* Any other address shape: clear both outputs. */
+ *offset = NULL_RTX;
+
+ return false;
+}
+
+/* If INSN is a load or store of address in the form of [base+offset],
+ extract the two parts and set to BASE and OFFSET. IS_LOAD is set
+ to TRUE if it's a load. Return TRUE if INSN is such an instruction,
+ otherwise return FALSE.
+
+ Only an insn whose pattern is itself a SET between a REG and a MEM
+ is considered. When the pattern is not a SET, or is a SET of
+ another shape, FALSE is returned with BASE, OFFSET and IS_LOAD left
+ unwritten. */
+
+static bool
+fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
+{
+ rtx x, dest, src;
+
+ gcc_assert (INSN_P (insn));
+ x = PATTERN (insn);
+ if (GET_CODE (x) != SET)
+ return false;
+
+ src = SET_SRC (x);
+ dest = SET_DEST (x);
+ if (GET_CODE (src) == REG && GET_CODE (dest) == MEM) /* Store: register to memory. */
+ {
+ *is_load = false;
+ extract_base_offset_in_addr (dest, base, offset);
+ }
+ else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG) /* Load: memory to register. */
+ {
+ *is_load = true;
+ extract_base_offset_in_addr (src, base, offset);
+ }
+ else
+ return false;
+
+ return (*base != NULL_RTX && *offset != NULL_RTX); /* The helper's result is ignored above; it clears both outputs on failure, so this test detects it. */
+}
+
+/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
+
+ Currently we only support fusing ldr or str instructions, so FUSION_PRI
+ and PRI are only calculated for these instructions. For other instructions,
+ FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other kinds
+ of instruction fusion can be supported by returning different priorities.
+
+ It's important that irrelevant instructions get the largest FUSION_PRI.
+
+ For a load or store recognized by fusion_load_store, FUSION_PRI is
+ MAX_PRI - 2 for a load and MAX_PRI - 3 for a store, and PRI starts
+ from (MAX_PRI - 1) / 2 and is then adjusted by the base register
+ number and the offset as described in the body. */
+
+static void
+arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
+ int *fusion_pri, int *pri)
+{
+ int tmp, off_val;
+ bool is_load;
+ rtx base, offset;
+
+ gcc_assert (INSN_P (insn));
+
+ tmp = max_pri - 1;
+ if (!fusion_load_store (insn, &base, &offset, &is_load))
+ {
+ *pri = tmp;
+ *fusion_pri = tmp;
+ return;
+ }
+
+ /* Load goes first. */
+ if (is_load)
+ *fusion_pri = tmp - 1;
+ else
+ *fusion_pri = tmp - 2;
+
+ tmp /= 2; /* NOTE(review): presumably halved to leave headroom for the adjustments below -- confirm. */
+
+ /* INSN with smaller base register goes first. */
+ tmp -= ((REGNO (base) & 0xff) << 20); /* Only the low 8 bits of the register number are used, placed above the 20-bit offset field. */
+
+ /* INSN with smaller offset goes first. */
+ off_val = (int)(INTVAL (offset));
+ if (off_val >= 0)
+ tmp -= (off_val & 0xfffff); /* Only the low 20 bits of a non-negative offset are used. */
+ else
+ tmp += ((- off_val) & 0xfffff); /* Negative offset: the low 20 bits of its magnitude are added instead. */
+
+ *pri = tmp;
+ return;
+}
#include "gt-arm.h"