author     amylaar <amylaar@138bc75d-0d04-0410-961f-82ee72b054a4>  2013-10-01 17:03:46 +0000
committer  amylaar <amylaar@138bc75d-0d04-0410-961f-82ee72b054a4>  2013-10-01 17:03:46 +0000
commit     09cb6a17e71bd40d2fbfaf82a1502fc210e33c87 (patch)
tree       75a517d751430164090a2f398e9524f7505b2d62
parent     05a0ff3338858318c1a7820fbe38989cd52a65a7 (diff)
download   gcc-09cb6a17e71bd40d2fbfaf82a1502fc210e33c87.tar.gz
2013-10-01 Saurabh Verma <saurabh.verma@codito.com>
Ramana Radhakrishnan <ramana.radhakrishnan@codito.com>
Joern Rennecke <joern.rennecke@embecosm.com>
Muhammad Khurram Riaz <khurram.riaz@arc.com>
Brendan Kehoe <brendan@zen.org>
Michael Eager <eager@eagercon.com>
Simon Cook <simon.cook@embecosm.com>
Jeremy Bennett <jeremy.bennett@embecosm.com>
* config/arc, common/config/arc: New directories.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@203072 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--  gcc/ChangeLog                       |   11
-rw-r--r--  gcc/common/config/arc/arc-common.c  |  117
-rw-r--r--  gcc/config/arc/arc-modes.def        |   37
-rw-r--r--  gcc/config/arc/arc-opts.h           |   28
-rw-r--r--  gcc/config/arc/arc-protos.h         |  118
-rw-r--r--  gcc/config/arc/arc-simd.h           |  186
-rw-r--r--  gcc/config/arc/arc.c                | 9201
-rw-r--r--  gcc/config/arc/arc.h                | 1683
-rw-r--r--  gcc/config/arc/arc.md               | 5190
-rw-r--r--  gcc/config/arc/arc.opt              |  390
-rw-r--r--  gcc/config/arc/arc600.md            |   63
-rw-r--r--  gcc/config/arc/arc700.md            |  170
-rw-r--r--  gcc/config/arc/constraints.md       |  399
-rw-r--r--  gcc/config/arc/fpx.md               |  674
-rw-r--r--  gcc/config/arc/predicates.md        |  807
-rw-r--r--  gcc/config/arc/simdext.md           | 1313
-rw-r--r--  gcc/config/arc/t-arc-newlib         |   38
-rw-r--r--  gcc/config/arc/t-arc-uClibc         |   20
18 files changed, 20445 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5b71a25acf2..968b0d58506 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2013-10-01  Saurabh Verma  <saurabh.verma@codito.com>
+            Ramana Radhakrishnan  <ramana.radhakrishnan@codito.com>
+            Joern Rennecke  <joern.rennecke@embecosm.com>
+            Muhammad Khurram Riaz  <khurram.riaz@arc.com>
+            Brendan Kehoe  <brendan@zen.org>
+            Michael Eager  <eager@eagercon.com>
+            Simon Cook  <simon.cook@embecosm.com>
+            Jeremy Bennett  <jeremy.bennett@embecosm.com>
+
+        * config/arc, common/config/arc: New directories.
+
 2013-10-01  Joern Rennecke  <joern.rennecke@embecosm.com>
             Brendan Kehoe  <brendan@zen.org>
             Simon Cook  <simon.cook@embecosm.com>
diff --git a/gcc/common/config/arc/arc-common.c b/gcc/common/config/arc/arc-common.c
new file mode 100644
index 00000000000..36e60ebc9d8
--- /dev/null
+++ b/gcc/common/config/arc/arc-common.c
@@ -0,0 +1,117 @@
+/* Common hooks for Synopsys DesignWare ARC
+   Copyright (C) 1994, 1995, 1997, 1998, 2007-2013
+   Free Software Foundation, Inc.
+   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+   on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "opts.h"
+#include "flags.h"
+
+static void
+arc_option_init_struct (struct gcc_options *opts)
+{
+  opts->x_flag_no_common = 255; /* Mark as not user-initialized.  */
+
+  /* Which cpu we're compiling for (A5, ARC600, ARC601, ARC700).  */
+  arc_cpu = PROCESSOR_NONE;
+}
+
+/* Set default optimization options.  */
+/* The conditions are incomplete, so we rely on the evaluation order here,
+   which goes from first to last, i.e. the last match prevails.  */
+/* ??? But this trick only works for reject_negative options.  Approximate
+   missing option combination.  */
+#define OPT_LEVELS_3_PLUS_SPEED_ONLY OPT_LEVELS_3_PLUS
+static const struct default_options arc_option_optimization_table[] =
+  {
+    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+    { OPT_LEVELS_ALL, OPT_mRcq, NULL, 1 },
+    { OPT_LEVELS_ALL, OPT_mRcw, NULL, 1 },
+    { OPT_LEVELS_ALL, OPT_msize_level_, NULL, 1 },
+    { OPT_LEVELS_3_PLUS_SPEED_ONLY, OPT_msize_level_, NULL, 0 },
+    { OPT_LEVELS_SIZE, OPT_msize_level_, NULL, 3 },
+    { OPT_LEVELS_3_PLUS_SPEED_ONLY, OPT_malign_call, NULL, 1 },
+    { OPT_LEVELS_ALL, OPT_mearly_cbranchsi, NULL, 1 },
+    { OPT_LEVELS_ALL, OPT_mbbit_peephole, NULL, 1 },
+    { OPT_LEVELS_SIZE, OPT_mq_class, NULL, 1 },
+    { OPT_LEVELS_SIZE, OPT_mcase_vector_pcrel, NULL, 1 },
+    { OPT_LEVELS_SIZE, OPT_mcompact_casesi, NULL, 1 },
+    { OPT_LEVELS_NONE, 0, NULL, 0 }
+  };
+
+/* Process options.  */
+static bool
+arc_handle_option (struct gcc_options *opts, struct gcc_options *opts_set,
+                   const struct cl_decoded_option *decoded,
+                   location_t loc)
+{
+  size_t code = decoded->opt_index;
+  int value = decoded->value;
+
+  switch (code)
+    {
+      static int mcpu_seen = PROCESSOR_NONE;
+    case OPT_mcpu_:
+      /* N.B., at this point arc_cpu has already been set to its new value by
+         our caller, so comparing arc_cpu with PROCESSOR_NONE is pointless.  */
+      if (mcpu_seen != PROCESSOR_NONE && mcpu_seen != value)
+        warning_at (loc, 0, "multiple -mcpu= options specified.");
+      mcpu_seen = value;
+
+      switch (value)
+        {
+        case PROCESSOR_A5:
+        case PROCESSOR_ARC600:
+        case PROCESSOR_ARC700:
+          if (! (opts_set->x_target_flags & MASK_BARREL_SHIFTER))
+            opts->x_target_flags |= MASK_BARREL_SHIFTER;
+          break;
+        case PROCESSOR_ARC601:
+          if (! (opts_set->x_target_flags & MASK_BARREL_SHIFTER))
+            opts->x_target_flags &= ~MASK_BARREL_SHIFTER;
+          break;
+        default:
+          gcc_unreachable ();
+        }
+    }
+
+  return true;
+}
+
+#define TARGET_OPTION_INIT_STRUCT arc_option_init_struct
+#define TARGET_OPTION_OPTIMIZATION_TABLE arc_option_optimization_table
+#define TARGET_HANDLE_OPTION arc_handle_option
+
+#define DEFAULT_NO_SDATA (TARGET_SDATA_DEFAULT ? 0 : MASK_NO_SDATA_SET)
+
+/* We default to ARC700, which has the barrel shifter enabled.  */
+#define TARGET_DEFAULT_TARGET_FLAGS \
+  (MASK_BARREL_SHIFTER|MASK_VOLATILE_CACHE_SET|DEFAULT_NO_SDATA)
+
+
+#include "common/common-target-def.h"
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
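The 255 written into x_flag_no_common above is a sentinel: option processing can only ever produce 0 or 1, so arc_override_options (in arc.c further down) can tell whether the user touched the flag and derive a default otherwise. A minimal standalone sketch of that idiom, with illustrative names only (not GCC code):

    #include <stdio.h>

    #define FLAG_UNSET 255  /* Sentinel: cannot result from user input (0/1).  */

    static int flag_no_common;
    static int target_no_sdata;  /* Stands in for TARGET_NO_SDATA_SET.  */

    static void option_init_struct (void) { flag_no_common = FLAG_UNSET; }

    static void option_override (void)
    {
      if (flag_no_common == FLAG_UNSET)     /* User said nothing...  */
        flag_no_common = !target_no_sdata;  /* ...so derive a default.  */
    }

    int main (void)
    {
      option_init_struct ();
      /* Command-line processing would assign 0 or 1 here if -f[no-]common
         had been given; we leave the sentinel in place to show the fallback.  */
      option_override ();
      printf ("flag_no_common = %d\n", flag_no_common);  /* Prints 1.  */
      return 0;
    }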
diff --git a/gcc/config/arc/arc-modes.def b/gcc/config/arc/arc-modes.def
new file mode 100644
index 00000000000..e6ec727798e
--- /dev/null
+++ b/gcc/config/arc/arc-modes.def
@@ -0,0 +1,37 @@
+/* Definitions of target machine for GNU compiler, Synopsys DesignWare ARC cpu.
+   Copyright (C) 2002, 2007-2012 Free Software Foundation, Inc.
+   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+   on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* Some insns set all condition code flags, some only set the ZNC flags, and
+   some only set the ZN flags.  */
+
+CC_MODE (CC_ZN);
+CC_MODE (CC_Z);
+CC_MODE (CC_C);
+CC_MODE (CC_FP_GT);
+CC_MODE (CC_FP_GE);
+CC_MODE (CC_FP_ORD);
+CC_MODE (CC_FP_UNEQ);
+CC_MODE (CC_FPX);
+
+/* Vector modes.  */
+VECTOR_MODES (INT, 4);   /*            V4QI V2HI */
+VECTOR_MODES (INT, 8);   /*       V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16);  /* V16QI V8HI V4SI V2DI */
diff --git a/gcc/config/arc/arc-opts.h b/gcc/config/arc/arc-opts.h
new file mode 100644
index 00000000000..17ff2286e65
--- /dev/null
+++ b/gcc/config/arc/arc-opts.h
@@ -0,0 +1,28 @@
+/* GCC option-handling definitions for the Synopsys DesignWare ARC architecture.
+
+   Copyright (C) 2007-2012 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+enum processor_type
+{
+  PROCESSOR_NONE,
+  PROCESSOR_A5,
+  PROCESSOR_ARC600,
+  PROCESSOR_ARC601,
+  PROCESSOR_ARC700
+};
diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
new file mode 100644
index 00000000000..0939bc04bb4
--- /dev/null
+++ b/gcc/config/arc/arc-protos.h
@@ -0,0 +1,118 @@
+/* Definitions of target machine for GNU compiler, Synopsys DesignWare ARC cpu.
+   Copyright (C) 2000, 2007-2013 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef RTX_CODE
+
+extern enum machine_mode arc_select_cc_mode (enum rtx_code, rtx, rtx);
+
+/* Define the function that builds the compare insn for scc, bcc and mov*cc.  */
+extern struct rtx_def *gen_compare_reg (rtx, enum machine_mode);
+
+/* Declarations for various fns used in the .md file.  */
+extern void arc_output_function_epilogue (FILE *, HOST_WIDE_INT, int);
+extern const char *output_shift (rtx *);
+extern bool compact_sda_memory_operand (rtx op, enum machine_mode mode);
+extern bool arc_double_limm_p (rtx);
+extern void arc_print_operand (FILE *, rtx, int);
+extern void arc_print_operand_address (FILE *, rtx);
+extern void arc_final_prescan_insn (rtx, rtx *, int);
+extern void arc_set_default_type_attributes (tree type);
+extern const char *arc_output_libcall (const char *);
+extern bool prepare_extend_operands (rtx *operands, enum rtx_code code,
+                                     enum machine_mode omode);
+extern int arc_output_addsi (rtx *operands, bool, bool);
+extern int arc_output_commutative_cond_exec (rtx *operands, bool);
+extern bool arc_expand_movmem (rtx *operands);
+extern bool prepare_move_operands (rtx *operands, enum machine_mode mode);
+extern void emit_shift (enum rtx_code, rtx, rtx, rtx);
+#endif /* RTX_CODE */
+
+#ifdef TREE_CODE
+extern enum arc_function_type arc_compute_function_type (struct function *);
+#endif /* TREE_CODE */
+
+
+extern void arc_init (void);
+extern unsigned int arc_compute_frame_size (int);
+extern bool arc_ccfsm_branch_deleted_p (void);
+extern void arc_ccfsm_record_branch_deleted (void);
+
+extern rtx arc_legitimize_pic_address (rtx, rtx);
+void arc_asm_output_aligned_decl_local (FILE *, tree, const char *,
+                                        unsigned HOST_WIDE_INT,
+                                        unsigned HOST_WIDE_INT,
+                                        unsigned HOST_WIDE_INT);
+extern rtx arc_return_addr_rtx (int, rtx);
+extern bool check_if_valid_regno_const (rtx *, int);
+extern bool check_if_valid_sleep_operand (rtx *, int);
+extern bool arc_legitimate_constant_p (enum machine_mode, rtx);
+extern bool arc_legitimate_pc_offset_p (rtx);
+extern bool arc_legitimate_pic_addr_p (rtx);
+extern void emit_pic_move (rtx *, enum machine_mode);
+extern bool arc_raw_symbolic_reference_mentioned_p (rtx, bool);
+extern bool arc_legitimate_pic_operand_p (rtx);
+extern bool arc_is_longcall_p (rtx);
+extern bool arc_is_shortcall_p (rtx);
+extern bool arc_profile_call (rtx callee);
+extern bool valid_brcc_with_delay_p (rtx *);
+extern bool small_data_pattern (rtx, enum machine_mode);
+extern rtx arc_rewrite_small_data (rtx);
+extern bool arc_ccfsm_cond_exec_p (void);
+struct secondary_reload_info;
+extern int arc_register_move_cost (enum machine_mode, enum reg_class,
+                                   enum reg_class);
+extern rtx disi_highpart (rtx);
+extern int arc_adjust_insn_length (rtx, int, bool);
+extern int arc_corereg_hazard (rtx, rtx);
+extern int arc_hazard (rtx, rtx);
+extern int arc_write_ext_corereg (rtx);
+extern rtx gen_acc1 (void);
+extern rtx gen_acc2 (void);
+extern rtx gen_mlo (void);
+extern rtx gen_mhi (void);
+extern bool arc_branch_size_unknown_p (void);
+struct arc_ccfsm;
+extern void arc_ccfsm_record_condition (rtx, bool, rtx, struct arc_ccfsm *);
+extern void arc_expand_prologue (void);
+extern void arc_expand_epilogue (int);
+extern void arc_init_expanders (void);
+extern int arc_check_millicode (rtx op, int offset, int load_p);
+extern int arc_get_unalign (void);
+extern void arc_clear_unalign (void);
+extern void arc_toggle_unalign (void);
+extern void split_addsi (rtx *);
+extern void split_subsi (rtx *);
+extern void arc_pad_return (void);
+extern rtx arc_split_move (rtx *);
+extern int arc_verify_short (rtx insn, int unalign, int);
+extern const char *arc_short_long (rtx insn, const char *, const char *);
+extern rtx arc_regno_use_in (unsigned int, rtx);
+extern int arc_attr_type (rtx);
+extern bool arc_scheduling_not_expected (void);
+extern bool arc_sets_cc_p (rtx insn);
+extern int arc_label_align (rtx label);
+extern bool arc_need_delay (rtx insn);
+extern bool arc_text_label (rtx);
+extern int arc_decl_pretend_args (tree decl);
+extern bool arc_short_comparison_p (rtx, int);
+extern bool arc_epilogue_uses (int regno);
+/* insn-attrtab.c doesn't include reload.h, which declares regno_clobbered_p.  */
+extern int regno_clobbered_p (unsigned int, rtx, enum machine_mode, int);
+extern int arc_return_slot_offset (void);
+extern bool arc_legitimize_reload_address (rtx *, enum machine_mode, int, int);
diff --git a/gcc/config/arc/arc-simd.h b/gcc/config/arc/arc-simd.h
new file mode 100644
index 00000000000..608bd41f80a
--- /dev/null
+++ b/gcc/config/arc/arc-simd.h
@@ -0,0 +1,186 @@
+/* Synopsys DesignWare ARC SIMD include file.
+   Copyright (C) 2007-2012 Free Software Foundation, Inc.
+   Written by Saurabh Verma (saurabh.verma@celunite.com) on behalf of Synopsys
+   Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* As a special exception, if you include this header file into source
+   files compiled by GCC, this header file does not by itself cause
+   the resulting executable to be covered by the GNU General Public
+   License.  This exception does not however invalidate any other
+   reasons why the executable file might be covered by the GNU General
+   Public License.  */
+
+#ifndef _ARC_SIMD_H
+#define _ARC_SIMD_H 1
+
+#ifndef __ARC_SIMD__
+#error Use the "-msimd" flag to enable ARC SIMD support
+#endif
+
+/* I0-I7 registers.  */
+#define _IREG_I0 0
+#define _IREG_I1 1
+#define _IREG_I2 2
+#define _IREG_I3 3
+#define _IREG_I4 4
+#define _IREG_I5 5
+#define _IREG_I6 6
+#define _IREG_I7 7
+
+/* DMA configuration registers.  */
+#define _DMA_REG_DR0 0
+#define _DMA_SDM_SRC_ADR_REG _DMA_REG_DR0
+#define _DMA_SDM_DEST_ADR_REG _DMA_REG_DR0
+
+#define _DMA_REG_DR1 1
+#define _DMA_SDM_STRIDE_REG _DMA_REG_DR1
+
+#define _DMA_REG_DR2 2
+#define _DMA_BLK_REG _DMA_REG_DR2
+
+#define _DMA_REG_DR3 3
+#define _DMA_LOC_REG _DMA_REG_DR3
+
+#define _DMA_REG_DR4 4
+#define _DMA_SYS_SRC_ADR_REG _DMA_REG_DR4
+#define _DMA_SYS_DEST_ADR_REG _DMA_REG_DR4
+
+#define _DMA_REG_DR5 5
+#define _DMA_SYS_STRIDE_REG _DMA_REG_DR5
+
+#define _DMA_REG_DR6 6
+#define _DMA_CFG_REG _DMA_REG_DR6
+
+#define _DMA_REG_DR7 7
+#define _DMA_FT_BASE_ADR_REG _DMA_REG_DR7
+
+/* Predefined types used in vector instructions.  */
+typedef int __v4si __attribute__((vector_size(16)));
+typedef short __v8hi __attribute__((vector_size(16)));
+
+/* Synonyms */
+#define _vaddaw __builtin_arc_vaddaw
+#define _vaddw __builtin_arc_vaddw
+#define _vavb __builtin_arc_vavb
+#define _vavrb __builtin_arc_vavrb
+#define _vdifaw __builtin_arc_vdifaw
+#define _vdifw __builtin_arc_vdifw
+#define _vmaxaw __builtin_arc_vmaxaw
+#define _vmaxw __builtin_arc_vmaxw
+#define _vminaw __builtin_arc_vminaw
+#define _vminw __builtin_arc_vminw
+#define _vmulaw __builtin_arc_vmulaw
+#define _vmulfaw __builtin_arc_vmulfaw
+#define _vmulfw __builtin_arc_vmulfw
+#define _vmulw __builtin_arc_vmulw
+#define _vsubaw __builtin_arc_vsubaw
+#define _vsubw __builtin_arc_vsubw
+#define _vsummw __builtin_arc_vsummw
+#define _vand __builtin_arc_vand
+#define _vandaw __builtin_arc_vandaw
+#define _vbic __builtin_arc_vbic
+#define _vbicaw __builtin_arc_vbicaw
+#define _vor __builtin_arc_vor
+#define _vxor __builtin_arc_vxor
+#define _vxoraw __builtin_arc_vxoraw
+#define _veqw __builtin_arc_veqw
+#define _vlew __builtin_arc_vlew
+#define _vltw __builtin_arc_vltw
+#define _vnew __builtin_arc_vnew
+#define _vmr1aw __builtin_arc_vmr1aw
+#define _vmr1w __builtin_arc_vmr1w
+#define _vmr2aw __builtin_arc_vmr2aw
+#define _vmr2w __builtin_arc_vmr2w
+#define _vmr3aw __builtin_arc_vmr3aw
+#define _vmr3w __builtin_arc_vmr3w
+#define _vmr4aw __builtin_arc_vmr4aw
+#define _vmr4w __builtin_arc_vmr4w
+#define _vmr5aw __builtin_arc_vmr5aw
+#define _vmr5w __builtin_arc_vmr5w
+#define _vmr6aw __builtin_arc_vmr6aw
+#define _vmr6w __builtin_arc_vmr6w
+#define _vmr7aw __builtin_arc_vmr7aw
+#define _vmr7w __builtin_arc_vmr7w
+#define _vmrb __builtin_arc_vmrb
+#define _vh264f __builtin_arc_vh264f
+#define _vh264ft __builtin_arc_vh264ft
+#define _vh264fw __builtin_arc_vh264fw
+#define _vvc1f __builtin_arc_vvc1f
+#define _vvc1ft __builtin_arc_vvc1ft
+#define _vbaddw __builtin_arc_vbaddw
+#define _vbmaxw __builtin_arc_vbmaxw
+#define _vbminw __builtin_arc_vbminw
+#define _vbmulaw __builtin_arc_vbmulaw
+#define _vbmulfw __builtin_arc_vbmulfw
+#define _vbmulw __builtin_arc_vbmulw
+#define _vbrsubw __builtin_arc_vbrsubw
+#define _vbsubw __builtin_arc_vbsubw
+#define _vasrw __builtin_arc_vasrw
+#define _vsr8 __builtin_arc_vsr8
+#define _vsr8aw __builtin_arc_vsr8aw
+#define _vasrrwi __builtin_arc_vasrrwi
+#define _vasrsrwi __builtin_arc_vasrsrwi
+#define _vasrwi __builtin_arc_vasrwi
+#define _vasrpwbi __builtin_arc_vasrpwbi
+#define _vasrrpwbi __builtin_arc_vasrrpwbi
+#define _vsr8awi __builtin_arc_vsr8awi
+#define _vsr8i __builtin_arc_vsr8i
+#define _vmvaw __builtin_arc_vmvaw
+#define _vmvw __builtin_arc_vmvw
+#define _vmvzw __builtin_arc_vmvzw
+#define _vd6tapf __builtin_arc_vd6tapf
+#define _vmovaw __builtin_arc_vmovaw
+#define _vmovw __builtin_arc_vmovw
+#define _vmovzw __builtin_arc_vmovzw
+#define _vabsaw __builtin_arc_vabsaw
+#define _vabsw __builtin_arc_vabsw
+#define _vaddsuw __builtin_arc_vaddsuw
+#define _vsignw __builtin_arc_vsignw
+#define _vexch1 __builtin_arc_vexch1
+#define _vexch2 __builtin_arc_vexch2
+#define _vexch4 __builtin_arc_vexch4
+#define _vupbaw __builtin_arc_vupbaw
+#define _vupbw __builtin_arc_vupbw
+#define _vupsbaw __builtin_arc_vupsbaw
+#define _vupsbw __builtin_arc_vupsbw
+#define _vdirun __builtin_arc_vdirun
+#define _vdorun __builtin_arc_vdorun
+#define _vdiwr __builtin_arc_vdiwr
+#define _vdowr __builtin_arc_vdowr
+#define _vrec __builtin_arc_vrec
+#define _vrun __builtin_arc_vrun
+#define _vrecrun __builtin_arc_vrecrun
+#define _vendrec __builtin_arc_vendrec
+#define _vld32wh __builtin_arc_vld32wh
+#define _vld32wl __builtin_arc_vld32wl
+#define _vld64 __builtin_arc_vld64
+#define _vld32 __builtin_arc_vld32
+#define _vld64w __builtin_arc_vld64w
+#define _vld128 __builtin_arc_vld128
+#define _vst128 __builtin_arc_vst128
+#define _vst64 __builtin_arc_vst64
+#define _vst16_n __builtin_arc_vst16_n
+#define _vst32_n __builtin_arc_vst32_n
+#define _vinti __builtin_arc_vinti
+
+/* Additional synonyms to ease programming.  */
+#define _setup_dma_in_channel_reg _vdiwr
+#define _setup_dma_out_channel_reg _vdowr
+
+#endif /* _ARC_SIMD_H */
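For context, a hypothetical user-level sketch of how the synonyms above are meant to be used. The types and synonym names come from the header itself; the lane behavior of the underlying VADDW instruction and the compiler invocation are assumptions, not part of the patch:

    /* simd-demo.c -- illustrative only; build with a toolchain based on this
       port, passing -msimd so that __ARC_SIMD__ is defined.  */
    #include <arc-simd.h>

    /* _vaddw is #defined above to __builtin_arc_vaddw, which the port
       expands to the SIMD VADDW instruction on a 128-bit vector.  */
    __v8hi
    vec_add (__v8hi a, __v8hi b)
    {
      return _vaddw (a, b);
    }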
0 \ + : -(-GET_MODE_SIZE (MODE) | -4) >> 1))) + +#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X, INDEX, STRICT) \ +(GET_CODE (X) == PLUS \ + && RTX_OK_FOR_BASE_P (XEXP (X, 0), (STRICT)) \ + && ((INDEX && RTX_OK_FOR_INDEX_P (XEXP (X, 1), (STRICT)) \ + && GET_MODE_SIZE ((MODE)) <= 4) \ + || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1)))) + +#define LEGITIMATE_SCALED_ADDRESS_P(MODE, X, STRICT) \ +(GET_CODE (X) == PLUS \ + && GET_CODE (XEXP (X, 0)) == MULT \ + && RTX_OK_FOR_INDEX_P (XEXP (XEXP (X, 0), 0), (STRICT)) \ + && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \ + && ((GET_MODE_SIZE (MODE) == 2 && INTVAL (XEXP (XEXP (X, 0), 1)) == 2) \ + || (GET_MODE_SIZE (MODE) == 4 && INTVAL (XEXP (XEXP (X, 0), 1)) == 4)) \ + && (RTX_OK_FOR_BASE_P (XEXP (X, 1), (STRICT)) \ + || (flag_pic ? CONST_INT_P (XEXP (X, 1)) : CONSTANT_P (XEXP (X, 1))))) + +#define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \ + (GET_CODE (X) == PLUS \ + && (REG_P (XEXP ((X), 0)) && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM) \ + && ((GET_CODE (XEXP((X),1)) == SYMBOL_REF \ + && SYMBOL_REF_SMALL_P (XEXP ((X), 1))) \ + || (GET_CODE (XEXP ((X), 1)) == CONST \ + && GET_CODE (XEXP (XEXP ((X), 1), 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (XEXP ((X), 1), 0), 0)) == SYMBOL_REF \ + && SYMBOL_REF_SMALL_P (XEXP (XEXP (XEXP ((X), 1), 0), 0)) \ + && GET_CODE (XEXP(XEXP (XEXP ((X), 1), 0), 1)) == CONST_INT))) + +/* Array of valid operand punctuation characters. */ +char arc_punct_chars[256]; + +/* State used by arc_ccfsm_advance to implement conditional execution. */ +struct GTY (()) arc_ccfsm +{ + int state; + int cc; + rtx cond; + rtx target_insn; + int target_label; +}; + +#define arc_ccfsm_current cfun->machine->ccfsm_current + +#define ARC_CCFSM_BRANCH_DELETED_P(STATE) \ + ((STATE)->state == 1 || (STATE)->state == 2) + +/* Indicate we're conditionalizing insns now. */ +#define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \ + ((STATE)->state += 2) + +#define ARC_CCFSM_COND_EXEC_P(STATE) \ + ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \ + || current_insn_predicate) + +/* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE. */ +#define CCFSM_ISCOMPACT(INSN,STATE) \ + (ARC_CCFSM_COND_EXEC_P (STATE) \ + ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \ + || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \ + : get_attr_iscompact (INSN) != ISCOMPACT_FALSE) + +/* Likewise, but also consider that INSN might be in a delay slot of JUMP. */ +#define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \ + ((ARC_CCFSM_COND_EXEC_P (STATE) \ + || (JUMP_P (JUMP) \ + && INSN_ANNULLED_BRANCH_P (JUMP) \ + && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \ + ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \ + || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \ + : get_attr_iscompact (INSN) != ISCOMPACT_FALSE) + +/* The maximum number of insns skipped which will be conditionalised if + possible. */ +/* When optimizing for speed: + Let p be the probability that the potentially skipped insns need to + be executed, pn the cost of a correctly predicted non-taken branch, + mt the cost of a mis/non-predicted taken branch, + mn mispredicted non-taken, pt correctly predicted taken ; + costs expressed in numbers of instructions like the ones considered + skipping. + Unfortunately we don't have a measure of predictability - this + is linked to probability only in that in the no-eviction-scenario + there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger + value that can be assumed *if* the distribution is perfectly random. 
+ A predictability of 1 is perfectly plausible not matter what p is, + because the decision could be dependent on an invocation parameter + of the program. + For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn + For small p, we want MAX_INSNS_SKIPPED == pt + + When optimizing for size: + We want to skip insn unless we could use 16 opcodes for the + non-conditionalized insn to balance the branch length or more. + Performance can be tie-breaker. */ +/* If the potentially-skipped insns are likely to be executed, we'll + generally save one non-taken branch + o + this to be no less than the 1/p */ +#define MAX_INSNS_SKIPPED 3 + +/* The values of unspec's first field. */ +enum { + ARC_UNSPEC_PLT = 3, + ARC_UNSPEC_GOT, + ARC_UNSPEC_GOTOFF +} ; + + +enum arc_builtins { + ARC_BUILTIN_NOP = 2, + ARC_BUILTIN_NORM = 3, + ARC_BUILTIN_NORMW = 4, + ARC_BUILTIN_SWAP = 5, + ARC_BUILTIN_BRK = 6, + ARC_BUILTIN_DIVAW = 7, + ARC_BUILTIN_EX = 8, + ARC_BUILTIN_MUL64 = 9, + ARC_BUILTIN_MULU64 = 10, + ARC_BUILTIN_RTIE = 11, + ARC_BUILTIN_SYNC = 12, + ARC_BUILTIN_CORE_READ = 13, + ARC_BUILTIN_CORE_WRITE = 14, + ARC_BUILTIN_FLAG = 15, + ARC_BUILTIN_LR = 16, + ARC_BUILTIN_SR = 17, + ARC_BUILTIN_SLEEP = 18, + ARC_BUILTIN_SWI = 19, + ARC_BUILTIN_TRAP_S = 20, + ARC_BUILTIN_UNIMP_S = 21, + ARC_BUILTIN_ALIGNED = 22, + + /* Sentinel to mark start of simd builtins. */ + ARC_SIMD_BUILTIN_BEGIN = 1000, + + ARC_SIMD_BUILTIN_VADDAW = 1001, + ARC_SIMD_BUILTIN_VADDW = 1002, + ARC_SIMD_BUILTIN_VAVB = 1003, + ARC_SIMD_BUILTIN_VAVRB = 1004, + ARC_SIMD_BUILTIN_VDIFAW = 1005, + ARC_SIMD_BUILTIN_VDIFW = 1006, + ARC_SIMD_BUILTIN_VMAXAW = 1007, + ARC_SIMD_BUILTIN_VMAXW = 1008, + ARC_SIMD_BUILTIN_VMINAW = 1009, + ARC_SIMD_BUILTIN_VMINW = 1010, + ARC_SIMD_BUILTIN_VMULAW = 1011, + ARC_SIMD_BUILTIN_VMULFAW = 1012, + ARC_SIMD_BUILTIN_VMULFW = 1013, + ARC_SIMD_BUILTIN_VMULW = 1014, + ARC_SIMD_BUILTIN_VSUBAW = 1015, + ARC_SIMD_BUILTIN_VSUBW = 1016, + ARC_SIMD_BUILTIN_VSUMMW = 1017, + ARC_SIMD_BUILTIN_VAND = 1018, + ARC_SIMD_BUILTIN_VANDAW = 1019, + ARC_SIMD_BUILTIN_VBIC = 1020, + ARC_SIMD_BUILTIN_VBICAW = 1021, + ARC_SIMD_BUILTIN_VOR = 1022, + ARC_SIMD_BUILTIN_VXOR = 1023, + ARC_SIMD_BUILTIN_VXORAW = 1024, + ARC_SIMD_BUILTIN_VEQW = 1025, + ARC_SIMD_BUILTIN_VLEW = 1026, + ARC_SIMD_BUILTIN_VLTW = 1027, + ARC_SIMD_BUILTIN_VNEW = 1028, + ARC_SIMD_BUILTIN_VMR1AW = 1029, + ARC_SIMD_BUILTIN_VMR1W = 1030, + ARC_SIMD_BUILTIN_VMR2AW = 1031, + ARC_SIMD_BUILTIN_VMR2W = 1032, + ARC_SIMD_BUILTIN_VMR3AW = 1033, + ARC_SIMD_BUILTIN_VMR3W = 1034, + ARC_SIMD_BUILTIN_VMR4AW = 1035, + ARC_SIMD_BUILTIN_VMR4W = 1036, + ARC_SIMD_BUILTIN_VMR5AW = 1037, + ARC_SIMD_BUILTIN_VMR5W = 1038, + ARC_SIMD_BUILTIN_VMR6AW = 1039, + ARC_SIMD_BUILTIN_VMR6W = 1040, + ARC_SIMD_BUILTIN_VMR7AW = 1041, + ARC_SIMD_BUILTIN_VMR7W = 1042, + ARC_SIMD_BUILTIN_VMRB = 1043, + ARC_SIMD_BUILTIN_VH264F = 1044, + ARC_SIMD_BUILTIN_VH264FT = 1045, + ARC_SIMD_BUILTIN_VH264FW = 1046, + ARC_SIMD_BUILTIN_VVC1F = 1047, + ARC_SIMD_BUILTIN_VVC1FT = 1048, + + /* Va, Vb, rlimm instructions. */ + ARC_SIMD_BUILTIN_VBADDW = 1050, + ARC_SIMD_BUILTIN_VBMAXW = 1051, + ARC_SIMD_BUILTIN_VBMINW = 1052, + ARC_SIMD_BUILTIN_VBMULAW = 1053, + ARC_SIMD_BUILTIN_VBMULFW = 1054, + ARC_SIMD_BUILTIN_VBMULW = 1055, + ARC_SIMD_BUILTIN_VBRSUBW = 1056, + ARC_SIMD_BUILTIN_VBSUBW = 1057, + + /* Va, Vb, Ic instructions. */ + ARC_SIMD_BUILTIN_VASRW = 1060, + ARC_SIMD_BUILTIN_VSR8 = 1061, + ARC_SIMD_BUILTIN_VSR8AW = 1062, + + /* Va, Vb, u6 instructions. 
*/ + ARC_SIMD_BUILTIN_VASRRWi = 1065, + ARC_SIMD_BUILTIN_VASRSRWi = 1066, + ARC_SIMD_BUILTIN_VASRWi = 1067, + ARC_SIMD_BUILTIN_VASRPWBi = 1068, + ARC_SIMD_BUILTIN_VASRRPWBi = 1069, + ARC_SIMD_BUILTIN_VSR8AWi = 1070, + ARC_SIMD_BUILTIN_VSR8i = 1071, + + /* Va, Vb, u8 (simm) instructions. */ + ARC_SIMD_BUILTIN_VMVAW = 1075, + ARC_SIMD_BUILTIN_VMVW = 1076, + ARC_SIMD_BUILTIN_VMVZW = 1077, + ARC_SIMD_BUILTIN_VD6TAPF = 1078, + + /* Va, rlimm, u8 (simm) instructions. */ + ARC_SIMD_BUILTIN_VMOVAW = 1080, + ARC_SIMD_BUILTIN_VMOVW = 1081, + ARC_SIMD_BUILTIN_VMOVZW = 1082, + + /* Va, Vb instructions. */ + ARC_SIMD_BUILTIN_VABSAW = 1085, + ARC_SIMD_BUILTIN_VABSW = 1086, + ARC_SIMD_BUILTIN_VADDSUW = 1087, + ARC_SIMD_BUILTIN_VSIGNW = 1088, + ARC_SIMD_BUILTIN_VEXCH1 = 1089, + ARC_SIMD_BUILTIN_VEXCH2 = 1090, + ARC_SIMD_BUILTIN_VEXCH4 = 1091, + ARC_SIMD_BUILTIN_VUPBAW = 1092, + ARC_SIMD_BUILTIN_VUPBW = 1093, + ARC_SIMD_BUILTIN_VUPSBAW = 1094, + ARC_SIMD_BUILTIN_VUPSBW = 1095, + + ARC_SIMD_BUILTIN_VDIRUN = 1100, + ARC_SIMD_BUILTIN_VDORUN = 1101, + ARC_SIMD_BUILTIN_VDIWR = 1102, + ARC_SIMD_BUILTIN_VDOWR = 1103, + + ARC_SIMD_BUILTIN_VREC = 1105, + ARC_SIMD_BUILTIN_VRUN = 1106, + ARC_SIMD_BUILTIN_VRECRUN = 1107, + ARC_SIMD_BUILTIN_VENDREC = 1108, + + ARC_SIMD_BUILTIN_VLD32WH = 1110, + ARC_SIMD_BUILTIN_VLD32WL = 1111, + ARC_SIMD_BUILTIN_VLD64 = 1112, + ARC_SIMD_BUILTIN_VLD32 = 1113, + ARC_SIMD_BUILTIN_VLD64W = 1114, + ARC_SIMD_BUILTIN_VLD128 = 1115, + ARC_SIMD_BUILTIN_VST128 = 1116, + ARC_SIMD_BUILTIN_VST64 = 1117, + + ARC_SIMD_BUILTIN_VST16_N = 1120, + ARC_SIMD_BUILTIN_VST32_N = 1121, + + ARC_SIMD_BUILTIN_VINTI = 1201, + + ARC_SIMD_BUILTIN_END +}; + +/* A nop is needed between a 4 byte insn that sets the condition codes and + a branch that uses them (the same isn't true for an 8 byte insn that sets + the condition codes). Set by arc_ccfsm_advance. Used by + arc_print_operand. */ + +static int get_arc_condition_code (rtx); + +static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *); + +/* Initialized arc_attribute_table to NULL since arc doesnot have any + machine specific supported attributes. */ +const struct attribute_spec arc_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute, true }, + /* Function calls made to this symbol must be done indirectly, because + it may lie outside of the 21/25 bit addressing range of a normal function + call. */ + { "long_call", 0, 0, false, true, true, NULL, false }, + /* Whereas these functions are always known to reside within the 25 bit + addressing range of unconditionalized bl. */ + { "medium_call", 0, 0, false, true, true, NULL, false }, + /* And these functions are always known to reside within the 21 bit + addressing range of blcc. 
*/ + { "short_call", 0, 0, false, true, true, NULL, false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; +static int arc_comp_type_attributes (const_tree, const_tree); +static void arc_file_start (void); +static void arc_internal_label (FILE *, const char *, unsigned long); +static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, + tree); +static int arc_address_cost (rtx, enum machine_mode, addr_space_t, bool); +static void arc_encode_section_info (tree decl, rtx rtl, int first); + +static void arc_init_builtins (void); +static rtx arc_expand_builtin (tree, rtx, rtx, enum machine_mode, int); + +static int branch_dest (rtx); + +static void arc_output_pic_addr_const (FILE *, rtx, int); +void emit_pic_move (rtx *, enum machine_mode); +bool arc_legitimate_pic_operand_p (rtx); +static bool arc_function_ok_for_sibcall (tree, tree); +static rtx arc_function_value (const_tree, const_tree, bool); +const char * output_shift (rtx *); +static void arc_reorg (void); +static bool arc_in_small_data_p (const_tree); + +static void arc_init_reg_tables (void); +static bool arc_return_in_memory (const_tree, const_tree); +static void arc_init_simd_builtins (void); +static bool arc_vector_mode_supported_p (enum machine_mode); + +static const char *arc_invalid_within_doloop (const_rtx); + +static void output_short_suffix (FILE *file); + +static bool arc_frame_pointer_required (void); + +/* Implements target hook vector_mode_supported_p. */ + +static bool +arc_vector_mode_supported_p (enum machine_mode mode) +{ + if (!TARGET_SIMD_SET) + return false; + + if ((mode == V4SImode) + || (mode == V8HImode)) + return true; + + return false; +} + + +/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */ +static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED; +static rtx arc_delegitimize_address (rtx); +static bool arc_can_follow_jump (const_rtx follower, const_rtx followee); + +static rtx frame_insn (rtx); +static void arc_function_arg_advance (cumulative_args_t, enum machine_mode, + const_tree, bool); +static rtx arc_legitimize_address_0 (rtx, rtx, enum machine_mode mode); + +static void arc_finalize_pic (void); + +/* initialize the GCC target structure. 
*/ +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START arc_file_start +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE arc_attribute_table +#undef TARGET_ASM_INTERNAL_LABEL +#define TARGET_ASM_INTERNAL_LABEL arc_internal_label +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS arc_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST arc_address_cost + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO arc_encode_section_info + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS arc_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN arc_expand_builtin + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk + +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG arc_reorg + +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P arc_in_small_data_p + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE \ + default_promote_function_mode_always_promote + +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY arc_return_in_memory +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE arc_pass_by_reference + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs + +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes + +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE arc_function_value + +#undef TARGET_SCHED_ADJUST_PRIORITY +#define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p + +#undef TARGET_INVALID_WITHIN_DOLOOP +#define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop + +#undef TARGET_PRESERVE_RELOAD_P +#define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p + +#undef TARGET_CAN_FOLLOW_JUMP +#define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address + +/* Usually, we will be able to scale anchor offsets. + When this fails, we want LEGITIMIZE_ADDRESS to kick in. 
*/ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET (-1024) +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET (1020) + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD arc_secondary_reload + +#define TARGET_OPTION_OVERRIDE arc_override_options + +#define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage + +#define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline + +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address + +#define TARGET_CAN_ELIMINATE arc_can_eliminate + +#define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required + +#define TARGET_FUNCTION_ARG arc_function_arg + +#define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance + +#define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p + +#define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p + +#define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p + +#define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address + +#define TARGET_ADJUST_INSN_LENGTH arc_adjust_insn_length + +#define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters + +#define TARGET_LRA_P arc_lra_p +#define TARGET_REGISTER_PRIORITY arc_register_priority +/* Stores with scaled offsets have different displacement ranges. */ +#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true +#define TARGET_SPILL_CLASS arc_spill_class + +#include "target-def.h" + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +/* Try to keep the (mov:DF _, reg) as early as possible so + that the d<add/sub/mul>h-lr insns appear together and can + use the peephole2 pattern. */ + +static int +arc_sched_adjust_priority (rtx insn, int priority) +{ + rtx set = single_set (insn); + if (set + && GET_MODE (SET_SRC(set)) == DFmode + && GET_CODE (SET_SRC(set)) == REG) + { + /* Incrementing priority by 20 (empirically derived). */ + return priority + 20; + } + + return priority; +} + +static reg_class_t +arc_secondary_reload (bool in_p, rtx x, reg_class_t cl, enum machine_mode, + secondary_reload_info *) +{ + if (cl == DOUBLE_REGS) + return GENERAL_REGS; + + /* The loop counter register can be stored, but not loaded directly. */ + if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS) + && in_p && MEM_P (x)) + return GENERAL_REGS; + return NO_REGS; +} + +static unsigned arc_ifcvt (void); + +namespace { + +const pass_data pass_data_arc_ifcvt = +{ + RTL_PASS, + "arc_ifcvt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_IFCVT2, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish /* todo_flags_finish */ +}; + +class pass_arc_ifcvt : public rtl_opt_pass +{ +public: + pass_arc_ifcvt(gcc::context *ctxt) + : rtl_opt_pass(pass_data_arc_ifcvt, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_arc_ifcvt (ctxt_); } + unsigned int execute () { return arc_ifcvt (); } +}; + +} // anon namespace + +rtl_opt_pass * +make_pass_arc_ifcvt (gcc::context *ctxt) +{ + return new pass_arc_ifcvt (ctxt); +} + +/* Called by OVERRIDE_OPTIONS to initialize various things. 
*/ + +void +arc_init (void) +{ + enum attr_tune tune_dflt = TUNE_NONE; + + if (TARGET_A5) + { + arc_cpu_string = "A5"; + } + else if (TARGET_ARC600) + { + arc_cpu_string = "ARC600"; + tune_dflt = TUNE_ARC600; + } + else if (TARGET_ARC601) + { + arc_cpu_string = "ARC601"; + tune_dflt = TUNE_ARC600; + } + else if (TARGET_ARC700) + { + arc_cpu_string = "ARC700"; + tune_dflt = TUNE_ARC700_4_2_STD; + } + else + gcc_unreachable (); + if (arc_tune == TUNE_NONE) + arc_tune = tune_dflt; + /* Note: arc_multcost is only used in rtx_cost if speed is true. */ + if (arc_multcost < 0) + switch (arc_tune) + { + case TUNE_ARC700_4_2_STD: + /* latency 7; + max throughput (1 multiply + 4 other insns) / 5 cycles. */ + arc_multcost = COSTS_N_INSNS (4); + if (TARGET_NOMPY_SET) + arc_multcost = COSTS_N_INSNS (30); + break; + case TUNE_ARC700_4_2_XMAC: + /* latency 5; + max throughput (1 multiply + 2 other insns) / 3 cycles. */ + arc_multcost = COSTS_N_INSNS (3); + if (TARGET_NOMPY_SET) + arc_multcost = COSTS_N_INSNS (30); + break; + case TUNE_ARC600: + if (TARGET_MUL64_SET) + { + arc_multcost = COSTS_N_INSNS (4); + break; + } + /* Fall through. */ + default: + arc_multcost = COSTS_N_INSNS (30); + break; + } + + /* Support mul64 generation only for A5 and ARC600. */ + if (TARGET_MUL64_SET && TARGET_ARC700) + error ("-mmul64 not supported for ARC700"); + + /* MPY instructions valid only for ARC700. */ + if (TARGET_NOMPY_SET && !TARGET_ARC700) + error ("-mno-mpy supported only for ARC700"); + + /* mul/mac instructions only for ARC600. */ + if (TARGET_MULMAC_32BY16_SET && !(TARGET_ARC600 || TARGET_ARC601)) + error ("-mmul32x16 supported only for ARC600 or ARC601"); + + if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR) + error ("-mno-dpfp-lrsr suppforted only with -mdpfp"); + + /* FPX-1. No fast and compact together. */ + if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET) + || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET)) + error ("FPX fast and compact options cannot be specified together"); + + /* FPX-2. No fast-spfp for arc600 or arc601. */ + if (TARGET_SPFP_FAST_SET && (TARGET_ARC600 || TARGET_ARC601)) + error ("-mspfp_fast not available on ARC600 or ARC601"); + + /* FPX-3. No FPX extensions on pre-ARC600 cores. */ + if ((TARGET_DPFP || TARGET_SPFP) + && !(TARGET_ARC600 || TARGET_ARC601 || TARGET_ARC700)) + error ("FPX extensions not available on pre-ARC600 cores"); + + /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic. */ + if (flag_pic && !TARGET_ARC700) + { + warning (DK_WARNING, "PIC is not supported for %s. Generating non-PIC code only..", arc_cpu_string); + flag_pic = 0; + } + + arc_init_reg_tables (); + + /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */ + memset (arc_punct_chars, 0, sizeof (arc_punct_chars)); + arc_punct_chars['#'] = 1; + arc_punct_chars['*'] = 1; + arc_punct_chars['?'] = 1; + arc_punct_chars['!'] = 1; + arc_punct_chars['^'] = 1; + arc_punct_chars['&'] = 1; + + if (optimize > 1 && !TARGET_NO_COND_EXEC) + { + /* There are two target-independent ifcvt passes, and arc_reorg may do + one or more arc_ifcvt calls. */ + opt_pass *pass_arc_ifcvt_4 = make_pass_arc_ifcvt (g); + struct register_pass_info arc_ifcvt4_info + = { pass_arc_ifcvt_4, "dbr", 1, PASS_POS_INSERT_AFTER }; + struct register_pass_info arc_ifcvt5_info + = { pass_arc_ifcvt_4->clone (), "shorten", 1, PASS_POS_INSERT_BEFORE }; + + register_pass (&arc_ifcvt4_info); + register_pass (&arc_ifcvt5_info); + } +} + +/* Check ARC options, generate derived target attributes. 
*/ + +static void +arc_override_options (void) +{ + if (arc_cpu == PROCESSOR_NONE) + arc_cpu = PROCESSOR_ARC700; + + if (arc_size_opt_level == 3) + optimize_size = 1; + + if (flag_pic) + target_flags |= MASK_NO_SDATA_SET; + + if (flag_no_common == 255) + flag_no_common = !TARGET_NO_SDATA_SET; + + /* TARGET_COMPACT_CASESI needs the "q" register class. */ \ + if (TARGET_MIXED_CODE) + TARGET_Q_CLASS = 1; + if (!TARGET_Q_CLASS) + TARGET_COMPACT_CASESI = 0; + if (TARGET_COMPACT_CASESI) + TARGET_CASE_VECTOR_PC_RELATIVE = 1; + + /* These need to be done at start up. It's convenient to do them here. */ + arc_init (); +} + +/* The condition codes of the ARC, and the inverse function. */ +/* For short branches, the "c" / "nc" names are not defined in the ARC + Programmers manual, so we have to use "lo" / "hs"" instead. */ +static const char *arc_condition_codes[] = +{ + "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv", + "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0 +}; + +enum arc_cc_code_index +{ + ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N, + ARC_CC_C, ARC_CC_NC, ARC_CC_V, ARC_CC_NV, + ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ, + ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC +}; + +#define ARC_INVERSE_CONDITION_CODE(X) ((X) ^ 1) + +/* Returns the index of the ARC condition code string in + `arc_condition_codes'. COMPARISON should be an rtx like + `(eq (...) (...))'. */ + +static int +get_arc_condition_code (rtx comparison) +{ + switch (GET_MODE (XEXP (comparison, 0))) + { + case CCmode: + case SImode: /* For BRcc. */ + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case GT : return ARC_CC_GT; + case LE : return ARC_CC_LE; + case GE : return ARC_CC_GE; + case LT : return ARC_CC_LT; + case GTU : return ARC_CC_HI; + case LEU : return ARC_CC_LS; + case LTU : return ARC_CC_LO; + case GEU : return ARC_CC_HS; + default : gcc_unreachable (); + } + case CC_ZNmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case GE: return ARC_CC_P; + case LT: return ARC_CC_N; + case GT : return ARC_CC_PNZ; + default : gcc_unreachable (); + } + case CC_Zmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + default : gcc_unreachable (); + } + case CC_Cmode: + switch (GET_CODE (comparison)) + { + case LTU : return ARC_CC_C; + case GEU : return ARC_CC_NC; + default : gcc_unreachable (); + } + case CC_FP_GTmode: + if (TARGET_ARGONAUT_SET && TARGET_SPFP) + switch (GET_CODE (comparison)) + { + case GT : return ARC_CC_N; + case UNLE: return ARC_CC_P; + default : gcc_unreachable (); + } + else + switch (GET_CODE (comparison)) + { + case GT : return ARC_CC_HI; + case UNLE : return ARC_CC_LS; + default : gcc_unreachable (); + } + case CC_FP_GEmode: + /* Same for FPX and non-FPX. 
*/ + switch (GET_CODE (comparison)) + { + case GE : return ARC_CC_HS; + case UNLT : return ARC_CC_LO; + default : gcc_unreachable (); + } + case CC_FP_UNEQmode: + switch (GET_CODE (comparison)) + { + case UNEQ : return ARC_CC_EQ; + case LTGT : return ARC_CC_NE; + default : gcc_unreachable (); + } + case CC_FP_ORDmode: + switch (GET_CODE (comparison)) + { + case UNORDERED : return ARC_CC_C; + case ORDERED : return ARC_CC_NC; + default : gcc_unreachable (); + } + case CC_FPXmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case UNORDERED : return ARC_CC_C; + case ORDERED : return ARC_CC_NC; + case LTGT : return ARC_CC_HI; + case UNEQ : return ARC_CC_LS; + default : gcc_unreachable (); + } + default : gcc_unreachable (); + } + /*NOTREACHED*/ + return (42); +} + +/* Return true if COMPARISON has a short form that can accomodate OFFSET. */ + +bool +arc_short_comparison_p (rtx comparison, int offset) +{ + gcc_assert (ARC_CC_NC == ARC_CC_HS); + gcc_assert (ARC_CC_C == ARC_CC_LO); + switch (get_arc_condition_code (comparison)) + { + case ARC_CC_EQ: case ARC_CC_NE: + return offset >= -512 && offset <= 506; + case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT: + case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS: + return offset >= -64 && offset <= 58; + default: + return false; + } +} + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. */ + +enum machine_mode +arc_select_cc_mode (enum rtx_code op, rtx x, rtx y) +{ + enum machine_mode mode = GET_MODE (x); + rtx x1; + + /* For an operation that sets the condition codes as a side-effect, the + C and V flags is not set as for cmp, so we can only use comparisons where + this doesn't matter. (For LT and GE we can use "mi" and "pl" + instead.) */ + /* ??? We could use "pnz" for greater than zero, however, we could then + get into trouble because the comparison could not be reversed. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && y == const0_rtx + && (op == EQ || op == NE + || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x) <= 4)))) + return CC_ZNmode; + + /* add.f for if (a+b) */ + if (mode == SImode + && GET_CODE (y) == NEG + && (op == EQ || op == NE)) + return CC_ZNmode; + + /* Check if this is a test suitable for bxor.f . */ + if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y) + && ((INTVAL (y) - 1) & INTVAL (y)) == 0 + && INTVAL (y)) + return CC_Zmode; + + /* Check if this is a test suitable for add / bmsk.f . 
*/ + if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y) + && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1))) + && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0 + && (~INTVAL (x1) | INTVAL (y)) < 0 + && (~INTVAL (x1) | INTVAL (y)) > -0x800) + return CC_Zmode; + + if (GET_MODE (x) == SImode && (op == LTU || op == GEU) + && GET_CODE (x) == PLUS + && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) + return CC_Cmode; + + if (TARGET_ARGONAUT_SET + && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP))) + switch (op) + { + case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED: + return CC_FPXmode; + case LT: case UNGE: case GT: case UNLE: + return CC_FP_GTmode; + case LE: case UNGT: case GE: case UNLT: + return CC_FP_GEmode; + default: gcc_unreachable (); + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE) + switch (op) + { + case EQ: case NE: return CC_Zmode; + case LT: case UNGE: + case GT: case UNLE: return CC_FP_GTmode; + case LE: case UNGT: + case GE: case UNLT: return CC_FP_GEmode; + case UNEQ: case LTGT: return CC_FP_UNEQmode; + case ORDERED: case UNORDERED: return CC_FP_ORDmode; + default: gcc_unreachable (); + } + + return CCmode; +} + +/* Vectors to keep interesting information about registers where it can easily + be got. We use to use the actual mode value as the bit number, but there + is (or may be) more than 32 modes now. Instead we use two tables: one + indexed by hard register number, and one indexed by mode. */ + +/* The purpose of arc_mode_class is to shrink the range of modes so that + they all fit (as bit numbers) in a 32-bit word (again). Each real mode is + mapped into one arc_mode_class mode. */ + +enum arc_mode_class { + C_MODE, + S_MODE, D_MODE, T_MODE, O_MODE, + SF_MODE, DF_MODE, TF_MODE, OF_MODE, + V_MODE +}; + +/* Modes for condition codes. */ +#define C_MODES (1 << (int) C_MODE) + +/* Modes for single-word and smaller quantities. */ +#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) + +/* Modes for double-word and smaller quantities. */ +#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE)) + +/* Mode for 8-byte DF values only. */ +#define DF_MODES (1 << DF_MODE) + +/* Modes for quad-word and smaller quantities. */ +#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE)) + +/* Modes for 128-bit vectors. */ +#define V_MODES (1 << (int) V_MODE) + +/* Value is 1 if register/mode pair is acceptable on arc. */ + +unsigned int arc_hard_regno_mode_ok[] = { + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES, + D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + + /* ??? Leave these as S_MODES for now. 
*/ + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES, + + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES +}; + +unsigned int arc_mode_class [NUM_MACHINE_MODES]; + +enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER]; + +enum reg_class +arc_preferred_reload_class (rtx, enum reg_class cl) +{ + if ((cl) == CHEAP_CORE_REGS || (cl) == WRITABLE_CORE_REGS) + return GENERAL_REGS; + return cl; +} + +/* Initialize the arc_mode_class array. */ + +static void +arc_init_reg_tables (void) +{ + int i; + + for (i = 0; i < NUM_MACHINE_MODES; i++) + { + switch (GET_MODE_CLASS (i)) + { + case MODE_INT: + case MODE_PARTIAL_INT: + case MODE_COMPLEX_INT: + if (GET_MODE_SIZE (i) <= 4) + arc_mode_class[i] = 1 << (int) S_MODE; + else if (GET_MODE_SIZE (i) == 8) + arc_mode_class[i] = 1 << (int) D_MODE; + else if (GET_MODE_SIZE (i) == 16) + arc_mode_class[i] = 1 << (int) T_MODE; + else if (GET_MODE_SIZE (i) == 32) + arc_mode_class[i] = 1 << (int) O_MODE; + else + arc_mode_class[i] = 0; + break; + case MODE_FLOAT: + case MODE_COMPLEX_FLOAT: + if (GET_MODE_SIZE (i) <= 4) + arc_mode_class[i] = 1 << (int) SF_MODE; + else if (GET_MODE_SIZE (i) == 8) + arc_mode_class[i] = 1 << (int) DF_MODE; + else if (GET_MODE_SIZE (i) == 16) + arc_mode_class[i] = 1 << (int) TF_MODE; + else if (GET_MODE_SIZE (i) == 32) + arc_mode_class[i] = 1 << (int) OF_MODE; + else + arc_mode_class[i] = 0; + break; + case MODE_VECTOR_INT: + arc_mode_class [i] = (1<< (int) V_MODE); + break; + case MODE_CC: + default: + /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so + we must explicitly check for them here. */ + if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode + || i == (int) CC_Cmode + || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode) + arc_mode_class[i] = 1 << (int) C_MODE; + else + arc_mode_class[i] = 0; + break; + } + } +} + +/* Core registers 56..59 are used for multiply extension options. + The dsp option uses r56 and r57, these are then named acc1 and acc2. + acc1 is the highpart, and acc2 the lowpart, so which register gets which + number depends on endianness. + The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi. + Because mlo / mhi form a 64 bit value, we use different gcc internal + register numbers to make them form a register pair as the gcc internals + know it. mmid gets number 57, if still available, and mlo / mhi get + number 58 and 59, depending on endianness. We use DBX_REGISTER_NUMBER + to map this back. 
 */
+  char rname56[5] = "r56";
+  char rname57[5] = "r57";
+  char rname58[5] = "r58";
+  char rname59[5] = "r59";
+
+static void
+arc_conditional_register_usage (void)
+{
+  int regno;
+  int i;
+  int fix_start = 60, fix_end = 55;
+
+  if (TARGET_MUL64_SET)
+    {
+      fix_start = 57;
+      fix_end = 59;
+
+      /* We don't provide a name for mmid.  In rtl / assembly resource lists,
+	 you are supposed to refer to it as mlo & mhi, e.g.
+	 (zero_extract:SI (reg:DI 58) (const_int 32) (16)) .
+	 In an actual asm instruction, you of course use mmid.
+	 The point of avoiding having a separate register for mmid is that
+	 this way, we don't have to carry clobbers of that reg around in every
+	 instruction that modifies mlo and/or mhi.  */
+      strcpy (rname57, "");
+      strcpy (rname58, TARGET_BIG_ENDIAN ? "mhi" : "mlo");
+      strcpy (rname59, TARGET_BIG_ENDIAN ? "mlo" : "mhi");
+    }
+  if (TARGET_MULMAC_32BY16_SET)
+    {
+      fix_start = 56;
+      fix_end = fix_end > 57 ? fix_end : 57;
+      strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2");
+      strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1");
+    }
+  for (regno = fix_start; regno <= fix_end; regno++)
+    {
+      if (!fixed_regs[regno])
+	warning (0, "multiply option implies r%d is fixed", regno);
+      fixed_regs [regno] = call_used_regs[regno] = 1;
+    }
+  if (TARGET_Q_CLASS)
+    {
+      reg_alloc_order[2] = 12;
+      reg_alloc_order[3] = 13;
+      reg_alloc_order[4] = 14;
+      reg_alloc_order[5] = 15;
+      reg_alloc_order[6] = 1;
+      reg_alloc_order[7] = 0;
+      reg_alloc_order[8] = 4;
+      reg_alloc_order[9] = 5;
+      reg_alloc_order[10] = 6;
+      reg_alloc_order[11] = 7;
+      reg_alloc_order[12] = 8;
+      reg_alloc_order[13] = 9;
+      reg_alloc_order[14] = 10;
+      reg_alloc_order[15] = 11;
+    }
+  if (TARGET_SIMD_SET)
+    {
+      int i;
+      for (i=64; i<88; i++)
+	reg_alloc_order [i] = i;
+    }
+  /* For Arctangent-A5 / ARC600, lp_count may not be read in an instruction
+     following immediately after another one setting it to a new value.
+     There was some discussion on how to enforce scheduling constraints for
+     processors with missing interlocks on the gcc mailing list:
+     http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html .
+     However, we can't actually use this approach, because for ARC the
+     delay slot scheduling pass is active, which runs after
+     machine_dependent_reorg.  */
+  if (TARGET_ARC600)
+    CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
+  else if (!TARGET_ARC700)
+    fixed_regs[LP_COUNT] = 1;
+  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    if (!call_used_regs[regno])
+      CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
+  for (regno = 32; regno < 60; regno++)
+    if (!fixed_regs[regno])
+      SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno);
+  if (TARGET_ARC700)
+    {
+      for (regno = 32; regno <= 60; regno++)
+	CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno);
+
+      /* If they have used -ffixed-lp_count, make sure it takes
+	 effect.  */
+      if (fixed_regs[LP_COUNT])
+	{
+	  CLEAR_HARD_REG_BIT (reg_class_contents[LPCOUNT_REG], LP_COUNT);
+	  CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
+	  CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], LP_COUNT);
+
+	  /* Instead of taking out SF_MODE like below, forbid it outright.
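+	     Zeroing the whole arc_hard_regno_mode_ok entry leaves no valid
+	     mode for lp_count, so the register allocator can never pick it.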
*/ + arc_hard_regno_mode_ok[60] = 0; + } + else + arc_hard_regno_mode_ok[60] = 1 << (int) S_MODE; + } + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (i < 29) + { + if (TARGET_Q_CLASS && ((i <= 3) || ((i >= 12) && (i <= 15)))) + arc_regno_reg_class[i] = ARCOMPACT16_REGS; + else + arc_regno_reg_class[i] = GENERAL_REGS; + } + else if (i < 60) + arc_regno_reg_class[i] + = (fixed_regs[i] + ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i) + ? CHEAP_CORE_REGS : ALL_CORE_REGS) + : ((TARGET_ARC700 + && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)) + ? CHEAP_CORE_REGS : WRITABLE_CORE_REGS)); + else + arc_regno_reg_class[i] = NO_REGS; + } + + /* ARCOMPACT16_REGS is empty, if TARGET_Q_CLASS has not been activated. */ + if (!TARGET_Q_CLASS) + { + CLEAR_HARD_REG_SET(reg_class_contents [ARCOMPACT16_REGS]); + CLEAR_HARD_REG_SET(reg_class_contents [AC16_BASE_REGS]); + } + + gcc_assert (FIRST_PSEUDO_REGISTER >= 144); + + /* Handle Special Registers. */ + arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register. */ + arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register. */ + arc_regno_reg_class[31] = LINK_REGS; /* blink register. */ + arc_regno_reg_class[60] = LPCOUNT_REG; + arc_regno_reg_class[61] = NO_REGS; /* CC_REG: must be NO_REGS. */ + arc_regno_reg_class[62] = GENERAL_REGS; + + if (TARGET_DPFP) + { + for (i = 40; i < 44; ++i) + { + arc_regno_reg_class[i] = DOUBLE_REGS; + + /* Unless they want us to do 'mov d1, 0x00000000' make sure + no attempt is made to use such a register as a destination + operand in *movdf_insn. */ + if (!TARGET_ARGONAUT_SET) + { + /* Make sure no 'c', 'w', 'W', or 'Rac' constraint is + interpreted to mean they can use D1 or D2 in their insn. */ + CLEAR_HARD_REG_BIT(reg_class_contents[CHEAP_CORE_REGS ], i); + CLEAR_HARD_REG_BIT(reg_class_contents[ALL_CORE_REGS ], i); + CLEAR_HARD_REG_BIT(reg_class_contents[WRITABLE_CORE_REGS ], i); + CLEAR_HARD_REG_BIT(reg_class_contents[MPY_WRITABLE_CORE_REGS], i); + } + } + } + else + { + /* Disable all DOUBLE_REGISTER settings, + if not generating DPFP code. */ + arc_regno_reg_class[40] = ALL_REGS; + arc_regno_reg_class[41] = ALL_REGS; + arc_regno_reg_class[42] = ALL_REGS; + arc_regno_reg_class[43] = ALL_REGS; + + arc_hard_regno_mode_ok[40] = 0; + arc_hard_regno_mode_ok[42] = 0; + + CLEAR_HARD_REG_SET(reg_class_contents [DOUBLE_REGS]); + } + + if (TARGET_SIMD_SET) + { + gcc_assert (ARC_FIRST_SIMD_VR_REG == 64); + gcc_assert (ARC_LAST_SIMD_VR_REG == 127); + + for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++) + arc_regno_reg_class [i] = SIMD_VR_REGS; + + gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128); + gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128); + gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136); + gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG == 143); + + for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG; + i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++) + arc_regno_reg_class [i] = SIMD_DMA_CONFIG_REGS; + } + + /* pc : r63 */ + arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS; +} + +/* Handle an "interrupt" attribute; arguments as in + struct attribute_spec.handler. 
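+   The attribute takes a single string argument naming the interrupt link
+   register, e.g. (illustrative use, not from the sources):
+     void isr (void) __attribute__ ((interrupt ("ilink1")));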
 */
+
+static tree
+arc_handle_interrupt_attribute (tree *, tree name, tree args, int,
+				bool *no_add_attrs)
+{
+  gcc_assert (args);
+
+  tree value = TREE_VALUE (args);
+
+  if (TREE_CODE (value) != STRING_CST)
+    {
+      warning (OPT_Wattributes,
+	       "argument of %qE attribute is not a string constant",
+	       name);
+      *no_add_attrs = true;
+    }
+  else if (strcmp (TREE_STRING_POINTER (value), "ilink1")
+	   && strcmp (TREE_STRING_POINTER (value), "ilink2"))
+    {
+      warning (OPT_Wattributes,
+	       "argument of %qE attribute is not \"ilink1\" or \"ilink2\"",
+	       name);
+      *no_add_attrs = true;
+    }
+  return NULL_TREE;
+}
+
+/* Return zero if TYPE1 and TYPE2 are incompatible, one if they are
+   compatible, and two if they are nearly compatible (which causes a
+   warning to be generated).  */
+
+static int
+arc_comp_type_attributes (const_tree type1,
+			  const_tree type2)
+{
+  int l1, l2, m1, m2, s1, s2;
+
+  /* Check for mismatch of non-default calling convention.  */
+  if (TREE_CODE (type1) != FUNCTION_TYPE)
+    return 1;
+
+  /* Check for mismatched call attributes.  */
+  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
+  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
+  m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL;
+  m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL;
+  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
+  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
+
+  /* Only bother to check if an attribute is defined.  */
+  if (l1 | l2 | m1 | m2 | s1 | s2)
+    {
+      /* If one type has an attribute, the other must have the same
+	 attribute.  */
+      if ((l1 != l2) || (m1 != m2) || (s1 != s2))
+	return 0;
+
+      /* Disallow mixed attributes.  */
+      if (l1 + m1 + s1 > 1)
+	return 0;
+    }
+
+  return 1;
+}
+
+/* Set the default attributes for TYPE.  */
+
+void
+arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED)
+{
+  gcc_unreachable ();
+}
+
+/* Misc. utilities.  */
+
+/* X and Y are two things to compare using CODE.  Emit the compare insn and
+   return the rtx for the cc reg in the proper mode.  */
+
+rtx
+gen_compare_reg (rtx comparison, enum machine_mode omode)
+{
+  enum rtx_code code = GET_CODE (comparison);
+  rtx x = XEXP (comparison, 0);
+  rtx y = XEXP (comparison, 1);
+  rtx tmp, cc_reg;
+  enum machine_mode mode, cmode;
+
+  cmode = GET_MODE (x);
+  if (cmode == VOIDmode)
+    cmode = GET_MODE (y);
+  gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode);
+  if (cmode == SImode)
+    {
+      if (!register_operand (x, SImode))
+	{
+	  if (register_operand (y, SImode))
+	    {
+	      tmp = x;
+	      x = y;
+	      y = tmp;
+	      code = swap_condition (code);
+	    }
+	  else
+	    x = copy_to_mode_reg (SImode, x);
+	}
+      if (GET_CODE (y) == SYMBOL_REF && flag_pic)
+	y = copy_to_mode_reg (SImode, y);
+    }
+  else
+    {
+      x = force_reg (cmode, x);
+      y = force_reg (cmode, y);
+    }
+  mode = SELECT_CC_MODE (code, x, y);
+
+  cc_reg = gen_rtx_REG (mode, CC_REG);
+
+  /* ???
FIXME (x-y)==0, as done by both cmpsfpx_raw and + cmpdfpx_raw, is not a correct comparison for floats: + http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm + */ + if (TARGET_ARGONAUT_SET + && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP))) + { + switch (code) + { + case NE: case EQ: case LT: case UNGE: case LE: case UNGT: + case UNEQ: case LTGT: case ORDERED: case UNORDERED: + break; + case GT: case UNLE: case GE: case UNLT: + code = swap_condition (code); + tmp = x; + x = y; + y = tmp; + break; + default: + gcc_unreachable (); + } + if (cmode == SFmode) + { + emit_insn (gen_cmpsfpx_raw (x, y)); + } + else /* DFmode */ + { + /* Accepts Dx regs directly by insns. */ + emit_insn (gen_cmpdfpx_raw (x, y)); + } + + if (mode != CC_FPXmode) + emit_insn (gen_rtx_SET (VOIDmode, cc_reg, + gen_rtx_COMPARE (mode, + gen_rtx_REG (CC_FPXmode, 61), + const0_rtx))); + } + else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE) + { + rtx op0 = gen_rtx_REG (cmode, 0); + rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD); + + switch (code) + { + case NE: case EQ: case GT: case UNLE: case GE: case UNLT: + case UNEQ: case LTGT: case ORDERED: case UNORDERED: + break; + case LT: case UNGE: case LE: case UNGT: + code = swap_condition (code); + tmp = x; + x = y; + y = tmp; + break; + default: + gcc_unreachable (); + } + if (currently_expanding_to_rtl) + { + emit_move_insn (op0, x); + emit_move_insn (op1, y); + } + else + { + gcc_assert (rtx_equal_p (op0, x)); + gcc_assert (rtx_equal_p (op1, y)); + } + emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1))); + } + else + emit_insn (gen_rtx_SET (omode, cc_reg, + gen_rtx_COMPARE (mode, x, y))); + return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx); +} + +/* Return true if VALUE, a const_double, will fit in a limm (4 byte number). + We assume the value can be either signed or unsigned. */ + +bool +arc_double_limm_p (rtx value) +{ + HOST_WIDE_INT low, high; + + gcc_assert (GET_CODE (value) == CONST_DOUBLE); + + if (TARGET_DPFP) + return true; + + low = CONST_DOUBLE_LOW (value); + high = CONST_DOUBLE_HIGH (value); + + if (low & 0x80000000) + { + return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0) + || (((low & - (unsigned HOST_WIDE_INT) 0x80000000) + == - (unsigned HOST_WIDE_INT) 0x80000000) + && high == -1)); + } + else + { + return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0; + } +} + +/* Do any needed setup for a variadic function. For the ARC, we must + create a register parameter block, and then copy any anonymous arguments + in registers to memory. + + CUM has not been updated for the last named argument which has type TYPE + and mode MODE, and we rely on this fact. */ + +static void +arc_setup_incoming_varargs (cumulative_args_t args_so_far, + enum machine_mode mode, tree type, + int *pretend_size, int no_rtl) +{ + int first_anon_arg; + CUMULATIVE_ARGS next_cum; + + /* We must treat `__builtin_va_alist' as an anonymous arg. */ + + next_cum = *get_cumulative_args (args_so_far); + arc_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1); + first_anon_arg = next_cum; + + if (first_anon_arg < MAX_ARC_PARM_REGS) + { + /* First anonymous (unnamed) argument is in a reg. */ + + /* Note that first_reg_offset < MAX_ARC_PARM_REGS. 
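+	 E.g., assuming MAX_ARC_PARM_REGS is 8, a variadic function with one
+	 named int argument gets first_reg_offset 1; the remaining seven
+	 argument registers are dumped to the block, and *pretend_size
+	 becomes 7 * UNITS_PER_WORD.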
*/ + int first_reg_offset = first_anon_arg; + + if (!no_rtl) + { + rtx regblock + = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx, + FIRST_PARM_OFFSET (0))); + move_block_from_reg (first_reg_offset, regblock, + MAX_ARC_PARM_REGS - first_reg_offset); + } + + *pretend_size + = ((MAX_ARC_PARM_REGS - first_reg_offset ) * UNITS_PER_WORD); + } +} + +/* Cost functions. */ + +/* Provide the costs of an addressing mode that contains ADDR. + If ADDR is not a valid address, its cost is irrelevant. */ + +int +arc_address_cost (rtx addr, enum machine_mode, addr_space_t, bool speed) +{ + switch (GET_CODE (addr)) + { + case REG : + return speed || satisfies_constraint_Rcq (addr) ? 0 : 1; + case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: + case PRE_MODIFY: case POST_MODIFY: + return !speed; + + case LABEL_REF : + case SYMBOL_REF : + case CONST : + /* Most likely needs a LIMM. */ + return COSTS_N_INSNS (1); + + case PLUS : + { + register rtx plus0 = XEXP (addr, 0); + register rtx plus1 = XEXP (addr, 1); + + if (GET_CODE (plus0) != REG + && (GET_CODE (plus0) != MULT + || !CONST_INT_P (XEXP (plus0, 1)) + || (INTVAL (XEXP (plus0, 1)) != 2 + && INTVAL (XEXP (plus0, 1)) != 4))) + break; + + switch (GET_CODE (plus1)) + { + case CONST_INT : + return (!RTX_OK_FOR_OFFSET_P (SImode, plus1) + ? COSTS_N_INSNS (1) + : speed + ? 0 + : (satisfies_constraint_Rcq (plus0) + && satisfies_constraint_O (plus1)) + ? 0 + : 1); + case REG: + return (speed < 1 ? 0 + : (satisfies_constraint_Rcq (plus0) + && satisfies_constraint_Rcq (plus1)) + ? 0 : 1); + case CONST : + case SYMBOL_REF : + case LABEL_REF : + return COSTS_N_INSNS (1); + default: + break; + } + break; + } + default: + break; + } + + return 4; +} + +/* Emit instruction X with the frame related bit set. */ + +static rtx +frame_insn (rtx x) +{ + x = emit_insn (x); + RTX_FRAME_RELATED_P (x) = 1; + return x; +} + +/* Emit a frame insn to move SRC to DST. */ + +static rtx +frame_move (rtx dst, rtx src) +{ + return frame_insn (gen_rtx_SET (VOIDmode, dst, src)); +} + +/* Like frame_move, but add a REG_INC note for REG if ADDR contains an + auto increment address, or is zero. */ + +static rtx +frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr) +{ + rtx insn = frame_move (dst, src); + + if (!addr + || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC + || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY) + add_reg_note (insn, REG_INC, reg); + return insn; +} + +/* Emit a frame insn which adjusts a frame address register REG by OFFSET. */ + +static rtx +frame_add (rtx reg, HOST_WIDE_INT offset) +{ + gcc_assert ((offset & 0x3) == 0); + if (!offset) + return NULL_RTX; + return frame_move (reg, plus_constant (Pmode, reg, offset)); +} + +/* Emit a frame insn which adjusts stack pointer by OFFSET. */ + +static rtx +frame_stack_add (HOST_WIDE_INT offset) +{ + return frame_add (stack_pointer_rtx, offset); +} + +/* Traditionally, we push saved registers first in the prologue, + then we allocate the rest of the frame - and reverse in the epilogue. + This has still its merits for ease of debugging, or saving code size + or even execution time if the stack frame is so large that some accesses + can't be encoded anymore with offsets in the instruction code when using + a different scheme. + Also, it would be a good starting point if we got instructions to help + with register save/restore. + + However, often stack frames are small, and the pushing / popping has + some costs: + - the stack modification prevents a lot of scheduling. 
+   - frame allocation / deallocation needs extra instructions.
+   - unless we know that we compile ARC700 user code, we need to put
+     a memory barrier after frame allocation / before deallocation to
+     prevent interrupts clobbering our data in the frame.
+     In particular, we don't have any such guarantees for library functions,
+     which, on the other hand, tend to have small frames.
+
+   Thus, for small frames, we'd like to use a different scheme:
+   - The frame is allocated in full with the first prologue instruction,
+     and deallocated in full with the last epilogue instruction.
+     Thus, the instructions in-between can be freely scheduled.
+   - If the function has no outgoing arguments on the stack, we can allocate
+     one register save slot at the top of the stack.  This register can then
+     be saved simultaneously with frame allocation, and restored with
+     frame deallocation.
+     This register can be picked depending on scheduling considerations,
+     although some thought should go into having some set of registers
+     to be potentially lingering after a call, and others to be available
+     immediately - i.e. in the absence of interprocedural optimization, we
+     can use an ABI-like convention for register allocation to reduce
+     stalls after function return.  */
+/* Function prologue/epilogue handlers.  */
+
+/* ARCompact stack frames look like:
+
+           Before call                     After call
+  high  +-----------------------+       +-----------------------+
+  mem   |  reg parm save area   |       | reg parm save area    |
+        |  only created for     |       | only created for      |
+        |  variable arg fns     |       | variable arg fns      |
+    AP  +-----------------------+       +-----------------------+
+        |  return addr register |       | return addr register  |
+        |  (if required)        |       | (if required)         |
+        +-----------------------+       +-----------------------+
+        |                       |       |                       |
+        |  reg save area        |       | reg save area         |
+        |                       |       |                       |
+        +-----------------------+       +-----------------------+
+        |  frame pointer        |       | frame pointer         |
+        |  (if required)        |       | (if required)         |
+    FP  +-----------------------+       +-----------------------+
+        |                       |       |                       |
+        |  local/temp variables |       | local/temp variables  |
+        |                       |       |                       |
+        +-----------------------+       +-----------------------+
+        |                       |       |                       |
+        |  arguments on stack   |       | arguments on stack    |
+        |                       |       |                       |
+    SP  +-----------------------+       +-----------------------+
+                                        | reg parm save area    |
+                                        | only created for      |
+                                        | variable arg fns      |
+                                    AP  +-----------------------+
+                                        | return addr register  |
+                                        | (if required)         |
+                                        +-----------------------+
+                                        |                       |
+                                        | reg save area         |
+                                        |                       |
+                                        +-----------------------+
+                                        | frame pointer         |
+                                        | (if required)         |
+                                    FP  +-----------------------+
+                                        |                       |
+                                        | local/temp variables  |
+                                        |                       |
+                                        +-----------------------+
+                                        |                       |
+                                        | arguments on stack    |
+  low                                   |                       |
+  mem                               SP  +-----------------------+
+
+Notes:
+1) The "reg parm save area" does not exist for non-variable argument fns.
+   The "reg parm save area" can be eliminated completely if we created our
+   own va-arc.h, but that has tradeoffs as well (so it's not done).  */
+
+/* Structure to be filled in by arc_compute_frame_size with register
+   save masks, and offsets for the current function.  */
+struct GTY (()) arc_frame_info
+{
+  unsigned int total_size;	/* # bytes that the entire frame takes up.  */
+  unsigned int extra_size;	/* # bytes of extra stuff.  */
+  unsigned int pretend_size;	/* # bytes we push and pretend caller did.  */
+  unsigned int args_size;	/* # bytes that outgoing arguments take up.  */
+  unsigned int reg_size;	/* # bytes needed to store regs.  */
+  unsigned int var_size;	/* # bytes that variables take up.
 */
+  unsigned int reg_offset;	/* Offset from new sp to store regs.  */
+  unsigned int gmask;		/* Mask of saved gp registers.  */
+  int initialized;		/* Nonzero if frame size already calculated.  */
+  short millicode_start_reg;
+  short millicode_end_reg;
+  bool save_return_addr;
+};
+
+/* Defining data structures for per-function information.  */
+
+typedef struct GTY (()) machine_function
+{
+  enum arc_function_type fn_type;
+  struct arc_frame_info frame_info;
+  /* To keep track of unalignment caused by short insns.  */
+  int unalign;
+  int force_short_suffix; /* Used when disgorging return delay slot insns.  */
+  const char *size_reason;
+  struct arc_ccfsm ccfsm_current;
+  /* Map from uid to ccfsm state during branch shortening.  */
+  rtx ccfsm_current_insn;
+  char arc_reorg_started;
+  char prescan_initialized;
+} machine_function;
+
+/* Type of function DECL.
+
+   The result is cached.  To reset the cache at the end of a function,
+   call with DECL = NULL_TREE.  */
+
+enum arc_function_type
+arc_compute_function_type (struct function *fun)
+{
+  tree decl = fun->decl;
+  tree a;
+  enum arc_function_type fn_type = fun->machine->fn_type;
+
+  if (fn_type != ARC_FUNCTION_UNKNOWN)
+    return fn_type;
+
+  /* Assume we have a normal function (not an interrupt handler).  */
+  fn_type = ARC_FUNCTION_NORMAL;
+
+  /* Now see if this is an interrupt handler.  */
+  for (a = DECL_ATTRIBUTES (decl);
+       a;
+       a = TREE_CHAIN (a))
+    {
+      tree name = TREE_PURPOSE (a), args = TREE_VALUE (a);
+
+      if (name == get_identifier ("interrupt")
+	  && list_length (args) == 1
+	  && TREE_CODE (TREE_VALUE (args)) == STRING_CST)
+	{
+	  tree value = TREE_VALUE (args);
+
+	  if (!strcmp (TREE_STRING_POINTER (value), "ilink1"))
+	    fn_type = ARC_FUNCTION_ILINK1;
+	  else if (!strcmp (TREE_STRING_POINTER (value), "ilink2"))
+	    fn_type = ARC_FUNCTION_ILINK2;
+	  else
+	    gcc_unreachable ();
+	  break;
+	}
+    }
+
+  return fun->machine->fn_type = fn_type;
+}
+
+#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
+#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
+
+/* Tell prologue and epilogue if register REGNO should be saved / restored.
+   The return address and frame pointer are treated separately.
+   Don't consider them here.
+   Addition for pic: The gp register needs to be saved if the current
+   function changes it to access gotoff variables.
+   FIXME: This will not be needed if we used some arbitrary register
+   instead of r26.
+*/
+#define MUST_SAVE_REGISTER(regno, interrupt_p) \
+(((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \
+  && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) \
+ || (flag_pic && crtl->uses_pic_offset_table \
+     && regno == PIC_OFFSET_TABLE_REGNUM) )
+
+#define MUST_SAVE_RETURN_ADDR \
+  (cfun->machine->frame_info.save_return_addr)
+
+/* Return non-zero if there are registers to be saved or loaded using
+   millicode thunks.  We can only use consecutive sequences starting
+   with r13, and not going beyond r25.
+   GMASK is a bitmask of registers to save.  This function sets
+   FRAME->millicode_start_reg .. FRAME->millicode_end_reg to the range
+   of registers to be saved / restored with a millicode call.  */
+
+static int
+arc_compute_millicode_save_restore_regs (unsigned int gmask,
+					 struct arc_frame_info *frame)
+{
+  int regno;
+
+  int start_reg = 13, end_reg = 25;
+
+  for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
+    regno++;
+  end_reg = regno - 1;
+  /* There is no point in using millicode thunks if we don't save/restore
+     at least three registers.
For non-leaf functions we also have the + blink restore. */ + if (regno - start_reg >= 3 - (crtl->is_leaf == 0)) + { + frame->millicode_start_reg = 13; + frame->millicode_end_reg = regno - 1; + return 1; + } + return 0; +} + +/* Return the bytes needed to compute the frame pointer from the current + stack pointer. + + SIZE is the size needed for local variables. */ + +unsigned int +arc_compute_frame_size (int size) /* size = # of var. bytes allocated. */ +{ + int regno; + unsigned int total_size, var_size, args_size, pretend_size, extra_size; + unsigned int reg_size, reg_offset; + unsigned int gmask; + enum arc_function_type fn_type; + int interrupt_p; + struct arc_frame_info *frame_info = &cfun->machine->frame_info; + + size = ARC_STACK_ALIGN (size); + + /* 1) Size of locals and temporaries */ + var_size = size; + + /* 2) Size of outgoing arguments */ + args_size = crtl->outgoing_args_size; + + /* 3) Calculate space needed for saved registers. + ??? We ignore the extension registers for now. */ + + /* See if this is an interrupt handler. Call used registers must be saved + for them too. */ + + reg_size = 0; + gmask = 0; + fn_type = arc_compute_function_type (cfun); + interrupt_p = ARC_INTERRUPT_P (fn_type); + + for (regno = 0; regno <= 31; regno++) + { + if (MUST_SAVE_REGISTER (regno, interrupt_p)) + { + reg_size += UNITS_PER_WORD; + gmask |= 1 << regno; + } + } + + /* 4) Space for back trace data structure. + <return addr reg size> (if required) + <fp size> (if required). */ + frame_info->save_return_addr + = (!crtl->is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM)); + /* Saving blink reg in case of leaf function for millicode thunk calls. */ + if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET) + { + if (arc_compute_millicode_save_restore_regs (gmask, frame_info)) + frame_info->save_return_addr = true; + } + + extra_size = 0; + if (MUST_SAVE_RETURN_ADDR) + extra_size = 4; + if (frame_pointer_needed) + extra_size += 4; + + /* 5) Space for variable arguments passed in registers */ + pretend_size = crtl->args.pretend_args_size; + + /* Ensure everything before the locals is aligned appropriately. */ + { + unsigned int extra_plus_reg_size; + unsigned int extra_plus_reg_size_aligned; + + extra_plus_reg_size = extra_size + reg_size; + extra_plus_reg_size_aligned = ARC_STACK_ALIGN(extra_plus_reg_size); + reg_size = extra_plus_reg_size_aligned - extra_size; + } + + /* Compute total frame size. */ + total_size = var_size + args_size + extra_size + pretend_size + reg_size; + + total_size = ARC_STACK_ALIGN (total_size); + + /* Compute offset of register save area from stack pointer: + A5 Frame: pretend_size <blink> reg_size <fp> var_size args_size <--sp + */ + reg_offset = (total_size - (pretend_size + reg_size + extra_size) + + (frame_pointer_needed ? 4 : 0)); + + /* Save computed information. */ + frame_info->total_size = total_size; + frame_info->extra_size = extra_size; + frame_info->pretend_size = pretend_size; + frame_info->var_size = var_size; + frame_info->args_size = args_size; + frame_info->reg_size = reg_size; + frame_info->reg_offset = reg_offset; + frame_info->gmask = gmask; + frame_info->initialized = reload_completed; + + /* Ok, we're done. */ + return total_size; +} + +/* Common code to save/restore registers. */ +/* BASE_REG is the base register to use for addressing and to adjust. + GMASK is a bitmask of general purpose registers to save/restore. 
+   epilogue_p 0: prologue 1:epilogue 2:epilogue, sibling thunk
+   If *FIRST_OFFSET is non-zero, add it first to BASE_REG - preferably
+   using a pre-modify for the first memory access.  *FIRST_OFFSET is then
+   zeroed.  */
+
+static void
+arc_save_restore (rtx base_reg,
+		  unsigned int gmask, int epilogue_p, int *first_offset)
+{
+  unsigned int offset = 0;
+  int regno;
+  struct arc_frame_info *frame = &cfun->machine->frame_info;
+  rtx sibthunk_insn = NULL_RTX;
+
+  if (gmask)
+    {
+      /* Millicode thunks implementation:
+	 Generates calls to millicodes for registers starting from r13 to r25
+	 Present Limitations:
+	 - Only one range supported.  The remaining regs will have the
+	   ordinary st and ld instructions for stores and loads.  Hence a
+	   gmask asking to store r13-14, r16-r25 will only generate calls to
+	   store and load r13 to r14 while store and load insns will be
+	   generated for r16 to r25 in the prologue and epilogue respectively.
+
+	 - Presently the library only supports register ranges starting from
+	   r13.
+      */
+      if (epilogue_p == 2 || frame->millicode_end_reg > 14)
+	{
+	  int start_call = frame->millicode_start_reg;
+	  int end_call = frame->millicode_end_reg;
+	  int n_regs = end_call - start_call + 1;
+	  int i = 0, r, off = 0;
+	  rtx insn;
+	  rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+
+	  if (*first_offset)
+	    {
+	      /* "reg_size" won't be more than 127.  */
+	      gcc_assert (epilogue_p || abs (*first_offset) <= 127);
+	      frame_add (base_reg, *first_offset);
+	      *first_offset = 0;
+	    }
+	  insn = gen_rtx_PARALLEL
+		  (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1));
+	  if (epilogue_p == 2)
+	    i += 2;
+	  else
+	    XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr);
+	  for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++)
+	    {
+	      rtx reg = gen_rtx_REG (SImode, r);
+	      rtx mem
+		= gen_frame_mem (SImode, plus_constant (Pmode, base_reg, off));
+
+	      if (epilogue_p)
+		XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, reg, mem);
+	      else
+		XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, mem, reg);
+	      gmask = gmask & ~(1L << r);
+	    }
+	  if (epilogue_p == 2)
+	    sibthunk_insn = insn;
+	  else
+	    frame_insn (insn);
+	  offset += off;
+	}
+
+      for (regno = 0; regno <= 31; regno++)
+	{
+	  if ((gmask & (1L << regno)) != 0)
+	    {
+	      rtx reg = gen_rtx_REG (SImode, regno);
+	      rtx addr, mem;
+
+	      if (*first_offset)
+		{
+		  gcc_assert (!offset);
+		  addr = plus_constant (Pmode, base_reg, *first_offset);
+		  addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr);
+		  *first_offset = 0;
+		}
+	      else
+		{
+		  gcc_assert (SMALL_INT (offset));
+		  addr = plus_constant (Pmode, base_reg, offset);
+		}
+	      mem = gen_frame_mem (SImode, addr);
+	      if (epilogue_p)
+		frame_move_inc (reg, mem, base_reg, addr);
+	      else
+		frame_move_inc (mem, reg, base_reg, addr);
+	      offset += UNITS_PER_WORD;
+	    } /* if */
+	} /* for */
+    }/* if */
+  if (sibthunk_insn)
+    {
+      rtx r12 = gen_rtx_REG (Pmode, 12);
+
+      frame_insn (gen_rtx_SET (VOIDmode, r12, GEN_INT (offset)));
+      XVECEXP (sibthunk_insn, 0, 0) = ret_rtx;
+      XVECEXP (sibthunk_insn, 0, 1)
+	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+		       gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
+      sibthunk_insn = emit_jump_insn (sibthunk_insn);
+      RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
+    }
+} /* arc_save_restore */
+
+
+int arc_return_address_regs[4]
+  = {0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM};
+
+/* Set up the stack and frame pointer (if desired) for the function.
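+   Roughly, the prologue below: allocates the pretend-args area for variadic
+   functions, pushes blink if the return address must be saved, saves the
+   call-saved registers, saves and sets up the frame pointer if needed, and
+   finally allocates the remainder of the frame.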
*/ + +void +arc_expand_prologue (void) +{ + int size = get_frame_size (); + unsigned int gmask = cfun->machine->frame_info.gmask; + /* unsigned int frame_pointer_offset;*/ + unsigned int frame_size_to_allocate; + /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13. + Change the stack layout so that we rather store a high register with the + PRE_MODIFY, thus enabling more short insn generation.) */ + int first_offset = 0; + + size = ARC_STACK_ALIGN (size); + + /* Compute/get total frame size. */ + size = (!cfun->machine->frame_info.initialized + ? arc_compute_frame_size (size) + : cfun->machine->frame_info.total_size); + + if (flag_stack_usage_info) + current_function_static_stack_size = size; + + /* Keep track of frame size to be allocated. */ + frame_size_to_allocate = size; + + /* These cases shouldn't happen. Catch them now. */ + gcc_assert (!(size == 0 && gmask)); + + /* Allocate space for register arguments if this is a variadic function. */ + if (cfun->machine->frame_info.pretend_size != 0) + { + /* Ensure pretend_size is maximum of 8 * word_size. */ + gcc_assert (cfun->machine->frame_info.pretend_size <= 32); + + frame_stack_add (-(HOST_WIDE_INT)cfun->machine->frame_info.pretend_size); + frame_size_to_allocate -= cfun->machine->frame_info.pretend_size; + } + + /* The home-grown ABI says link register is saved first. */ + if (MUST_SAVE_RETURN_ADDR) + { + rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM); + rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)); + + frame_move_inc (mem, ra, stack_pointer_rtx, 0); + frame_size_to_allocate -= UNITS_PER_WORD; + + } /* MUST_SAVE_RETURN_ADDR */ + + /* Save any needed call-saved regs (and call-used if this is an + interrupt handler) for ARCompact ISA. */ + if (cfun->machine->frame_info.reg_size) + { + first_offset = -cfun->machine->frame_info.reg_size; + /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask. */ + arc_save_restore (stack_pointer_rtx, gmask, 0, &first_offset); + frame_size_to_allocate -= cfun->machine->frame_info.reg_size; + } + + + /* Save frame pointer if needed. */ + if (frame_pointer_needed) + { + rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (-UNITS_PER_WORD + first_offset)); + rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + addr)); + frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0); + frame_size_to_allocate -= UNITS_PER_WORD; + first_offset = 0; + frame_move (frame_pointer_rtx, stack_pointer_rtx); + } + + /* ??? We don't handle the case where the saved regs are more than 252 + bytes away from sp. This can be handled by decrementing sp once, saving + the regs, and then decrementing it again. The epilogue doesn't have this + problem as the `ld' insn takes reg+limm values (though it would be more + efficient to avoid reg+limm). */ + + frame_size_to_allocate -= first_offset; + /* Allocate the stack frame. */ + if (frame_size_to_allocate > 0) + frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate); + + /* Setup the gp register, if needed. */ + if (crtl->uses_pic_offset_table) + arc_finalize_pic (); +} + +/* Do any necessary cleanup after a function to restore stack, frame, + and regs. */ + +void +arc_expand_epilogue (int sibcall_p) +{ + int size = get_frame_size (); + enum arc_function_type fn_type = arc_compute_function_type (cfun); + + size = ARC_STACK_ALIGN (size); + size = (!cfun->machine->frame_info.initialized + ? 
arc_compute_frame_size (size) + : cfun->machine->frame_info.total_size); + + if (1) + { + unsigned int pretend_size = cfun->machine->frame_info.pretend_size; + unsigned int frame_size; + unsigned int size_to_deallocate; + int restored; +#if 0 + bool fp_restored_p; +#endif + int can_trust_sp_p = !cfun->calls_alloca; + int first_offset = 0; + int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0; + + size_to_deallocate = size; + + frame_size = size - (pretend_size + + cfun->machine->frame_info.reg_size + + cfun->machine->frame_info.extra_size); + + /* ??? There are lots of optimizations that can be done here. + EG: Use fp to restore regs if it's closer. + Maybe in time we'll do them all. For now, always restore regs from + sp, but don't restore sp if we don't have to. */ + + if (!can_trust_sp_p) + gcc_assert (frame_pointer_needed); + + /* Restore stack pointer to the beginning of saved register area for + ARCompact ISA. */ + if (frame_size) + { + if (frame_pointer_needed) + frame_move (stack_pointer_rtx, frame_pointer_rtx); + else + first_offset = frame_size; + size_to_deallocate -= frame_size; + } + else if (!can_trust_sp_p) + frame_stack_add (-frame_size); + + + /* Restore any saved registers. */ + if (frame_pointer_needed) + { + rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx); + + frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr), + stack_pointer_rtx, 0); + size_to_deallocate -= UNITS_PER_WORD; + } + + /* Load blink after the calls to thunk calls in case of optimize size. */ + if (millicode_p) + { + int sibthunk_p = (!sibcall_p + && fn_type == ARC_FUNCTION_NORMAL + && !cfun->machine->frame_info.pretend_size); + + gcc_assert (!(cfun->machine->frame_info.gmask + & (FRAME_POINTER_MASK | RETURN_ADDR_MASK))); + arc_save_restore (stack_pointer_rtx, + cfun->machine->frame_info.gmask, + 1 + sibthunk_p, &first_offset); + if (sibthunk_p) + goto epilogue_done; + } + /* If we are to restore registers, and first_offset would require + a limm to be encoded in a PRE_MODIFY, yet we can add it with a + fast add to the stack pointer, do this now. */ + if ((!SMALL_INT (first_offset) + && cfun->machine->frame_info.gmask + && ((TARGET_ARC700 && !optimize_size) + ? first_offset <= 0x800 + : satisfies_constraint_C2a (GEN_INT (first_offset)))) + /* Also do this if we have both gprs and return + address to restore, and they both would need a LIMM. */ + || (MUST_SAVE_RETURN_ADDR + && !SMALL_INT ((cfun->machine->frame_info.reg_size + first_offset) >> 2) + && cfun->machine->frame_info.gmask)) + { + frame_stack_add (first_offset); + first_offset = 0; + } + if (MUST_SAVE_RETURN_ADDR) + { + rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + int ra_offs = cfun->machine->frame_info.reg_size + first_offset; + rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs); + + /* If the load of blink would need a LIMM, but we can add + the offset quickly to sp, do the latter. */ + if (!SMALL_INT (ra_offs >> 2) + && !cfun->machine->frame_info.gmask + && ((TARGET_ARC700 && !optimize_size) + ? ra_offs <= 0x800 + : satisfies_constraint_C2a (GEN_INT (ra_offs)))) + { + size_to_deallocate -= ra_offs - first_offset; + first_offset = 0; + frame_stack_add (ra_offs); + ra_offs = 0; + addr = stack_pointer_rtx; + } + /* See if we can combine the load of the return address with the + final stack adjustment. + We need a separate load if there are still registers to + restore. We also want a separate load if the combined insn + would need a limm, but a separate load doesn't. 
*/ + if (ra_offs + && !cfun->machine->frame_info.gmask + && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2))) + { + addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr); + first_offset = 0; + size_to_deallocate -= cfun->machine->frame_info.reg_size; + } + else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD) + { + addr = gen_rtx_POST_INC (Pmode, addr); + size_to_deallocate = 0; + } + frame_move_inc (ra, gen_frame_mem (Pmode, addr), stack_pointer_rtx, addr); + } + + if (!millicode_p) + { + if (cfun->machine->frame_info.reg_size) + arc_save_restore (stack_pointer_rtx, + /* The zeroing of these two bits is unnecessary, but leave this in for clarity. */ + cfun->machine->frame_info.gmask + & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset); + } + + + /* The rest of this function does the following: + ARCompact : handle epilogue_delay, restore sp (phase-2), return + */ + + /* Keep track of how much of the stack pointer we've restored. + It makes the following a lot more readable. */ + size_to_deallocate += first_offset; + restored = size - size_to_deallocate; +#if 0 + fp_restored_p = 1; +#endif + + if (size > restored) + frame_stack_add (size - restored); + /* Emit the return instruction. */ + if (sibcall_p == FALSE) + emit_jump_insn (gen_simple_return ()); + } + epilogue_done: + if (!TARGET_EPILOGUE_CFI) + { + rtx insn; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + RTX_FRAME_RELATED_P (insn) = 0; + } +} + +/* Return the offset relative to the stack pointer where the return address + is stored, or -1 if it is not stored. */ + +int +arc_return_slot_offset () +{ + struct arc_frame_info *afi = &cfun->machine->frame_info; + + return (afi->save_return_addr + ? afi->total_size - afi->pretend_size - afi->extra_size : -1); +} + +/* PIC */ + +/* Emit special PIC prologues and epilogues. */ +/* If the function has any GOTOFF relocations, then the GOTBASE + register has to be setup in the prologue + The instruction needed at the function start for setting up the + GOTBASE register is + add rdest, pc, + ---------------------------------------------------------- + The rtl to be emitted for this should be: + set (reg basereg) + (plus (reg pc) + (const (unspec (symref _DYNAMIC) 3))) + ---------------------------------------------------------- */ + +static void +arc_finalize_pic (void) +{ + rtx pat; + rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM); + + if (crtl->uses_pic_offset_table == 0) + return; + + gcc_assert (flag_pic != 0); + + pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC"); + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pat), ARC_UNSPEC_GOT); + pat = gen_rtx_CONST (Pmode, pat); + + pat = gen_rtx_SET (VOIDmode, baseptr_rtx, pat); + + emit_insn (pat); +} + +/* !TARGET_BARREL_SHIFTER support. */ +/* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what + kind of shift. */ + +void +emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2) +{ + rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2); + rtx pat + = ((shift4_operator (shift, SImode) ? gen_shift_si3 : gen_shift_si3_loop) + (op0, op1, op2, shift)); + emit_insn (pat); +} + +/* Output the assembler code for doing a shift. + We go to a bit of trouble to generate efficient code as the ARC601 only has + single bit shifts. This is taken from the h8300 port. We only have one + mode of shifting and can't access individual bytes like the h8300 can, so + this is greatly simplified (at the expense of not generating hyper- + efficient code). 
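+   E.g. a left shift by one is emitted as "add %0,%1,%1", since adding a
+   value to itself shifts it left by one bit.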
+ + This function is not used if the variable shift insns are present. */ + +/* FIXME: This probably can be done using a define_split in arc.md. + Alternately, generate rtx rather than output instructions. */ + +const char * +output_shift (rtx *operands) +{ + /* static int loopend_lab;*/ + rtx shift = operands[3]; + enum machine_mode mode = GET_MODE (shift); + enum rtx_code code = GET_CODE (shift); + const char *shift_one; + + gcc_assert (mode == SImode); + + switch (code) + { + case ASHIFT: shift_one = "add %0,%1,%1"; break; + case ASHIFTRT: shift_one = "asr %0,%1"; break; + case LSHIFTRT: shift_one = "lsr %0,%1"; break; + default: gcc_unreachable (); + } + + if (GET_CODE (operands[2]) != CONST_INT) + { + output_asm_insn ("and.f lp_count,%2, 0x1f", operands); + goto shiftloop; + } + else + { + int n; + + n = INTVAL (operands[2]); + + /* Only consider the lower 5 bits of the shift count. */ + n = n & 0x1f; + + /* First see if we can do them inline. */ + /* ??? We could get better scheduling & shorter code (using short insns) + by using splitters. Alas, that'd be even more verbose. */ + if (code == ASHIFT && n <= 9 && n > 2 + && dest_reg_operand (operands[4], SImode)) + { + output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands); + for (n -=3 ; n >= 3; n -= 3) + output_asm_insn ("add3 %0,%4,%0", operands); + if (n == 2) + output_asm_insn ("add2 %0,%4,%0", operands); + else if (n) + output_asm_insn ("add %0,%0,%0", operands); + } + else if (n <= 4) + { + while (--n >= 0) + { + output_asm_insn (shift_one, operands); + operands[1] = operands[0]; + } + } + /* See if we can use a rotate/and. */ + else if (n == BITS_PER_WORD - 1) + { + switch (code) + { + case ASHIFT : + output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands); + break; + case ASHIFTRT : + /* The ARC doesn't have a rol insn. Use something else. */ + output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands); + break; + case LSHIFTRT : + /* The ARC doesn't have a rol insn. Use something else. */ + output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands); + break; + default: + break; + } + } + else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode)) + { + switch (code) + { + case ASHIFT : + output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands); + break; + case ASHIFTRT : +#if 1 /* Need some scheduling comparisons. */ + output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t" + "add.f 0,%4,%4\n\trlc %0,%0", operands); +#else + output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t" + "sbc.f %0,%0,%4\n\trlc %0,%0", operands); +#endif + break; + case LSHIFTRT : +#if 1 + output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t" + "add.f 0,%4,%4\n\trlc %0,%0", operands); +#else + output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t" + "and %0,%0,1\n\trlc %0,%0", operands); +#endif + break; + default: + break; + } + } + else if (n == BITS_PER_WORD - 3 && code == ASHIFT) + output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0", + operands); + /* Must loop. */ + else + { + operands[2] = GEN_INT (n); + output_asm_insn ("mov.f lp_count, %2", operands); + + shiftloop: + { + output_asm_insn ("lpnz\t2f", operands); + output_asm_insn (shift_one, operands); + output_asm_insn ("nop", operands); + fprintf (asm_out_file, "2:\t%s end single insn loop\n", + ASM_COMMENT_START); + } + } + } + + return ""; +} + +/* Nested function support. */ + +/* Directly store VALUE into memory object BLOCK at OFFSET. 
 */
+
+static void
+emit_store_direct (rtx block, int offset, int value)
+{
+  emit_insn (gen_store_direct (adjust_address (block, SImode, offset),
+			       force_reg (SImode,
+					  gen_int_mode (value, SImode))));
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+   FNADDR is an RTX for the address of the function's pure code.
+   CXT is an RTX for the static chain value for the function.  */
+/* With potentially multiple shared objects loaded, and multiple stacks
+   present for multiple threads where trampolines might reside, a simple
+   range check will likely not suffice for the profiler to tell if a callee
+   is a trampoline.  We make a speedier check possible by making the
+   trampoline start at an address that is not 4-byte aligned.
+   A trampoline looks like this:
+
+   nop_s	     0x78e0
+entry:
+   ld_s r12,[pcl,12] 0xd403
+   ld   r11,[pcl,12] 0x170c 700b
+   j_s [r12]	     0x7c00
+   nop_s	     0x78e0
+
+   The fastest trampoline to execute for trampolines within +-8KB of CXT
+   would be:
+   add2 r11,pcl,s12
+   j [limm]	     0x20200f80 limm
+   and that would also be faster to write to the stack by computing the offset
+   from CXT to TRAMP at compile time.  However, it would really be better to
+   get rid of the high cost of cache invalidation when generating trampolines,
+   which requires that the code part of trampolines stays constant, and
+   additionally either
+   - making sure that no executable code but trampolines is on the stack,
+     no icache entries linger for the area of the stack from when before the
+     stack was allocated, and allocating trampolines in trampoline-only
+     cache lines
+   or
+   - allocate trampolines from a special pool of pre-allocated trampolines.  */
+
+static void
+arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
+{
+  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+
+  emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0);
+  emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c);
+  emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00);
+  emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr);
+  emit_move_insn (adjust_address (tramp, SImode, 16), cxt);
+  emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0)));
+}
+
+/* Allow the profiler to easily distinguish trampolines from normal
+  functions.  */
+
+static rtx
+arc_trampoline_adjust_address (rtx addr)
+{
+  return plus_constant (Pmode, addr, 2);
+}
+
+/* This is set briefly to 1 when we output a ".as" address modifier, and then
+   reset when we output the scaled address.  */
+static int output_scaled = 0;
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
+/* In final.c:output_asm_insn:
+    'l' : label
+    'a' : address
+    'c' : constant address if CONSTANT_ADDRESS_P
+    'n' : negative
+   Here:
+    'Z': log2(x+1)-1
+    'z': log2
+    'M': log2(~x)
+    '#': condbranch delay slot suffix
+    '*': jump delay slot suffix
+    '?' : nonjump-insn suffix for conditional execution or short instruction
+    '!' : jump / call suffix for conditional execution or short instruction
+    '`': fold constant inside unary operator, re-recognize, and emit.
+ 'd' + 'D' + 'R': Second word + 'S' + 'B': Branch comparison operand - suppress sda reference + 'H': Most significant word + 'L': Least significant word + 'A': ASCII decimal representation of floating point value + 'U': Load/store update or scaling indicator + 'V': cache bypass indicator for volatile + 'P' + 'F' + '^' + 'O': Operator + 'o': original symbol - no @ prepending. */ + +void +arc_print_operand (FILE *file, rtx x, int code) +{ + switch (code) + { + case 'Z': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%d",exact_log2(INTVAL (x) + 1) - 1 ); + else + output_operand_lossage ("invalid operand to %%Z code"); + + return; + + case 'z': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%d",exact_log2(INTVAL (x)) ); + else + output_operand_lossage ("invalid operand to %%z code"); + + return; + + case 'M': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%d",exact_log2(~INTVAL (x)) ); + else + output_operand_lossage ("invalid operand to %%M code"); + + return; + + case '#' : + /* Conditional branches depending on condition codes. + Note that this is only for branches that were known to depend on + condition codes before delay slot scheduling; + out-of-range brcc / bbit expansions should use '*'. + This distinction is important because of the different + allowable delay slot insns and the output of the delay suffix + for TARGET_AT_DBR_COND_EXEC. */ + case '*' : + /* Unconditional branches / branches not depending on condition codes. + This could also be a CALL_INSN. + Output the appropriate delay slot suffix. */ + if (final_sequence && XVECLEN (final_sequence, 0) != 1) + { + rtx jump = XVECEXP (final_sequence, 0, 0); + rtx delay = XVECEXP (final_sequence, 0, 1); + + /* For TARGET_PAD_RETURN we might have grabbed the delay insn. */ + if (INSN_DELETED_P (delay)) + return; + if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump)) + fputs (INSN_FROM_TARGET_P (delay) ? ".d" + : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d" + : get_attr_type (jump) == TYPE_RETURN && code == '#' ? "" + : ".nd", + file); + else + fputs (".d", file); + } + return; + case '?' : /* with leading "." */ + case '!' : /* without leading "." */ + /* This insn can be conditionally executed. See if the ccfsm machinery + says it should be conditionalized. + If it shouldn't, we'll check the compact attribute if this insn + has a short variant, which may be used depending on code size and + alignment considerations. */ + if (current_insn_predicate) + arc_ccfsm_current.cc + = get_arc_condition_code (current_insn_predicate); + if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current)) + { + /* Is this insn in a delay slot sequence? */ + if (!final_sequence || XVECLEN (final_sequence, 0) < 2 + || current_insn_predicate + || CALL_P (XVECEXP (final_sequence, 0, 0)) + || simplejump_p (XVECEXP (final_sequence, 0, 0))) + { + /* This insn isn't in a delay slot sequence, or conditionalized + independently of its position in a delay slot. */ + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[arc_ccfsm_current.cc]); + /* If this is a jump, there are still short variants. However, + only beq_s / bne_s have the same offset range as b_s, + and the only short conditional returns are jeq_s and jne_s. */ + if (code == '!' + && (arc_ccfsm_current.cc == ARC_CC_EQ + || arc_ccfsm_current.cc == ARC_CC_NE + || 0 /* FIXME: check if branch in 7 bit range. */)) + output_short_suffix (file); + } + else if (code == '!') /* Jump with delay slot. 
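+				  Print the condition code without a
+				  leading dot, unlike the '?' case above.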
*/ + fputs (arc_condition_codes[arc_ccfsm_current.cc], file); + else /* An Instruction in a delay slot of a jump or call. */ + { + rtx jump = XVECEXP (final_sequence, 0, 0); + rtx insn = XVECEXP (final_sequence, 0, 1); + + /* If the insn is annulled and is from the target path, we need + to inverse the condition test. */ + if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump)) + { + if (INSN_FROM_TARGET_P (insn)) + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]); + else + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[arc_ccfsm_current.cc]); + if (arc_ccfsm_current.state == 5) + arc_ccfsm_current.state = 0; + } + else + /* This insn is executed for either path, so don't + conditionalize it at all. */ + output_short_suffix (file); + + } + } + else + output_short_suffix (file); + return; + case'`': + /* FIXME: fold constant inside unary operator, re-recognize, and emit. */ + gcc_unreachable (); + case 'd' : + fputs (arc_condition_codes[get_arc_condition_code (x)], file); + return; + case 'D' : + fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE + (get_arc_condition_code (x))], + file); + return; + case 'R' : + /* Write second word of DImode or DFmode reference, + register or memory. */ + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)+1], file); + else if (GET_CODE (x) == MEM) + { + fputc ('[', file); + + /* Handle possible auto-increment. For PRE_INC / PRE_DEC / + PRE_MODIFY, we will have handled the first word already; + For POST_INC / POST_DEC / POST_MODIFY, the access to the + first word will be done later. In either case, the access + to the first word will do the modify, and we only have + to add an offset of four here. */ + if (GET_CODE (XEXP (x, 0)) == PRE_INC + || GET_CODE (XEXP (x, 0)) == PRE_DEC + || GET_CODE (XEXP (x, 0)) == PRE_MODIFY + || GET_CODE (XEXP (x, 0)) == POST_INC + || GET_CODE (XEXP (x, 0)) == POST_DEC + || GET_CODE (XEXP (x, 0)) == POST_MODIFY) + output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4)); + else if (output_scaled) + { + rtx addr = XEXP (x, 0); + int size = GET_MODE_SIZE (GET_MODE (x)); + + output_address (plus_constant (Pmode, XEXP (addr, 0), + ((INTVAL (XEXP (addr, 1)) + 4) + >> (size == 2 ? 1 : 2)))); + output_scaled = 0; + } + else + output_address (plus_constant (Pmode, XEXP (x, 0), 4)); + fputc (']', file); + } + else + output_operand_lossage ("invalid operand to %%R code"); + return; + case 'S' : + /* FIXME: remove %S option. */ + break; + case 'B' /* Branch or other LIMM ref - must not use sda references. */ : + if (CONSTANT_P (x)) + { + output_addr_const (file, x); + return; + } + break; + case 'H' : + case 'L' : + if (GET_CODE (x) == REG) + { + /* L = least significant word, H = most significant word. */ + if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L')) + fputs (reg_names[REGNO (x)], file); + else + fputs (reg_names[REGNO (x)+1], file); + } + else if (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE) + { + rtx first, second; + + split_double (x, &first, &second); + + if((WORDS_BIG_ENDIAN) == 0) + fprintf (file, "0x%08lx", + code == 'L' ? INTVAL (first) : INTVAL (second)); + else + fprintf (file, "0x%08lx", + code == 'L' ? 
INTVAL (second) : INTVAL (first)); + + + } + else + output_operand_lossage ("invalid operand to %%H/%%L code"); + return; + case 'A' : + { + char str[30]; + + gcc_assert (GET_CODE (x) == CONST_DOUBLE + && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT); + + real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1); + fprintf (file, "%s", str); + return; + } + case 'U' : + /* Output a load/store with update indicator if appropriate. */ + if (GET_CODE (x) == MEM) + { + rtx addr = XEXP (x, 0); + switch (GET_CODE (addr)) + { + case PRE_INC: case PRE_DEC: case PRE_MODIFY: + fputs (".a", file); break; + case POST_INC: case POST_DEC: case POST_MODIFY: + fputs (".ab", file); break; + case PLUS: + /* Are we using a scaled index? */ + if (GET_CODE (XEXP (addr, 0)) == MULT) + fputs (".as", file); + /* Can we use a scaled offset? */ + else if (CONST_INT_P (XEXP (addr, 1)) + && GET_MODE_SIZE (GET_MODE (x)) > 1 + && (!(INTVAL (XEXP (addr, 1)) + & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3)) + /* Does it make a difference? */ + && !SMALL_INT_RANGE(INTVAL (XEXP (addr, 1)), + GET_MODE_SIZE (GET_MODE (x)) - 2, 0)) + { + fputs (".as", file); + output_scaled = 1; + } + break; + case REG: + break; + default: + gcc_assert (CONSTANT_P (addr)); break; + } + } + else + output_operand_lossage ("invalid operand to %%U code"); + return; + case 'V' : + /* Output cache bypass indicator for a load/store insn. Volatile memory + refs are defined to use the cache bypass mechanism. */ + if (GET_CODE (x) == MEM) + { + if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET ) + fputs (".di", file); + } + else + output_operand_lossage ("invalid operand to %%V code"); + return; + /* plt code. */ + case 'P': + case 0 : + /* Do nothing special. */ + break; + case 'F': + fputs (reg_names[REGNO (x)]+1, file); + return; + case '^': + /* This punctuation character is needed because label references are + printed in the output template using %l. This is a front end + character, and when we want to emit a '@' before it, we have to use + this '^'. */ + + fputc('@',file); + return; + case 'O': + /* Output an operator. */ + switch (GET_CODE (x)) + { + case PLUS: fputs ("add", file); return; + case SS_PLUS: fputs ("adds", file); return; + case AND: fputs ("and", file); return; + case IOR: fputs ("or", file); return; + case XOR: fputs ("xor", file); return; + case MINUS: fputs ("sub", file); return; + case SS_MINUS: fputs ("subs", file); return; + case ASHIFT: fputs ("asl", file); return; + case ASHIFTRT: fputs ("asr", file); return; + case LSHIFTRT: fputs ("lsr", file); return; + case ROTATERT: fputs ("ror", file); return; + case MULT: fputs ("mpy", file); return; + case ABS: fputs ("abs", file); return; /* Unconditional. */ + case NEG: fputs ("neg", file); return; + case SS_NEG: fputs ("negs", file); return; + case NOT: fputs ("not", file); return; /* Unconditional. */ + case ZERO_EXTEND: + fputs ("ext", file); /* bmsk allows predication. */ + goto size_suffix; + case SIGN_EXTEND: /* Unconditional. 
*/ + fputs ("sex", file); + size_suffix: + switch (GET_MODE (XEXP (x, 0))) + { + case QImode: fputs ("b", file); return; + case HImode: fputs ("w", file); return; + default: break; + } + break; + case SS_TRUNCATE: + if (GET_MODE (x) != HImode) + break; + fputs ("sat16", file); + default: break; + } + output_operand_lossage ("invalid operand to %%O code"); return; + case 'o': + if (GET_CODE (x) == SYMBOL_REF) + { + assemble_name (file, XSTR (x, 0)); + return; + } + break; + case '&': + if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason) + fprintf (file, "; unalign: %d", cfun->machine->unalign); + return; + default : + /* Unknown flag. */ + output_operand_lossage ("invalid operand output code"); + } + + switch (GET_CODE (x)) + { + case REG : + fputs (reg_names[REGNO (x)], file); + break; + case MEM : + { + rtx addr = XEXP (x, 0); + int size = GET_MODE_SIZE (GET_MODE (x)); + + fputc ('[', file); + + switch (GET_CODE (addr)) + { + case PRE_INC: case POST_INC: + output_address (plus_constant (Pmode, XEXP (addr, 0), size)); break; + case PRE_DEC: case POST_DEC: + output_address (plus_constant (Pmode, XEXP (addr, 0), -size)); + break; + case PRE_MODIFY: case POST_MODIFY: + output_address (XEXP (addr, 1)); break; + case PLUS: + if (output_scaled) + { + output_address (plus_constant (Pmode, XEXP (addr, 0), + (INTVAL (XEXP (addr, 1)) + >> (size == 2 ? 1 : 2)))); + output_scaled = 0; + } + else + output_address (addr); + break; + default: + if (flag_pic && CONSTANT_ADDRESS_P (addr)) + arc_output_pic_addr_const (file, addr, code); + else + output_address (addr); + break; + } + fputc (']', file); + break; + } + case CONST_DOUBLE : + /* We handle SFmode constants here as output_addr_const doesn't. */ + if (GET_MODE (x) == SFmode) + { + REAL_VALUE_TYPE d; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (d, x); + REAL_VALUE_TO_TARGET_SINGLE (d, l); + fprintf (file, "0x%08lx", l); + break; + } + /* Fall through. Let output_addr_const deal with it. */ + default : + if (flag_pic) + arc_output_pic_addr_const (file, x, code); + else + { + /* FIXME: Dirty way to handle @var@sda+const. Shd be handled + with asm_output_symbol_ref */ + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + { + x = XEXP (x, 0); + output_addr_const (file, XEXP (x, 0)); + if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0))) + fprintf (file, "@sda"); + + if (GET_CODE (XEXP (x, 1)) != CONST_INT + || INTVAL (XEXP (x, 1)) >= 0) + fprintf (file, "+"); + output_addr_const (file, XEXP (x, 1)); + } + else + output_addr_const (file, x); + } + if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x)) + fprintf (file, "@sda"); + break; + } +} + +/* Print a memory address as an operand to reference that memory location. 
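*/

/* Editor's note: a freestanding sketch (not part of this patch) of the
   offset arithmetic shared by the 'U' and 'R' cases above.  A scaled
   (".as") access prints offset / size, and the second word of a
   doubleword reference adds 4 before scaling; all names below are
   illustrative only.  */

static long
example_scaled_offset (long offset, int size, int second_word)
{
  /* SIZE is 2 for ldw/stw and 4 for word accesses, matching the
     "size == 2 ? 1 : 2" shift selection used in the code above.  */
  if (second_word)
    offset += 4;
  return offset >> (size == 2 ? 1 : 2);
}

/*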
*/ + +void +arc_print_operand_address (FILE *file , rtx addr) +{ + register rtx base, index = 0; + + switch (GET_CODE (addr)) + { + case REG : + fputs (reg_names[REGNO (addr)], file); + break; + case SYMBOL_REF : + output_addr_const (file, addr); + if (SYMBOL_REF_SMALL_P (addr)) + fprintf (file, "@sda"); + break; + case PLUS : + if (GET_CODE (XEXP (addr, 0)) == MULT) + index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1); + else if (CONST_INT_P (XEXP (addr, 0))) + index = XEXP (addr, 0), base = XEXP (addr, 1); + else + base = XEXP (addr, 0), index = XEXP (addr, 1); + + gcc_assert (OBJECT_P (base)); + arc_print_operand_address (file, base); + if (CONSTANT_P (base) && CONST_INT_P (index)) + fputc ('+', file); + else + fputc (',', file); + gcc_assert (OBJECT_P (index)); + arc_print_operand_address (file, index); + break; + case CONST: + { + rtx c = XEXP (addr, 0); + + gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF); + gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT); + + output_address(XEXP(addr,0)); + + break; + } + case PRE_INC : + case PRE_DEC : + /* We shouldn't get here as we've lost the mode of the memory object + (which says how much to inc/dec by. */ + gcc_unreachable (); + break; + default : + if (flag_pic) + arc_output_pic_addr_const (file, addr, 0); + else + output_addr_const (file, addr); + break; + } +} + +/* Called via walk_stores. DATA points to a hash table we can use to + establish a unique SYMBOL_REF for each counter, which corresponds to + a caller-callee pair. + X is a store which we want to examine for an UNSPEC_PROF, which + would be an address loaded into a register, or directly used in a MEM. + If we found an UNSPEC_PROF, if we encounter a new counter the first time, + write out a description and a data allocation for a 32 bit counter. + Also, fill in the appropriate symbol_ref into each UNSPEC_PROF instance. */ + +static void +write_profile_sections (rtx dest ATTRIBUTE_UNUSED, rtx x, void *data) +{ + rtx *srcp, src; + htab_t htab = (htab_t) data; + rtx *slot; + + if (GET_CODE (x) != SET) + return; + srcp = &SET_SRC (x); + if (MEM_P (*srcp)) + srcp = &XEXP (*srcp, 0); + else if (MEM_P (SET_DEST (x))) + srcp = &XEXP (SET_DEST (x), 0); + src = *srcp; + if (GET_CODE (src) != CONST) + return; + src = XEXP (src, 0); + if (GET_CODE (src) != UNSPEC || XINT (src, 1) != UNSPEC_PROF) + return; + + gcc_assert (XVECLEN (src, 0) == 3); + if (!htab_elements (htab)) + { + output_asm_insn (".section .__arc_profile_desc, \"a\"\n" + "\t.long %0 + 1\n", + &XVECEXP (src, 0, 0)); + } + slot = (rtx *) htab_find_slot (htab, src, INSERT); + if (*slot == HTAB_EMPTY_ENTRY) + { + static int count_nr; + char buf[24]; + rtx count; + + *slot = src; + sprintf (buf, "__prof_count%d", count_nr++); + count = gen_rtx_SYMBOL_REF (Pmode, xstrdup (buf)); + XVECEXP (src, 0, 2) = count; + output_asm_insn (".section\t.__arc_profile_desc, \"a\"\n" + "\t.long\t%1\n" + "\t.section\t.__arc_profile_counters, \"aw\"\n" + "\t.type\t%o2, @object\n" + "\t.size\t%o2, 4\n" + "%o2:\t.zero 4", + &XVECEXP (src, 0, 0)); + *srcp = count; + } + else + *srcp = XVECEXP (*slot, 0, 2); +} + +/* Hash function for UNSPEC_PROF htab. Use both the caller's name and + the callee's name (if known). */ + +static hashval_t +unspec_prof_hash (const void *x) +{ + const_rtx u = (const_rtx) x; + const_rtx s1 = XVECEXP (u, 0, 1); + + return (htab_hash_string (XSTR (XVECEXP (u, 0, 0), 0)) + ^ (s1->code == SYMBOL_REF ? htab_hash_string (XSTR (s1, 0)) : 0)); +} + +/* Equality function for UNSPEC_PROF htab. 
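*/

/* Editor's note: unspec_prof_hash above mixes the caller's name with the
   callee symbol when one is known.  A freestanding sketch of the same
   pairing, assuming only libiberty's htab_hash_string; the function name
   is illustrative.  */

static hashval_t
example_pair_hash (const char *caller, const char *callee)
{
  /* A missing callee contributes 0, mirroring the SYMBOL_REF test in
     unspec_prof_hash.  */
  return (htab_hash_string (caller)
	  ^ (callee ? htab_hash_string (callee) : 0));
}

/*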
Two pieces of UNSPEC_PROF rtl
+   shall refer to the same counter if both caller name and callee rtl
+   are identical.  */
+
+static int
+unspec_prof_htab_eq (const void *x, const void *y)
+{
+  const_rtx u0 = (const_rtx) x;
+  const_rtx u1 = (const_rtx) y;
+  const_rtx s01 = XVECEXP (u0, 0, 1);
+  const_rtx s11 = XVECEXP (u1, 0, 1);
+
+  return (!strcmp (XSTR (XVECEXP (u0, 0, 0), 0),
+                   XSTR (XVECEXP (u1, 0, 0), 0))
+          && rtx_equal_p (s01, s11));
+}
+
+/* Conditional execution support.
+
+   This is based on the ARM port but for now is much simpler.
+
+   A finite state machine takes care of noticing whether or not instructions
+   can be conditionally executed, and thus decreases execution time and code
+   size by deleting branch instructions.  The fsm is controlled by
+   arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the
+   actions of PRINT_OPERAND.  The patterns in the .md file for the branch
+   insns also have a hand in this.  */
+/* The way we leave dealing with non-annulled or annul-false delay slot
+   insns to the consumer is awkward.  */
+
+/* The states of the fsm controlling condition codes are:
+   0: normal, do nothing special
+   1: don't output this insn
+   2: don't output this insn
+   3: make insns conditional
+   4: make insns conditional
+   5: make insn conditional (only for outputting annulled delay slot insns)
+
+   special value for cfun->machine->uid_ccfsm_state:
+   6: return with but one insn before it since function start / call
+
+   State transitions (state->state by whom, under what condition):
+   0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over
+          some instructions.
+   0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed
+          by zero or more non-jump insns and an unconditional branch with
+          the same target label as the condbranch.
+   1 -> 3 branch patterns, after having not output the conditional branch
+   2 -> 4 branch patterns, after having not output the conditional branch
+   0 -> 5 branch patterns, for an annulled delay slot insn.
+   3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached
+          (the target label has CODE_LABEL_NUMBER equal to
+          arc_ccfsm_target_label).
+   4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached
+   3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns.
+   5 -> 0 when outputting the delay slot insn
+
+   If the jump clobbers the conditions then we use states 2 and 4.
+
+   A similar thing can be done with conditional return insns.
+
+   We also handle separating branches from sets of the condition code.
+   This is done here because knowledge of the ccfsm state is required;
+   we may not be outputting the branch.  */
+
+/* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current,
+   before letting final output INSN.  */
+
+static void
+arc_ccfsm_advance (rtx insn, struct arc_ccfsm *state)
+{
+  /* BODY will hold the body of INSN.  */
+  register rtx body;
+
+  /* This will be 1 if trying to repeat the trick (i.e. do the `else' part of
+     an if/then/else), and things need to be reversed.  */
+  int reverse = 0;
+
+  /* If we start with a return insn, we only succeed if we find another one.  */
+  int seeking_return = 0;
+
+  /* START_INSN will hold the insn from where we start looking.  This is the
+     first insn after the following code_label if REVERSE is true.  */
+  rtx start_insn = insn;
+
+  /* Type of the jump_insn.  Brcc insns don't affect ccfsm changes,
+     since they don't rely on a cmp preceding them.
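*/

/* Editor's note: a worked trace of the state diagram above (editorial,
   not from the patch).  For

	cmp	r0,0
	beq	.L1		; arc_ccfsm_advance: 0 -> 1
	add	r1,r1,1
	sub	r2,r2,1
   .L1:

   the beq pattern emits nothing and moves 1 -> 3; the two skipped insns
   are printed as "add.ne" and "sub.ne" (the branch condition inverted,
   since they execute only when the branch falls through); and emitting
   .L1 resets 3 -> 0 via arc_ccfsm_at_label.  */

/*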
*/ + enum attr_type jump_insn_type; + + /* Allow -mdebug-ccfsm to turn this off so we can see how well it does. + We can't do this in macro FINAL_PRESCAN_INSN because its called from + final_scan_insn which has `optimize' as a local. */ + if (optimize < 2 || TARGET_NO_COND_EXEC) + return; + + /* Ignore notes and labels. */ + if (!INSN_P (insn)) + return; + body = PATTERN (insn); + /* If in state 4, check if the target branch is reached, in order to + change back to state 0. */ + if (state->state == 4) + { + if (insn == state->target_insn) + { + state->target_insn = NULL; + state->state = 0; + } + return; + } + + /* If in state 3, it is possible to repeat the trick, if this insn is an + unconditional branch to a label, and immediately following this branch + is the previous target label which is only used once, and the label this + branch jumps to is not too far off. Or in other words "we've done the + `then' part, see if we can do the `else' part." */ + if (state->state == 3) + { + if (simplejump_p (insn)) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) + { + /* ??? Isn't this always a barrier? */ + start_insn = next_nonnote_insn (start_insn); + } + if (GET_CODE (start_insn) == CODE_LABEL + && CODE_LABEL_NUMBER (start_insn) == state->target_label + && LABEL_NUSES (start_insn) == 1) + reverse = TRUE; + else + return; + } + else if (GET_CODE (body) == SIMPLE_RETURN) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == CODE_LABEL + && CODE_LABEL_NUMBER (start_insn) == state->target_label + && LABEL_NUSES (start_insn) == 1) + { + reverse = TRUE; + seeking_return = 1; + } + else + return; + } + else + return; + } + + if (GET_CODE (insn) != JUMP_INSN + || GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + return; + + /* We can't predicate BRCC or loop ends. + Also, when generating PIC code, and considering a medium range call, + we can't predicate the call. */ + jump_insn_type = get_attr_type (insn); + if (jump_insn_type == TYPE_BRCC + || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT + || jump_insn_type == TYPE_LOOP_END + || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn))) + return; + + /* This jump might be paralleled with a clobber of the condition codes, + the jump should always come first. */ + if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) + body = XVECEXP (body, 0, 0); + + if (reverse + || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC + && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE)) + { + int insns_skipped = 0, fail = FALSE, succeed = FALSE; + /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */ + int then_not_else = TRUE; + /* Nonzero if next insn must be the target label. */ + int next_must_be_target_label_p; + rtx this_insn = start_insn, label = 0; + + /* Register the insn jumped to. 
*/ + if (reverse) + { + if (!seeking_return) + label = XEXP (SET_SRC (body), 0); + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF) + label = XEXP (XEXP (SET_SRC (body), 1), 0); + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF) + { + label = XEXP (XEXP (SET_SRC (body), 2), 0); + then_not_else = FALSE; + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN) + seeking_return = 1; + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN) + { + seeking_return = 1; + then_not_else = FALSE; + } + else + gcc_unreachable (); + + /* If this is a non-annulled branch with a delay slot, there is + no need to conditionalize the delay slot. */ + if (NEXT_INSN (PREV_INSN (insn)) != insn + && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn)) + { + this_insn = NEXT_INSN (this_insn); + gcc_assert (NEXT_INSN (NEXT_INSN (PREV_INSN (start_insn))) + == NEXT_INSN (this_insn)); + } + /* See how many insns this branch skips, and what kind of insns. If all + insns are okay, and the label or unconditional branch to the same + label is not too far away, succeed. */ + for (insns_skipped = 0, next_must_be_target_label_p = FALSE; + !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED; + insns_skipped++) + { + rtx scanbody; + + this_insn = next_nonnote_insn (this_insn); + if (!this_insn) + break; + + if (next_must_be_target_label_p) + { + if (GET_CODE (this_insn) == BARRIER) + continue; + if (GET_CODE (this_insn) == CODE_LABEL + && this_insn == label) + { + state->state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + } + + scanbody = PATTERN (this_insn); + + switch (GET_CODE (this_insn)) + { + case CODE_LABEL: + /* Succeed if it is the target label, otherwise fail since + control falls in from somewhere else. */ + if (this_insn == label) + { + state->state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case BARRIER: + /* Succeed if the following insn is the target label. + Otherwise fail. + If return insns are used then the last insn in a function + will be a barrier. */ + next_must_be_target_label_p = TRUE; + break; + + case CALL_INSN: + /* Can handle a call insn if there are no insns after it. + IE: The next "insn" is the target label. We don't have to + worry about delay slots as such insns are SEQUENCE's inside + INSN's. ??? It is possible to handle such insns though. */ + if (get_attr_cond (this_insn) == COND_CANUSE) + next_must_be_target_label_p = TRUE; + else + fail = TRUE; + break; + + case JUMP_INSN: + /* If this is an unconditional branch to the same label, succeed. + If it is to another label, do nothing. If it is conditional, + fail. */ + /* ??? Probably, the test for the SET and the PC are + unnecessary. */ + + if (GET_CODE (scanbody) == SET + && GET_CODE (SET_DEST (scanbody)) == PC) + { + if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF + && XEXP (SET_SRC (scanbody), 0) == label && !reverse) + { + state->state = 2; + succeed = TRUE; + } + else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE) + fail = TRUE; + else if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + else if (GET_CODE (scanbody) == SIMPLE_RETURN + && seeking_return) + { + state->state = 2; + succeed = TRUE; + } + else if (GET_CODE (scanbody) == PARALLEL) + { + if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + break; + + case INSN: + /* We can only do this with insns that can use the condition + codes (and don't set them). 
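*/

/* Editor's note (illustrative, not from the patch): a plain
   "add r1,r2,r3" can use the condition codes without setting them and
   so can be predicated to "add.ne r1,r2,r3", while a flag-setting
   "add.f r1,r2,r3" would clobber the very condition the skipped block
   is predicated on, so the COND_CANUSE test below rejects it.  */

/*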
*/ + if (GET_CODE (scanbody) == SET + || GET_CODE (scanbody) == PARALLEL) + { + if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + /* We can't handle other insns like sequences. */ + else + fail = TRUE; + break; + + default: + break; + } + } + + if (succeed) + { + if ((!seeking_return) && (state->state == 1 || reverse)) + state->target_label = CODE_LABEL_NUMBER (label); + else if (seeking_return || state->state == 2) + { + while (this_insn && GET_CODE (PATTERN (this_insn)) == USE) + { + this_insn = next_nonnote_insn (this_insn); + + gcc_assert (!this_insn || + (GET_CODE (this_insn) != BARRIER + && GET_CODE (this_insn) != CODE_LABEL)); + } + if (!this_insn) + { + /* Oh dear! we ran off the end, give up. */ + extract_insn_cached (insn); + state->state = 0; + state->target_insn = NULL; + return; + } + state->target_insn = this_insn; + } + else + gcc_unreachable (); + + /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from + what it was. */ + if (!reverse) + { + state->cond = XEXP (SET_SRC (body), 0); + state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0)); + } + + if (reverse || then_not_else) + state->cc = ARC_INVERSE_CONDITION_CODE (state->cc); + } + + /* Restore recog_operand. Getting the attributes of other insns can + destroy this array, but final.c assumes that it remains intact + across this call; since the insn has been recognized already we + call insn_extract direct. */ + extract_insn_cached (insn); + } +} + +/* Record that we are currently outputting label NUM with prefix PREFIX. + It it's the label we're looking for, reset the ccfsm machinery. + + Called from ASM_OUTPUT_INTERNAL_LABEL. */ + +static void +arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state) +{ + if (state->state == 3 && state->target_label == num + && !strcmp (prefix, "L")) + { + state->state = 0; + state->target_insn = NULL_RTX; + } +} + +/* We are considering a conditional branch with the condition COND. + Check if we want to conditionalize a delay slot insn, and if so modify + the ccfsm state accordingly. + REVERSE says branch will branch when the condition is false. */ +void +arc_ccfsm_record_condition (rtx cond, bool reverse, rtx jump, + struct arc_ccfsm *state) +{ + rtx seq_insn = NEXT_INSN (PREV_INSN (jump)); + if (!state) + state = &arc_ccfsm_current; + + gcc_assert (state->state == 0); + if (seq_insn != jump) + { + rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1); + + if (!INSN_DELETED_P (insn) + && INSN_ANNULLED_BRANCH_P (jump) + && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn))) + { + state->cond = cond; + state->cc = get_arc_condition_code (cond); + if (!reverse) + arc_ccfsm_current.cc + = ARC_INVERSE_CONDITION_CODE (state->cc); + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == COND_EXEC) + gcc_assert ((INSN_FROM_TARGET_P (insn) + ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc) + == get_arc_condition_code (XEXP (pat, 0))); + else + state->state = 5; + } + } +} + +/* Update *STATE as we would when we emit INSN. 
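*/

/* Editor's note: an editorial sketch (not part of this patch) of how the
   advance/post_advance pair brackets each insn; it mirrors the loop in
   arc_next_active_insn further down, with the actual emission elided.  */

static void arc_ccfsm_post_advance (rtx, struct arc_ccfsm *);

static void
example_ccfsm_walk (rtx first)
{
  struct arc_ccfsm s;
  rtx insn;

  memset (&s, 0, sizeof s);
  for (insn = first; insn; insn = NEXT_INSN (insn))
    {
      arc_ccfsm_advance (insn, &s);	/* Decide how INSN will print.  */
      /* ... final_scan_insn would output INSN here, suppressed or
	 predicated according to S.STATE ...  */
      arc_ccfsm_post_advance (insn, &s); /* Account for the emission.  */
    }
}

/*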
*/ + +static void +arc_ccfsm_post_advance (rtx insn, struct arc_ccfsm *state) +{ + if (LABEL_P (insn)) + arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state); + else if (JUMP_P (insn) + && GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC + && get_attr_type (insn) == TYPE_BRANCH) + { + if (ARC_CCFSM_BRANCH_DELETED_P (state)) + ARC_CCFSM_RECORD_BRANCH_DELETED (state); + else + { + rtx src = SET_SRC (PATTERN (insn)); + arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx, + insn, state); + } + } + else if (arc_ccfsm_current.state == 5) + arc_ccfsm_current.state = 0; +} + +/* Return true if the current insn, which is a conditional branch, is to be + deleted. */ + +bool +arc_ccfsm_branch_deleted_p (void) +{ + return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current); +} + +/* Record a branch isn't output because subsequent insns can be + conditionalized. */ + +void +arc_ccfsm_record_branch_deleted (void) +{ + ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current); +} + +/* During insn output, indicate if the current insn is predicated. */ + +bool +arc_ccfsm_cond_exec_p (void) +{ + return (cfun->machine->prescan_initialized + && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current)); +} + +/* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC, + and look inside SEQUENCEs. */ + +static rtx +arc_next_active_insn (rtx insn, struct arc_ccfsm *statep) +{ + rtx pat; + + do + { + if (statep) + arc_ccfsm_post_advance (insn, statep); + insn = NEXT_INSN (insn); + if (!insn || BARRIER_P (insn)) + return NULL_RTX; + if (statep) + arc_ccfsm_advance (insn, statep); + } + while (NOTE_P (insn) + || (cfun->machine->arc_reorg_started + && LABEL_P (insn) && !label_to_alignment (insn)) + || (NONJUMP_INSN_P (insn) + && (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER))); + if (!LABEL_P (insn)) + { + gcc_assert (INSN_P (insn)); + pat = PATTERN (insn); + if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC) + return NULL_RTX; + if (GET_CODE (pat) == SEQUENCE) + return XVECEXP (pat, 0, 0); + } + return insn; +} + +/* When deciding if an insn should be output short, we want to know something + about the following insns: + - if another insn follows which we know we can output as a short insn + before an alignment-sensitive point, we can output this insn short: + the decision about the eventual alignment can be postponed. + - if a to-be-aligned label comes next, we should output this insn such + as to get / preserve 4-byte alignment. + - if a likely branch without delay slot insn, or a call with an immediately + following short insn comes next, we should out output this insn such as to + get / preserve 2 mod 4 unalignment. + - do the same for a not completely unlikely branch with a short insn + following before any other branch / label. + - in order to decide if we are actually looking at a branch, we need to + call arc_ccfsm_advance. + - in order to decide if we are looking at a short insn, we should know + if it is conditionalized. To a first order of approximation this is + the case if the state from arc_ccfsm_advance from before this insn + indicates the insn is conditionalized. However, a further refinement + could be to not conditionalize an insn if the destination register(s) + is/are dead in the non-executed case. */ +/* Return non-zero if INSN should be output as a short insn. UNALIGN is + zero if the current insn is aligned to a 4-byte-boundary, two otherwise. 
+ If CHECK_ATTR is greater than 0, check the iscompact attribute first. */ + +int +arc_verify_short (rtx insn, int, int check_attr) +{ + enum attr_iscompact iscompact; + struct machine_function *machine; + + if (check_attr > 0) + { + iscompact = get_attr_iscompact (insn); + if (iscompact == ISCOMPACT_FALSE) + return 0; + } + machine = cfun->machine; + + if (machine->force_short_suffix >= 0) + return machine->force_short_suffix; + + return (get_attr_length (insn) & 2) != 0; +} + +/* When outputting an instruction (alternative) that can potentially be short, + output the short suffix if the insn is in fact short, and update + cfun->machine->unalign accordingly. */ + +static void +output_short_suffix (FILE *file) +{ + rtx insn = current_output_insn; + + if (arc_verify_short (insn, cfun->machine->unalign, 1)) + { + fprintf (file, "_s"); + cfun->machine->unalign ^= 2; + } + /* Restore recog_operand. */ + extract_insn_cached (insn); +} + +/* Implement FINAL_PRESCAN_INSN. */ + +void +arc_final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED, + int noperands ATTRIBUTE_UNUSED) +{ + if (TARGET_DUMPISIZE) + fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn))); + + /* Output a nop if necessary to prevent a hazard. + Don't do this for delay slots: inserting a nop would + alter semantics, and the only time we would find a hazard is for a + call function result - and in that case, the hazard is spurious to + start with. */ + if (PREV_INSN (insn) + && PREV_INSN (NEXT_INSN (insn)) == insn + && arc_hazard (prev_real_insn (insn), insn)) + { + current_output_insn = + emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn))); + final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL); + current_output_insn = insn; + } + /* Restore extraction data which might have been clobbered by arc_hazard. */ + extract_constrain_insn_cached (insn); + + if (!cfun->machine->prescan_initialized) + { + /* Clear lingering state from branch shortening. */ + memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current); + cfun->machine->prescan_initialized = 1; + } + arc_ccfsm_advance (insn, &arc_ccfsm_current); + + cfun->machine->size_reason = 0; +} + +/* Given FROM and TO register numbers, say whether this elimination is allowed. + Frame pointer elimination is automatically handled. + + All eliminations are permissible. If we need a frame + pointer, we must eliminate ARG_POINTER_REGNUM into + FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ + +static bool +arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return to == FRAME_POINTER_REGNUM || !arc_frame_pointer_required (); +} + +/* Define the offset between two registers, one to be eliminated, and + the other its replacement, at the start of a routine. */ + +int +arc_initial_elimination_offset (int from, int to) +{ + if (! 
cfun->machine->frame_info.initialized) + arc_compute_frame_size (get_frame_size ()); + + if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) + { + return (cfun->machine->frame_info.extra_size + + cfun->machine->frame_info.reg_size); + } + + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + { + return (cfun->machine->frame_info.total_size + - cfun->machine->frame_info.pretend_size); + } + + if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM)) + { + return (cfun->machine->frame_info.total_size + - (cfun->machine->frame_info.pretend_size + + cfun->machine->frame_info.extra_size + + cfun->machine->frame_info.reg_size)); + } + + gcc_unreachable (); +} + +static bool +arc_frame_pointer_required (void) +{ + return cfun->calls_alloca; +} + + +/* Return the destination address of a branch. */ + +int +branch_dest (rtx branch) +{ + rtx pat = PATTERN (branch); + rtx dest = (GET_CODE (pat) == PARALLEL + ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat)); + int dest_uid; + + if (GET_CODE (dest) == IF_THEN_ELSE) + dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1); + + dest = XEXP (dest, 0); + dest_uid = INSN_UID (dest); + + return INSN_ADDRESSES (dest_uid); +} + + +/* Symbols in the text segment can be accessed without indirecting via the + constant pool; it may take an extra binary operation, but this is still + faster than indirecting via memory. Don't do this when not optimizing, + since we won't be calculating al of the offsets necessary to do this + simplification. */ + +static void +arc_encode_section_info (tree decl, rtx rtl, int first) +{ + /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION. + This clears machine specific flags, so has to come first. */ + default_encode_section_info (decl, rtl, first); + + /* Check if it is a function, and whether it has the + [long/medium/short]_call attribute specified. */ + if (TREE_CODE (decl) == FUNCTION_DECL) + { + rtx symbol = XEXP (rtl, 0); + int flags = SYMBOL_REF_FLAGS (symbol); + + tree attr = (TREE_TYPE (decl) != error_mark_node + ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE); + tree long_call_attr = lookup_attribute ("long_call", attr); + tree medium_call_attr = lookup_attribute ("medium_call", attr); + tree short_call_attr = lookup_attribute ("short_call", attr); + + if (long_call_attr != NULL_TREE) + flags |= SYMBOL_FLAG_LONG_CALL; + else if (medium_call_attr != NULL_TREE) + flags |= SYMBOL_FLAG_MEDIUM_CALL; + else if (short_call_attr != NULL_TREE) + flags |= SYMBOL_FLAG_SHORT_CALL; + + SYMBOL_REF_FLAGS (symbol) = flags; + } +} + +/* This is how to output a definition of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. */ + +static void arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno) +{ + if (cfun) + arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current); + default_internal_label (stream, prefix, labelno); +} + +/* Set the cpu type and print out other fancy things, + at the top of the file. */ + +static void arc_file_start (void) +{ + default_file_start (); + fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string); +} + +/* Cost functions. */ + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. 
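*/

/* Editor's note: a toy illustration (not part of this patch, names
   hypothetical) of the cost hook's contract.  Returning true means
   *TOTAL is final; returning false asks rtx_cost to add the operands'
   costs itself.  E.g. a u6 immediate is free, everything else here
   costs one insn.  */

static bool
example_costs (rtx x, int *total)
{
  if (CONST_INT_P (x) && UNSIGNED_INT6 (INTVAL (x)))
    {
      *total = 0;
      return true;		/* Final: no subexpressions to scan.  */
    }
  *total = COSTS_N_INSNS (1);
  return false;			/* Let the caller add operand costs.  */
}

/*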
*/ + +static bool +arc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, + int *total, bool speed) +{ + switch (code) + { + /* Small integers are as cheap as registers. */ + case CONST_INT: + { + bool nolimm = false; /* Can we do without long immediate? */ + bool fast = false; /* Is the result available immediately? */ + bool condexec = false; /* Does this allow conditiobnal execution? */ + bool compact = false; /* Is a 16 bit opcode available? */ + /* CONDEXEC also implies that we can have an unconditional + 3-address operation. */ + + nolimm = compact = condexec = false; + if (UNSIGNED_INT6 (INTVAL (x))) + nolimm = condexec = compact = true; + else + { + if (SMALL_INT (INTVAL (x))) + nolimm = fast = true; + switch (outer_code) + { + case AND: /* bclr, bmsk, ext[bw] */ + if (satisfies_constraint_Ccp (x) /* bclr */ + || satisfies_constraint_C1p (x) /* bmsk */) + nolimm = fast = condexec = compact = true; + break; + case IOR: /* bset */ + if (satisfies_constraint_C0p (x)) /* bset */ + nolimm = fast = condexec = compact = true; + break; + case XOR: + if (satisfies_constraint_C0p (x)) /* bxor */ + nolimm = fast = condexec = true; + break; + case SET: + if (satisfies_constraint_Crr (x)) /* ror b,u6 */ + nolimm = true; + default: + break; + } + } + /* FIXME: Add target options to attach a small cost if + condexec / compact is not true. */ + if (nolimm) + { + *total = 0; + return true; + } + } + /* FALLTHRU */ + + /* 4 byte values can be fetched as immediate constants - + let's give that the cost of an extra insn. */ + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (1); + return true; + + case CONST_DOUBLE: + { + rtx high, low; + + if (TARGET_DPFP) + { + *total = COSTS_N_INSNS (1); + return true; + } + /* FIXME: correct the order of high,low */ + split_double (x, &high, &low); + *total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high)) + + !SMALL_INT (INTVAL (low))); + return true; + } + + /* Encourage synth_mult to find a synthetic multiply when reasonable. + If we need more than 12 insns to do a multiply, then go out-of-line, + since the call overhead will be < 10% of the cost of the multiply. */ + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (TARGET_BARREL_SHIFTER) + { + /* If we want to shift a constant, we need a LIMM. */ + /* ??? when the optimizers want to know if a constant should be + hoisted, they ask for the cost of the constant. OUTER_CODE is + insufficient context for shifts since we don't know which operand + we are looking at. */ + if (CONSTANT_P (XEXP (x, 0))) + { + *total += (COSTS_N_INSNS (2) + + rtx_cost (XEXP (x, 1), (enum rtx_code) code, 0, speed)); + return true; + } + *total = COSTS_N_INSNS (1); + } + else if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total = COSTS_N_INSNS (16); + else + { + *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1))); + /* ??? want_to_gcse_p can throw negative shift counts at us, + and then panics when it gets a negative cost as result. + Seen for gcc.c-torture/compile/20020710-1.c -Os . */ + if (*total < 0) + *total = 0; + } + return false; + + case DIV: + case UDIV: + if (speed) + *total = COSTS_N_INSNS(30); + else + *total = COSTS_N_INSNS(1); + return false; + + case MULT: + if ((TARGET_DPFP && GET_MODE (x) == DFmode)) + *total = COSTS_N_INSNS (1); + else if (speed) + *total= arc_multcost; + /* We do not want synth_mult sequences when optimizing + for size. 
*/ + else if (TARGET_MUL64_SET || (TARGET_ARC700 && !TARGET_NOMPY_SET)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (2); + return false; + case PLUS: + if (GET_CODE (XEXP (x, 0)) == MULT + && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) + { + *total += (rtx_cost (XEXP (x, 1), PLUS, 0, speed) + + rtx_cost (XEXP (XEXP (x, 0), 0), PLUS, 1, speed)); + return true; + } + return false; + case MINUS: + if (GET_CODE (XEXP (x, 1)) == MULT + && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode)) + { + *total += (rtx_cost (XEXP (x, 0), PLUS, 0, speed) + + rtx_cost (XEXP (XEXP (x, 1), 0), PLUS, 1, speed)); + return true; + } + return false; + case COMPARE: + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + + if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx + && XEXP (op0, 1) == const1_rtx) + { + /* btst / bbit0 / bbit1: + Small integers and registers are free; everything else can + be put in a register. */ + *total = (rtx_cost (XEXP (op0, 0), SET, 1, speed) + + rtx_cost (XEXP (op0, 2), SET, 1, speed)); + return true; + } + if (GET_CODE (op0) == AND && op1 == const0_rtx + && satisfies_constraint_C1p (XEXP (op0, 1))) + { + /* bmsk.f */ + *total = rtx_cost (XEXP (op0, 0), SET, 1, speed); + return true; + } + /* add.f */ + if (GET_CODE (op1) == NEG) + { + /* op0 might be constant, the inside of op1 is rather + unlikely to be so. So swapping the operands might lower + the cost. */ + *total = (rtx_cost (op0, PLUS, 1, speed) + + rtx_cost (XEXP (op1, 0), PLUS, 0, speed)); + } + return false; + } + case EQ: case NE: + if (outer_code == IF_THEN_ELSE + && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT + && XEXP (x, 1) == const0_rtx + && XEXP (XEXP (x, 0), 1) == const1_rtx) + { + /* btst / bbit0 / bbit1: + Small integers and registers are free; everything else can + be put in a register. */ + rtx op0 = XEXP (x, 0); + + *total = (rtx_cost (XEXP (op0, 0), SET, 1, speed) + + rtx_cost (XEXP (op0, 2), SET, 1, speed)); + return true; + } + /* Fall through. */ + /* scc_insn expands into two insns. */ + case GTU: case GEU: case LEU: + if (GET_MODE (x) == SImode) + *total += COSTS_N_INSNS (1); + return false; + case LTU: /* might use adc. */ + if (GET_MODE (x) == SImode) + *total += COSTS_N_INSNS (1) - 1; + return false; + default: + return false; + } +} + +/* Return true if ADDR is an address that needs to be expressed as an + explicit sum of pcl + offset. */ + +bool +arc_legitimate_pc_offset_p (rtx addr) +{ + if (GET_CODE (addr) != CONST) + return false; + addr = XEXP (addr, 0); + if (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 1)) != CONST_INT) + return false; + addr = XEXP (addr, 0); + } + return (GET_CODE (addr) == UNSPEC + && XVECLEN (addr, 0) == 1 + && XINT (addr, 1) == ARC_UNSPEC_GOT + && GET_CODE (XVECEXP (addr, 0, 0)) == SYMBOL_REF); +} + +/* Return true if ADDR is a valid pic address. + A valid pic address on arc should look like + const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT)) */ + +bool +arc_legitimate_pic_addr_p (rtx addr) +{ + if (GET_CODE (addr) == LABEL_REF) + return true; + if (GET_CODE (addr) != CONST) + return false; + + addr = XEXP (addr, 0); + + + if (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 1)) != CONST_INT) + return false; + addr = XEXP (addr, 0); + } + + if (GET_CODE (addr) != UNSPEC + || XVECLEN (addr, 0) != 1) + return false; + + /* Must be @GOT or @GOTOFF. 
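*/

/* Editor's note, illustrating the shapes accepted at this point
   (editorial, not from the patch):

     (const (unspec [(symbol_ref "x")] ARC_UNSPEC_GOTOFF))	ok
     (const (plus (unspec [(symbol_ref "x")] ARC_UNSPEC_GOT)
		  (const_int 4)))				ok
     (const (symbol_ref "x"))					rejected  */

/*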
*/ + if (XINT (addr, 1) != ARC_UNSPEC_GOT + && XINT (addr, 1) != ARC_UNSPEC_GOTOFF) + return false; + + if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF + && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF) + return false; + + return true; +} + + + +/* Return true if OP contains a symbol reference. */ + +static bool +symbolic_reference_mentioned_p (rtx op) +{ + register const char *fmt; + register int i; + + if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) + return true; + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return true; + } + + else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) + return true; + } + + return false; +} + +/* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec. + If SKIP_LOCAL is true, skip symbols that bind locally. + This is used further down in this file, and, without SKIP_LOCAL, + in the addsi3 / subsi3 expanders when generating PIC code. */ + +bool +arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local) +{ + register const char *fmt; + register int i; + + if (GET_CODE(op) == UNSPEC) + return false; + + if (GET_CODE (op) == SYMBOL_REF) + { + tree decl = SYMBOL_REF_DECL (op); + return !skip_local || !decl || !default_binds_local_p (decl); + } + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j), + skip_local)) + return true; + } + + else if (fmt[i] == 'e' + && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i), + skip_local)) + return true; + } + + return false; +} + +/* Legitimize a pic address reference in ORIG. + The return value is the legitimated address. + If OLDX is non-zero, it is the target to assign the address to first. */ + +rtx +arc_legitimize_pic_address (rtx orig, rtx oldx) +{ + rtx addr = orig; + rtx pat = orig; + rtx base; + + if (oldx == orig) + oldx = NULL; + + if (GET_CODE (addr) == LABEL_REF) + ; /* Do nothing. */ + else if (GET_CODE (addr) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (addr) + || SYMBOL_REF_LOCAL_P (addr))) + { + /* This symbol may be referenced via a displacement from the PIC + base address (@GOTOFF). */ + + /* FIXME: if we had a way to emit pc-relative adds that don't + create a GOT entry, we could do without the use of the gp register. */ + crtl->uses_pic_offset_table = 1; + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOTOFF); + pat = gen_rtx_CONST (Pmode, pat); + pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat); + + if (oldx == NULL) + oldx = gen_reg_rtx (Pmode); + + if (oldx != 0) + { + emit_move_insn (oldx, pat); + pat = oldx; + } + + } + else if (GET_CODE (addr) == SYMBOL_REF) + { + /* This symbol must be referenced via a load from the + Global Offset Table (@GOTPC). */ + + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT); + pat = gen_rtx_CONST (Pmode, pat); + pat = gen_const_mem (Pmode, pat); + + if (oldx == 0) + oldx = gen_reg_rtx (Pmode); + + emit_move_insn (oldx, pat); + pat = oldx; + } + else + { + if (GET_CODE (addr) == CONST) + { + addr = XEXP (addr, 0); + if (GET_CODE (addr) == UNSPEC) + { + /* Check that the unspec is one of the ones we generate? 
*/ + } + else + gcc_assert (GET_CODE (addr) == PLUS); + } + + if (GET_CODE (addr) == PLUS) + { + rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); + + /* Check first to see if this is a constant offset from a @GOTOFF + symbol reference. */ + if ((GET_CODE (op0) == LABEL_REF + || (GET_CODE (op0) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (op0) + || SYMBOL_REF_LOCAL_P (op0)))) + && GET_CODE (op1) == CONST_INT) + { + /* FIXME: like above, could do without gp reference. */ + crtl->uses_pic_offset_table = 1; + pat + = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), ARC_UNSPEC_GOTOFF); + pat = gen_rtx_PLUS (Pmode, pat, op1); + pat = gen_rtx_CONST (Pmode, pat); + pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat); + + if (oldx != 0) + { + emit_move_insn (oldx, pat); + pat = oldx; + } + } + else + { + base = arc_legitimize_pic_address (XEXP (addr, 0), oldx); + pat = arc_legitimize_pic_address (XEXP (addr, 1), + base == oldx ? NULL_RTX : oldx); + + if (GET_CODE (pat) == CONST_INT) + pat = plus_constant (Pmode, base, INTVAL (pat)); + else + { + if (GET_CODE (pat) == PLUS && CONSTANT_P (XEXP (pat, 1))) + { + base = gen_rtx_PLUS (Pmode, base, XEXP (pat, 0)); + pat = XEXP (pat, 1); + } + pat = gen_rtx_PLUS (Pmode, base, pat); + } + } + } + } + + return pat; +} + +/* Output address constant X to FILE, taking PIC into account. */ + +void +arc_output_pic_addr_const (FILE * file, rtx x, int code) +{ + char buf[256]; + + restart: + switch (GET_CODE (x)) + { + case PC: + if (flag_pic) + putc ('.', file); + else + gcc_unreachable (); + break; + + case SYMBOL_REF: + output_addr_const (file, x); + + /* Local functions do not get references through the PLT. */ + if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) + fputs ("@plt", file); + break; + + case LABEL_REF: + ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0))); + assemble_name (file, buf); + break; + + case CODE_LABEL: + ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); + assemble_name (file, buf); + break; + + case CONST_INT: + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case CONST: + arc_output_pic_addr_const (file, XEXP (x, 0), code); + break; + + case CONST_DOUBLE: + if (GET_MODE (x) == VOIDmode) + { + /* We can use %d if the number is one word and positive. */ + if (CONST_DOUBLE_HIGH (x)) + fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX, + CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x)); + else if (CONST_DOUBLE_LOW (x) < 0) + fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x)); + else + fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); + } + else + /* We can't handle floating point constants; + PRINT_OPERAND must handle them. */ + output_operand_lossage ("floating constant misused"); + break; + + case PLUS: + /* FIXME: Not needed here. */ + /* Some assemblers need integer constants to appear last (eg masm). */ + if (GET_CODE (XEXP (x, 0)) == CONST_INT) + { + arc_output_pic_addr_const (file, XEXP (x, 1), code); + fprintf (file, "+"); + arc_output_pic_addr_const (file, XEXP (x, 0), code); + } + else if (GET_CODE (XEXP (x, 1)) == CONST_INT) + { + arc_output_pic_addr_const (file, XEXP (x, 0), code); + if (INTVAL (XEXP (x, 1)) >= 0) + fprintf (file, "+"); + arc_output_pic_addr_const (file, XEXP (x, 1), code); + } + else + gcc_unreachable(); + break; + + case MINUS: + /* Avoid outputting things like x-x or x+5-x, + since some assemblers can't handle that. 
*/
+      x = simplify_subtraction (x);
+      if (GET_CODE (x) != MINUS)
+        goto restart;
+
+      arc_output_pic_addr_const (file, XEXP (x, 0), code);
+      fprintf (file, "-");
+      if (GET_CODE (XEXP (x, 1)) == CONST_INT
+          && INTVAL (XEXP (x, 1)) < 0)
+        {
+          fprintf (file, "(");
+          arc_output_pic_addr_const (file, XEXP (x, 1), code);
+          fprintf (file, ")");
+        }
+      else
+        arc_output_pic_addr_const (file, XEXP (x, 1), code);
+      break;
+
+    case ZERO_EXTEND:
+    case SIGN_EXTEND:
+      arc_output_pic_addr_const (file, XEXP (x, 0), code);
+      break;
+
+    case UNSPEC:
+      gcc_assert (XVECLEN (x, 0) == 1);
+      if (XINT (x, 1) == ARC_UNSPEC_GOT)
+        fputs ("pcl,", file);
+      arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
+      switch (XINT (x, 1))
+        {
+        case ARC_UNSPEC_GOT:
+          fputs ("@gotpc", file);
+          break;
+        case ARC_UNSPEC_GOTOFF:
+          fputs ("@gotoff", file);
+          break;
+        case ARC_UNSPEC_PLT:
+          fputs ("@plt", file);
+          break;
+        default:
+          output_operand_lossage ("invalid UNSPEC as operand: %d",
+                                  XINT (x, 1));
+          break;
+        }
+      break;
+
+    default:
+      output_operand_lossage ("invalid expression as operand");
+    }
+}
+
+#define SYMBOLIC_CONST(X) \
+(GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
+
+/* Emit insns to move operands[1] into operands[0].  */
+
+void
+emit_pic_move (rtx *operands, enum machine_mode)
+{
+  rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
+
+  if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
+    operands[1] = force_reg (Pmode, operands[1]);
+  else
+    operands[1] = arc_legitimize_pic_address (operands[1], temp);
+}
+
+
+/* Return the number of words, at the beginning of an argument, that
+   must be put in registers.  The returned value must be zero for
+   arguments that are passed entirely in registers or that are
+   entirely pushed on the stack.
+
+   On some machines, certain arguments must be passed partially in
+   registers and partially in memory.  On these machines, typically
+   the first N words of arguments are passed in registers, and the
+   rest on the stack.  If a multi-word argument (a `double' or a
+   structure) crosses that boundary, its first few words must be
+   passed in registers and the rest must be pushed.  This function
+   tells the compiler when this occurs, and how many of the words
+   should go in registers.
+
+   `FUNCTION_ARG' for these arguments should return the first register
+   to be used by the caller for this argument; likewise
+   `FUNCTION_INCOMING_ARG', for the called function.
+
+   The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS.  */
+
+/* If REGNO is the least arg reg available, return the total number of arg
+   regs available.  */
+#define GPR_REST_ARG_REGS(REGNO) \
+  ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 )
+
+/* ARC parm regs are contiguous.  */
+#define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 )
+
+/* Implement TARGET_ARG_PARTIAL_BYTES.  */
+
+static int
+arc_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
+                       tree type, bool named ATTRIBUTE_UNUSED)
+{
+  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+  int bytes = (mode == BLKmode
+               ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
+  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  int arg_num = *cum;
+  int ret;
+
+  arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
+  ret = GPR_REST_ARG_REGS (arg_num);
+
+  /* ICEd at function.c:2361, and ret is copied to data->partial.  */
+  ret = (ret >= words ?
0 : ret * UNITS_PER_WORD);
+
+  return ret;
+}
+
+
+
+/* This function controls whether a function argument is passed in a
+   register, and which register.
+
+   The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes
+   (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE)
+   all of the previous arguments so far passed in registers; MODE, the
+   machine mode of the argument; TYPE, the data type of the argument
+   as a tree node or 0 if that is not known (which happens for C
+   support library functions); and NAMED, which is 1 for an ordinary
+   argument and 0 for nameless arguments that correspond to `...' in
+   the called function's prototype.
+
+   The returned value should either be a `reg' RTX for the hard
+   register in which to pass the argument, or zero to pass the
+   argument on the stack.
+
+   For machines like the Vax and 68000, where normally all arguments
+   are pushed, zero suffices as a definition.
+
+   The usual way to make the ANSI library `stdarg.h' work on a machine
+   where some arguments are usually passed in registers, is to cause
+   nameless arguments to be passed on the stack instead.  This is done
+   by making the function return 0 whenever NAMED is 0.
+
+   You may use the macro `MUST_PASS_IN_STACK (MODE, TYPE)' in the
+   definition of this function to determine if this argument is of a
+   type that must be passed on the stack.  If `REG_PARM_STACK_SPACE'
+   is not defined and the function returns non-zero for such an
+   argument, the compiler will abort.  If `REG_PARM_STACK_SPACE' is
+   defined, the argument will be computed in the stack and then loaded
+   into a register.
+
+   The function is used to implement macro FUNCTION_ARG.  */
+/* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers
+   and the rest are pushed.  */
+
+static rtx
+arc_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
+                  const_tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED)
+{
+  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+  int arg_num = *cum;
+  rtx ret;
+  const char *debstr ATTRIBUTE_UNUSED;
+
+  arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
+  /* Return a marker for use in the call instruction.  */
+  if (mode == VOIDmode)
+    {
+      ret = const0_rtx;
+      debstr = "<0>";
+    }
+  else if (GPR_REST_ARG_REGS (arg_num) > 0)
+    {
+      ret = gen_rtx_REG (mode, arg_num);
+      debstr = reg_names [arg_num];
+    }
+  else
+    {
+      ret = NULL_RTX;
+      debstr = "memory";
+    }
+  return ret;
+}
+
+/* This function updates the summarizer variable *CUM to advance past
+   an argument in the argument list.  The values MODE, TYPE and NAMED
+   describe that argument.  Once this is done, the variable *CUM is
+   suitable for analyzing the *following* argument with
+   `FUNCTION_ARG', etc.
+
+   This function need not do anything if the argument in question was
+   passed on the stack.  The compiler knows how to track the amount of
+   stack space used for arguments without any special help.
+
+   The function is used to implement macro FUNCTION_ARG_ADVANCE.  */
+/* For the ARC: the cum set here is passed on to function_arg, where we
+   look at its value and say which reg to use.  Strategy: advance the
+   regnumber here till we run out of arg regs, then set *cum to last
+   reg.  In function_arg, since *cum > last arg reg, we would return 0
+   and thus the arg will end up on the stack.  For straddling args, of
+   course, function_arg_partial_nregs will come into play.
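*/

/* Editor's note, a worked example (editorial; assumes MAX_ARC_PARM_REGS
   is 8 and that ROUND_ADVANCE_CUM even-aligns two-word arguments):

     void f (int a, long long b, int c);

   a goes in r0; b is rounded up to the even pair r2:r3 (r1 is skipped);
   c goes in r4.  A long long arriving with only r7 free straddles:
   arc_arg_partial_bytes returns UNITS_PER_WORD, so r7 takes the first
   word and the second word goes on the stack.  */

/*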
*/ + +static void +arc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes = (mode == BLKmode + ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode)); + int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + int i; + + if (words) + *cum = ROUND_ADVANCE_CUM (*cum, mode, type); + for (i = 0; i < words; i++) + *cum = ARC_NEXT_ARG_REG (*cum); + +} + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FN_DECL_OR_TYPE is its + FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type. */ + +static rtx +arc_function_value (const_tree valtype, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode = TYPE_MODE (valtype); + int unsignedp ATTRIBUTE_UNUSED; + + unsignedp = TYPE_UNSIGNED (valtype); + if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE) + PROMOTE_MODE (mode, unsignedp, valtype); + return gen_rtx_REG (mode, 0); +} + +/* Returns the return address that is used by builtin_return_address. */ + +rtx +arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode , RETURN_ADDR_REGNUM); +} + +/* Nonzero if the constant value X is a legitimate general operand + when generating PIC code. It is given that flag_pic is on and + that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ + +bool +arc_legitimate_pic_operand_p (rtx x) +{ + return !arc_raw_symbolic_reference_mentioned_p (x, true); +} + +/* Determine if a given RTX is a valid constant. We already know this + satisfies CONSTANT_P. */ + +bool +arc_legitimate_constant_p (enum machine_mode, rtx x) +{ + if (!flag_pic) + return true; + + switch (GET_CODE (x)) + { + case CONST: + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + { + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return false; + x = XEXP (x, 0); + } + + /* Only some unspecs are valid as "constants". */ + if (GET_CODE (x) == UNSPEC) + switch (XINT (x, 1)) + { + case ARC_UNSPEC_PLT: + case ARC_UNSPEC_GOTOFF: + case ARC_UNSPEC_GOT: + case UNSPEC_PROF: + return true; + + default: + gcc_unreachable (); + } + + /* We must have drilled down to a symbol. */ + if (arc_raw_symbolic_reference_mentioned_p (x, false)) + return false; + + /* Return true. */ + break; + + case LABEL_REF: + case SYMBOL_REF: + return false; + + default: + break; + } + + /* Otherwise we handle everything else in the move patterns. */ + return true; +} + +static bool +arc_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + if (RTX_OK_FOR_BASE_P (x, strict)) + return true; + if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x, TARGET_INDEXED_LOADS, strict)) + return true; + if (LEGITIMATE_SCALED_ADDRESS_P (mode, x, strict)) + return true; + if (LEGITIMATE_SMALL_DATA_ADDRESS_P (x)) + return true; + if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x))) + return true; + if ((GET_MODE_SIZE (mode) != 16) + && (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF + || GET_CODE (x) == CONST)) + { + if (!flag_pic || arc_legitimate_pic_addr_p (x)) + return true; + } + if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC + || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC) + && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict)) + return true; + /* We're restricted here by the `st' insn. 
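*/

/* Editor's note (editorial example): a write-back address such as

     (pre_modify (reg r1) (plus (reg r1) (const_int 8)))

   is accepted only when the increment fits the offset range checked
   below; per the comment above, it is the `st' insn that imposes the
   restriction.  */

/*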
*/ + if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY) + && GET_CODE (XEXP ((x), 1)) == PLUS + && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0)) + && LEGITIMATE_OFFSET_ADDRESS_P (QImode, XEXP (x, 1), + TARGET_AUTO_MODIFY_REG, strict)) + return true; + return false; +} + +/* Return true iff ADDR (a legitimate address expression) + has an effect that depends on the machine mode it is used for. */ + +static bool +arc_mode_dependent_address_p (const_rtx addr, addr_space_t) +{ + /* SYMBOL_REF is not mode dependent: it is either a small data reference, + which is valid for loads and stores, or a limm offset, which is valid for + loads. */ + /* Scaled indices are scaled by the access mode; likewise for scaled + offsets, which are needed for maximum offset stores. */ + if (GET_CODE (addr) == PLUS + && (GET_CODE (XEXP ((addr), 0)) == MULT + || (CONST_INT_P (XEXP ((addr), 1)) + && !SMALL_INT (INTVAL (XEXP ((addr), 1)))))) + return true; + return false; +} + +/* Determine if it's legal to put X into the constant pool. */ + +static bool +arc_cannot_force_const_mem (enum machine_mode mode, rtx x) +{ + return !arc_legitimate_constant_p (mode, x); +} + + +/* Generic function to define a builtin. */ +#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ + do \ + { \ + if (MASK) \ + add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, NULL_TREE); \ + } \ + while (0) + + +static void +arc_init_builtins (void) +{ + tree endlink = void_list_node; + + tree void_ftype_void + = build_function_type (void_type_node, + endlink); + + tree int_ftype_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink)); + + tree pcvoid_type_node + = build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_CONST)); + tree int_ftype_pcvoid_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, pcvoid_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree int_ftype_short_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, short_integer_type_node, endlink)); + + tree void_ftype_int_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink))); + tree void_ftype_usint_usint + = build_function_type (void_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, endlink))); + + tree int_ftype_int_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink))); + + tree usint_ftype_usint + = build_function_type (long_unsigned_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, endlink)); + + tree void_ftype_usint + = build_function_type (void_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, endlink)); + + /* Add the builtins. 
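*/

/* Editor's note, a usage sketch (editorial, not from the patch): after
   the registrations below, user code such as

     int n = __builtin_arc_norm (x);	-- available under TARGET_NORM
     __builtin_arc_sr (a, b);		-- both operands unsigned, in the
					   order the type above declares

   is expanded by arc_expand_builtin further down, which maps each
   ARC_BUILTIN_* code to its machine insn pattern.  */

/*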
*/ + def_mbuiltin (1,"__builtin_arc_nop", void_ftype_void, ARC_BUILTIN_NOP); + def_mbuiltin (TARGET_NORM, "__builtin_arc_norm", int_ftype_int, ARC_BUILTIN_NORM); + def_mbuiltin (TARGET_NORM, "__builtin_arc_normw", int_ftype_short_int, ARC_BUILTIN_NORMW); + def_mbuiltin (TARGET_SWAP, "__builtin_arc_swap", int_ftype_int, ARC_BUILTIN_SWAP); + def_mbuiltin (TARGET_MUL64_SET,"__builtin_arc_mul64", void_ftype_int_int, ARC_BUILTIN_MUL64); + def_mbuiltin (TARGET_MUL64_SET,"__builtin_arc_mulu64", void_ftype_usint_usint, ARC_BUILTIN_MULU64); + def_mbuiltin (1,"__builtin_arc_rtie", void_ftype_void, ARC_BUILTIN_RTIE); + def_mbuiltin (TARGET_ARC700,"__builtin_arc_sync", void_ftype_void, ARC_BUILTIN_SYNC); + def_mbuiltin ((TARGET_EA_SET),"__builtin_arc_divaw", int_ftype_int_int, ARC_BUILTIN_DIVAW); + def_mbuiltin (1,"__builtin_arc_brk", void_ftype_void, ARC_BUILTIN_BRK); + def_mbuiltin (1,"__builtin_arc_flag", void_ftype_usint, ARC_BUILTIN_FLAG); + def_mbuiltin (1,"__builtin_arc_sleep", void_ftype_usint, ARC_BUILTIN_SLEEP); + def_mbuiltin (1,"__builtin_arc_swi", void_ftype_void, ARC_BUILTIN_SWI); + def_mbuiltin (1,"__builtin_arc_core_read", usint_ftype_usint, ARC_BUILTIN_CORE_READ); + def_mbuiltin (1,"__builtin_arc_core_write", void_ftype_usint_usint, ARC_BUILTIN_CORE_WRITE); + def_mbuiltin (1,"__builtin_arc_lr", usint_ftype_usint, ARC_BUILTIN_LR); + def_mbuiltin (1,"__builtin_arc_sr", void_ftype_usint_usint, ARC_BUILTIN_SR); + def_mbuiltin (TARGET_ARC700,"__builtin_arc_trap_s", void_ftype_usint, ARC_BUILTIN_TRAP_S); + def_mbuiltin (TARGET_ARC700,"__builtin_arc_unimp_s", void_ftype_void, ARC_BUILTIN_UNIMP_S); + def_mbuiltin (1,"__builtin_arc_aligned", int_ftype_pcvoid_int, ARC_BUILTIN_ALIGNED); + + if (TARGET_SIMD_SET) + arc_init_simd_builtins (); +} + +static rtx arc_expand_simd_builtin (tree, rtx, rtx, enum machine_mode, int); + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +arc_expand_builtin (tree exp, + rtx target, + rtx subtarget, + enum machine_mode mode, + int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0; + tree arg1; + rtx op0; + rtx op1; + int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + enum machine_mode mode0; + enum machine_mode mode1; + + if (fcode > ARC_SIMD_BUILTIN_BEGIN && fcode < ARC_SIMD_BUILTIN_END) + return arc_expand_simd_builtin (exp, target, subtarget, mode, ignore); + + switch (fcode) + { + case ARC_BUILTIN_NOP: + emit_insn (gen_nop ()); + return NULL_RTX; + + case ARC_BUILTIN_NORM: + icode = CODE_FOR_clrsbsi2; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + target = gen_reg_rtx (SImode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + emit_insn (gen_clrsbsi2 (target, op0)); + return target; + + case ARC_BUILTIN_NORMW: + + /* FIXME : This should all be HImode, not SImode. */ + icode = CODE_FOR_normw; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + target = gen_reg_rtx (SImode); + + if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, convert_to_mode (mode0, op0,0)); + + emit_insn (gen_normw (target, op0)); + return target; + + case ARC_BUILTIN_MUL64: + icode = CODE_FOR_mul64; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + emit_insn (gen_mul64 (op0,op1)); + return NULL_RTX; + + case ARC_BUILTIN_MULU64: + icode = CODE_FOR_mulu64; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[0].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + emit_insn (gen_mulu64 (op0,op1)); + return NULL_RTX; + + case ARC_BUILTIN_RTIE: + icode = CODE_FOR_rtie; + emit_insn (gen_rtie (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_SYNC: + icode = CODE_FOR_sync; + emit_insn (gen_sync (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_SWAP: + icode = CODE_FOR_swap; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + target = gen_reg_rtx (SImode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + emit_insn (gen_swap (target, op0)); + return target; + + case ARC_BUILTIN_DIVAW: + icode = CODE_FOR_divaw; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + target = gen_reg_rtx (SImode); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + emit_insn (gen_divaw (target, op0, op1)); + return target; + + case ARC_BUILTIN_BRK: + icode = CODE_FOR_brk; + emit_insn (gen_brk (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_SLEEP: + icode = CODE_FOR_sleep; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + + emit_insn (gen_sleep (op0)); + return NULL_RTX; + + case ARC_BUILTIN_SWI: + icode = CODE_FOR_swi; + emit_insn (gen_swi (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_FLAG: + icode = CODE_FOR_flag; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[0].mode; + + if (! 
(*insn_data[icode].operand[0].predicate) (op0, mode0))
+	op0 = copy_to_mode_reg (mode0, op0);
+
+      emit_insn (gen_flag (op0));
+      return NULL_RTX;
+
+    case ARC_BUILTIN_CORE_READ:
+      icode = CODE_FOR_core_read;
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      target = gen_reg_rtx (SImode);
+
+      fold (arg0);
+
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+      mode0 = insn_data[icode].operand[1].mode;
+
+      emit_insn (gen_core_read (target, op0));
+      return target;
+
+    case ARC_BUILTIN_CORE_WRITE:
+      icode = CODE_FOR_core_write;
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      arg1 = CALL_EXPR_ARG (exp, 1);
+
+      fold (arg1);
+
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+      mode0 = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+
+      emit_insn (gen_core_write (op0, op1));
+      return NULL_RTX;
+
+    case ARC_BUILTIN_LR:
+      icode = CODE_FOR_lr;
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      target = gen_reg_rtx (SImode);
+
+      fold (arg0);
+
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+      mode0 = insn_data[icode].operand[1].mode;
+
+      emit_insn (gen_lr (target, op0));
+      return target;
+
+    case ARC_BUILTIN_SR:
+      icode = CODE_FOR_sr;
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      arg1 = CALL_EXPR_ARG (exp, 1);
+
+      fold (arg1);
+
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+      mode0 = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+
+      emit_insn (gen_sr (op0, op1));
+      return NULL_RTX;
+
+    case ARC_BUILTIN_TRAP_S:
+      icode = CODE_FOR_trap_s;
+      arg0 = CALL_EXPR_ARG (exp, 0);
+
+      fold (arg0);
+
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+      mode0 = insn_data[icode].operand[1].mode;
+
+      /* We don't give an error for non-constant values here because
+	 we still want to allow things to be fixed up by later inlining /
+	 constant folding / dead code elimination.  */
+      if (CONST_INT_P (op0) && !satisfies_constraint_L (op0))
+	{
+	  /* Keep this message in sync with the one in arc.md:trap_s,
+	     because *.md files don't get scanned by exgettext.  */
+	  error ("operand to trap_s should be an unsigned 6-bit value");
+	}
+      emit_insn (gen_trap_s (op0));
+      return NULL_RTX;
+
+    case ARC_BUILTIN_UNIMP_S:
+      icode = CODE_FOR_unimp_s;
+      emit_insn (gen_unimp_s (const1_rtx));
+      return NULL_RTX;
+
+    case ARC_BUILTIN_ALIGNED:
+      /* __builtin_arc_aligned (void* val, int alignval) */
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      arg1 = CALL_EXPR_ARG (exp, 1);
+      fold (arg1);
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+      target = gen_reg_rtx (SImode);
+
+      if (!CONST_INT_P (op1))
+	{
+	  /* If we can't fold the alignment to a constant integer
+	     whilst optimizing, this is probably a user error.  */
+	  if (optimize)
+	    warning (0, "__builtin_arc_aligned with non-constant alignment");
+	}
+      else
+	{
+	  HOST_WIDE_INT alignTest = INTVAL (op1);
+	  /* Check alignTest is positive, and a power of two.  */
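+	  /* A note on the idiom used below: for nonzero X, X & -X isolates
+	     the lowest set bit, so X == (X & -X) holds exactly when X is a
+	     power of two.  E.g. 8 & -8 == 8, but 12 & -12 == 4 != 12
+	     (values illustrative).  */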
+	  if (alignTest <= 0 || alignTest != (alignTest & -alignTest))
+	    {
+	      error ("invalid alignment value for __builtin_arc_aligned");
+	      return NULL_RTX;
+	    }
+
+	  if (CONST_INT_P (op0))
+	    {
+	      HOST_WIDE_INT pnt = INTVAL (op0);
+
+	      if ((pnt & (alignTest - 1)) == 0)
+		return const1_rtx;
+	    }
+	  else
+	    {
+	      unsigned align = get_pointer_alignment (arg0);
+	      unsigned numBits = alignTest * BITS_PER_UNIT;
+
+	      if (align && align >= numBits)
+		return const1_rtx;
+	      /* Another attempt to ascertain alignment.  Check the type
+		 we are pointing to.  */
+	      if (POINTER_TYPE_P (TREE_TYPE (arg0))
+		  && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits)
+		return const1_rtx;
+	    }
+	}
+
+      /* Default to false.  */
+      return const0_rtx;
+
+    default:
+      break;
+    }
+
+  /* @@@ Should really do something sensible here.  */
+  return NULL_RTX;
+}
+
+/* Return true if operands[opno] is a valid compile-time constant to be
+   used as a register number in the code for builtins.  Else flag an error
+   and return false.  */
+
+bool
+check_if_valid_regno_const (rtx *operands, int opno)
+{
+  switch (GET_CODE (operands[opno]))
+    {
+    case SYMBOL_REF :
+    case CONST :
+    case CONST_INT :
+      return true;
+    default:
+      error ("register number must be a compile-time constant. Try giving higher optimization levels");
+      break;
+    }
+  return false;
+}
+
+/* Check whether, after all the constant folding, the operand to
+   __builtin_arc_sleep is an unsigned 6-bit int.  If not, flag an error.  */
+
+bool
+check_if_valid_sleep_operand (rtx *operands, int opno)
+{
+  switch (GET_CODE (operands[opno]))
+    {
+    case CONST :
+    case CONST_INT :
+      if (UNSIGNED_INT6 (INTVAL (operands[opno])))
+	return true;
+    default:
+      fatal_error ("operand for sleep instruction must be an unsigned 6 bit compile-time constant");
+      break;
+    }
+  return false;
+}
+
+/* Return true if it is OK to make a tail-call to DECL.  */
+
+static bool
+arc_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
+			     tree exp ATTRIBUTE_UNUSED)
+{
+  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
+  if (ARC_INTERRUPT_P (arc_compute_function_type (cfun)))
+    return false;
+
+  /* Everything else is OK.  */
+  return true;
+}
+
+/* Output code to add DELTA to the first argument, and then jump
+   to FUNCTION.  Used for C++ multiple inheritance.  */
+
+static void
+arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
+		     HOST_WIDE_INT delta,
+		     HOST_WIDE_INT vcall_offset,
+		     tree function)
+{
+  int mi_delta = delta;
+  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
+  int shift = 0;
+  int this_regno
+    = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0;
+  rtx fnaddr;
+
+  if (mi_delta < 0)
+    mi_delta = - mi_delta;
+
+  /* Add DELTA.  When possible use a plain add, otherwise load it into
+     a register first.  */
+
+  while (mi_delta != 0)
+    {
+      if ((mi_delta & (3 << shift)) == 0)
+	shift += 2;
+      else
+	{
+	  asm_fprintf (file, "\t%s\t%s, %s, %d\n",
+		       mi_op, reg_names[this_regno], reg_names[this_regno],
+		       mi_delta & (0xff << shift));
+	  mi_delta &= ~(0xff << shift);
+	  shift += 8;
+	}
+    }
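+
+  /* Worked example of the loop above (delta value illustrative): for
+     mi_delta == 0x412, the first iteration adds the low byte 0x12 and
+     clears it; subsequent iterations skip the cleared bit-pairs two at
+     a time and then add 0x400.  Every emitted immediate is thus an
+     8-bit value shifted into place.  */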
+
+  /* If needed, add *(*THIS + VCALL_OFFSET) to THIS.  */
+  if (vcall_offset != 0)
+    {
+      /* ld  r12,[this]           --> temp = *this
+	 add r12,r12,vcall_offset --> temp = *(*this + vcall_offset)
+	 ld r12,[r12]
+	 add this,this,r12        --> this += *(*this + vcall_offset) */
+      asm_fprintf (file, "\tld\t%s, [%s]\n",
+		   ARC_TEMP_SCRATCH_REG, reg_names[this_regno]);
+      asm_fprintf (file, "\tadd\t%s, %s, %ld\n",
+		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset);
+      asm_fprintf (file, "\tld\t%s, [%s]\n",
+		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG);
+      asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno],
+		   reg_names[this_regno], ARC_TEMP_SCRATCH_REG);
+    }
+
+  fnaddr = XEXP (DECL_RTL (function), 0);
+
+  if (arc_is_longcall_p (fnaddr))
+    fputs ("\tj\t", file);
+  else
+    fputs ("\tb\t", file);
+  assemble_name (file, XSTR (fnaddr, 0));
+  fputc ('\n', file);
+}
+
+/* Return true if a 32-bit "long_call" should be generated for a call to
+   the function referenced by SYM_REF.  We generate a long_call if the
+   function:
+
+	a.  has an __attribute__ ((long_call))
+   or	b.  the -mlong-calls command line switch has been specified
+
+   However, we do not generate a long call if the function has an
+   __attribute__ ((short_call)) or __attribute__ ((medium_call)).
+
+   This function will be called by C fragments contained in the machine
+   description file.  */
+
+bool
+arc_is_longcall_p (rtx sym_ref)
+{
+  if (GET_CODE (sym_ref) != SYMBOL_REF)
+    return false;
+
+  return (SYMBOL_REF_LONG_CALL_P (sym_ref)
+	  || (TARGET_LONG_CALLS_SET
+	      && !SYMBOL_REF_SHORT_CALL_P (sym_ref)
+	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
+}
+
+/* Likewise for short calls.  */
+
+bool
+arc_is_shortcall_p (rtx sym_ref)
+{
+  if (GET_CODE (sym_ref) != SYMBOL_REF)
+    return false;
+
+  return (SYMBOL_REF_SHORT_CALL_P (sym_ref)
+	  || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS
+	      && !SYMBOL_REF_LONG_CALL_P (sym_ref)
+	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
+}
+
+/* Emit profiling code for calling CALLEE.  Return true if a special
+   call pattern needs to be generated.  */
+
+bool
+arc_profile_call (rtx callee)
+{
+  rtx from = XEXP (DECL_RTL (current_function_decl), 0);
+
+  if (TARGET_UCB_MCOUNT)
+    /* Profiling is done by instrumenting the callee.  */
+    return false;
+
+  if (CONSTANT_P (callee))
+    {
+      rtx count_ptr
+	= gen_rtx_CONST (Pmode,
+			 gen_rtx_UNSPEC (Pmode,
+					 gen_rtvec (3, from, callee,
+						    CONST0_RTX (Pmode)),
+					 UNSPEC_PROF));
+      rtx counter = gen_rtx_MEM (SImode, count_ptr);
+      /* ??? The increment would better be done atomically, but as there is
+	 no proper hardware support, that would be too expensive.  */
+      emit_move_insn (counter,
+		      force_reg (SImode, plus_constant (SImode, counter, 1)));
+      return false;
+    }
+  else
+    {
+      rtx count_list_ptr
+	= gen_rtx_CONST (Pmode,
+			 gen_rtx_UNSPEC (Pmode,
+					 gen_rtvec (3, from, CONST0_RTX (Pmode),
+						    CONST0_RTX (Pmode)),
+					 UNSPEC_PROF));
+      emit_move_insn (gen_rtx_REG (Pmode, 8), count_list_ptr);
+      emit_move_insn (gen_rtx_REG (Pmode, 9), callee);
+      return true;
+    }
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY.  */
+
+static bool
+arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+  if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type))
+    return true;
+  else
+    {
+      HOST_WIDE_INT size = int_size_in_bytes (type);
+      return (size == -1 || size > 8);
+    }
+}
+
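+/* Illustration of the call-type attributes tested in arc_is_longcall_p /
+   arc_is_shortcall_p above, as they would appear in hypothetical user
+   code:
+
+       void far_func (void) __attribute__ ((long_call));
+       void near_func (void) __attribute__ ((short_call));
+       void mid_func (void) __attribute__ ((medium_call));
+
+   With -mlong-calls, every call defaults to long unless the callee is
+   marked short_call or medium_call; without it, calls default to short
+   (or medium with -mmedium-calls) unless marked long_call.  */
+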
+/* This was in rtlanal.c, and can go in there when we decide we want
+   to submit the change for inclusion in the GCC tree.  */
+/* Like note_stores, but allow the callback to have side effects on the rtl
+   (like the note_stores of yore):
+   Call FUN on each register or MEM that is stored into or clobbered by X.
+   (X would be the pattern of an insn).  DATA is an arbitrary pointer,
+   ignored by note_stores, but passed to FUN.
+   FUN may alter parts of the RTL.
+
+   FUN receives three arguments:
+   1. the REG, MEM, CC0 or PC being stored in or clobbered,
+   2. the SET or CLOBBER rtx that does the store,
+   3. the pointer DATA provided to note_stores.
+
+   If the item being stored in or clobbered is a SUBREG of a hard register,
+   the SUBREG will be passed.  */
+
+/* For now.  */
+static void
+walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data)
+{
+  int i;
+
+  if (GET_CODE (x) == COND_EXEC)
+    x = COND_EXEC_CODE (x);
+
+  if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
+    {
+      rtx dest = SET_DEST (x);
+
+      while ((GET_CODE (dest) == SUBREG
+	      && (!REG_P (SUBREG_REG (dest))
+		  || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER))
+	     || GET_CODE (dest) == ZERO_EXTRACT
+	     || GET_CODE (dest) == STRICT_LOW_PART)
+	dest = XEXP (dest, 0);
+
+      /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions,
+	 each of whose first operand is a register.  */
+      if (GET_CODE (dest) == PARALLEL)
+	{
+	  for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
+	    if (XEXP (XVECEXP (dest, 0, i), 0) != 0)
+	      (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data);
+	}
+      else
+	(*fun) (dest, x, data);
+    }
+  else if (GET_CODE (x) == PARALLEL)
+    for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
+      walk_stores (XVECEXP (x, 0, i), fun, data);
+}
+
+static bool
+arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED,
+		       enum machine_mode mode ATTRIBUTE_UNUSED,
+		       const_tree type,
+		       bool named ATTRIBUTE_UNUSED)
+{
+  return (type != 0
+	  && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+	      || TREE_ADDRESSABLE (type)));
+}
+
+/* Return NULL if INSN is valid within a low-overhead loop.
+   Otherwise return why doloop cannot be applied.  */
+
+static const char *
+arc_invalid_within_doloop (const_rtx insn)
+{
+  if (CALL_P (insn))
+    return "Function call in the loop.";
+  return NULL;
+}
+
+static int arc_reorg_in_progress = 0;
+
+/* ARC's machine specific reorg function.  */
+
+static void
+arc_reorg (void)
+{
+  rtx insn, pattern;
+  rtx pc_target;
+  long offset;
+  int changed;
+
+  cfun->machine->arc_reorg_started = 1;
+  arc_reorg_in_progress = 1;
+
+  /* Emit special sections for profiling.  */
+  if (crtl->profile)
+    {
+      section *save_text_section;
+      rtx insn;
+      int size = get_max_uid () >> 4;
+      htab_t htab = htab_create (size, unspec_prof_hash, unspec_prof_htab_eq,
+				 NULL);
+
+      save_text_section = in_section;
+      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+	if (NONJUMP_INSN_P (insn))
+	  walk_stores (PATTERN (insn), write_profile_sections, htab);
+      if (htab_elements (htab))
+	in_section = 0;
+      switch_to_section (save_text_section);
+      htab_delete (htab);
+    }
+
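+  /* The doloop_begin_i / doloop_end_i patterns handled below implement
+     the ARC zero-overhead loop mechanism.  As an illustration, the
+     intended final shape of such a loop in schematic, hand-written
+     assembly is:
+
+	   mov   lp_count,r0    ; trip count
+	   lp    @.Lend         ; hardware loop up to .Lend
+	   ...                  ; loop body
+	 .Lend:
+
+     with the hardware decrementing lp_count and looping back without a
+     separate compare-and-branch insn.  */
+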
+  /* Link up loop ends with their loop start.  */
+  {
+    for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+      if (GET_CODE (insn) == JUMP_INSN
+	  && recog_memoized (insn) == CODE_FOR_doloop_end_i)
+	{
+	  rtx top_label
+	    = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0);
+	  rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label));
+	  rtx lp, prev = prev_nonnote_insn (top_label);
+	  rtx lp_simple = NULL_RTX;
+	  rtx next = NULL_RTX;
+	  rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0);
+	  HOST_WIDE_INT loop_end_id
+	    = -INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0));
+	  int seen_label = 0;
+
+	  for (lp = prev;
+	       (lp && NONJUMP_INSN_P (lp)
+		&& recog_memoized (lp) != CODE_FOR_doloop_begin_i);
+	       lp = prev_nonnote_insn (lp))
+	    ;
+	  if (!lp || !NONJUMP_INSN_P (lp)
+	      || dead_or_set_regno_p (lp, LP_COUNT))
+	    {
+	      for (prev = next = insn, lp = NULL_RTX; prev || next;)
+		{
+		  if (prev)
+		    {
+		      if (NONJUMP_INSN_P (prev)
+			  && recog_memoized (prev) == CODE_FOR_doloop_begin_i
+			  && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0))
+			      == loop_end_id))
+			{
+			  lp = prev;
+			  break;
+			}
+		      else if (LABEL_P (prev))
+			seen_label = 1;
+		      prev = prev_nonnote_insn (prev);
+		    }
+		  if (next)
+		    {
+		      if (NONJUMP_INSN_P (next)
+			  && recog_memoized (next) == CODE_FOR_doloop_begin_i
+			  && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0))
+			      == loop_end_id))
+			{
+			  lp = next;
+			  break;
+			}
+		      next = next_nonnote_insn (next);
+		    }
+		}
+	      prev = NULL_RTX;
+	    }
+	  else
+	    lp_simple = lp;
+	  if (lp && !dead_or_set_regno_p (lp, LP_COUNT))
+	    {
+	      rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0, 3), 0);
+	      if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0)))
+		/* The loop end insn has been duplicated.  That can happen
+		   when there is a conditional block at the very end of
+		   the loop.  */
+		goto failure;
+	      /* If register allocation failed to allocate to the right
+		 register, there is no point in teaching reload to fix this
+		 up with reloads, as that would cost more than using an
+		 ordinary core register with the doloop_fallback pattern.  */
+	      if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt))
+		  /* Likewise, if the loop setup is evidently inside the loop,
+		     we lose.  */
+		  || (!lp_simple && lp != next && !seen_label))
+		{
+		  remove_insn (lp);
+		  goto failure;
+		}
+	      /* It is common that the optimizers copy the loop count from
+		 another register, and doloop_begin_i is stuck with the
+		 source of the move.  Making doloop_begin_i only accept "l"
+		 is nonsensical, as this then makes reload evict the pseudo
+		 used for the loop end.  The underlying cause is that the
+		 optimizers don't understand that the register allocation for
+		 doloop_begin_i should be treated as part of the loop.
+		 Try to work around this problem by verifying the previous
+		 move exists.  */
+	      if (true_regnum (begin_cnt) != LP_COUNT)
+		{
+		  rtx mov, set, note;
+
+		  for (mov = prev_nonnote_insn (lp); mov;
+		       mov = prev_nonnote_insn (mov))
+		    {
+		      if (!NONJUMP_INSN_P (mov))
+			mov = 0;
+		      else if ((set = single_set (mov))
+			       && rtx_equal_p (SET_SRC (set), begin_cnt)
+			       && rtx_equal_p (SET_DEST (set), op0))
+			break;
+		    }
+		  if (mov)
+		    {
+		      XEXP (XVECEXP (PATTERN (lp), 0, 3), 0) = op0;
+		      note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt));
+		      if (note)
+			remove_note (lp, note);
+		    }
+		  else
+		    {
+		      remove_insn (lp);
+		      goto failure;
+		    }
+		}
+	      XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num;
+	      XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num;
+	      if (next == lp)
+		XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx;
+	      else if (!lp_simple)
+		XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx;
+	      else if (prev != lp)
+		{
+		  remove_insn (lp);
+		  add_insn_after (lp, prev, NULL);
+		}
+	      if (!lp_simple)
+		{
+		  XEXP (XVECEXP (PATTERN (lp), 0, 7), 0)
+		    = gen_rtx_LABEL_REF (Pmode, top_label);
+		  add_reg_note (lp, REG_LABEL_OPERAND, top_label);
+		  LABEL_NUSES (top_label)++;
+		}
+	      /* We can avoid tedious loop start / end setting for empty loops
+		 by merely setting the loop count to its final value.  */
+	      if (next_active_insn (top_label) == insn)
+		{
+		  rtx lc_set
+		    = gen_rtx_SET (VOIDmode,
+				   XEXP (XVECEXP (PATTERN (lp), 0, 3), 0),
+				   const0_rtx);
+
+		  lc_set = emit_insn_before (lc_set, insn);
+		  delete_insn (lp);
+		  delete_insn (insn);
+		  insn = lc_set;
+		}
+	      /* If the loop is non-empty with zero length, we can't make it
+		 a zero-overhead loop.  That can happen for empty asms.  */
+	      else
+		{
+		  rtx scan;
+
+		  for (scan = top_label;
+		       (scan && scan != insn
+			&& (!NONJUMP_INSN_P (scan) || !get_attr_length (scan)));
+		       scan = NEXT_INSN (scan));
+		  if (scan == insn)
+		    {
+		      remove_insn (lp);
+		      goto failure;
+		    }
+		}
+	    }
+	  else
+	    {
+	      /* Sometimes the loop optimizer makes a complete hash of the
+		 loop.  If it were only that the loop is not entered at the
+		 top, we could fix this up by setting LP_START with SR.
+		 However, if we can't find the loop begin where it should be,
+		 chances are that it does not even dominate the loop, but is
+		 inside the loop instead.  Using SR there would kill
+		 performance.
+		 We use the doloop_fallback pattern here, which executes
+		 in two cycles on the ARC700 when predicted correctly.  */
+	    failure:
+	      if (!REG_P (op0))
+		{
+		  rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0);
+
+		  emit_insn_before (gen_move_insn (op3, op0), insn);
+		  PATTERN (insn)
+		    = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0);
+		}
+	      else
+		XVEC (PATTERN (insn), 0)
+		  = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0),
+			       XVECEXP (PATTERN (insn), 0, 1));
+	      INSN_CODE (insn) = -1;
+	    }
+	}
+    }
+
+/* FIXME: should anticipate ccfsm action, generate special patterns for
+   to-be-deleted branches that have no delay slot and have at least the
+   length of the size increase forced on other insns that are conditionalized.
+   This can also have an insn_list inside that enumerates insns which are
+   not actually conditionalized because the destinations are dead in the
+   not-execute case.
+   Could also tag branches that we want to be unaligned if they get no delay
+   slot, or even ones that we don't want to do delay slot scheduling for
+   because we can unalign them.
+
+   However, there are cases when conditional execution is only possible after
+   delay slot scheduling:
+
+   - If a delay slot is filled with a nocond/set insn from above, the previous
+     basic block can become eligible for conditional execution.
+   - If a delay slot is filled with a nocond insn from the fall-through path,
+     the branch with that delay slot can become eligible for conditional
+     execution (however, with the same sort of data flow analysis that dbr
+     does, we could have figured out before that we don't need to
+     conditionalize this insn.)
+   - If a delay slot insn is filled with an insn from the target, the
+     target label gets its uses decremented (even deleted if falling to zero),
+     thus possibly creating more condexec opportunities there.
+   Therefore, we should still be prepared to apply condexec optimization on
+   non-prepared branches if the size increase of conditionalized insns is no
+   more than the size saved from eliminating the branch.  An invocation option
+   could also be used to reserve a bit of extra size for condbranches so that
+   this'll work more often (could also test in arc_reorg if the block is
+   'close enough' to be eligible for condexec to make this likely, and
+   estimate required size increase).  */
+  /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible.  */
+  if (TARGET_NO_BRCC_SET)
+    return;
+
+  do
+    {
+      init_insn_lengths ();
+      changed = 0;
+
+      if (optimize > 1 && !TARGET_NO_COND_EXEC)
+	{
+	  arc_ifcvt ();
+	  unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish;
+	  df_finish_pass ((flags & TODO_df_verify) != 0);
+	}
+
+      /* Call shorten_branches to calculate the insn lengths.  */
+      shorten_branches (get_insns ());
+      cfun->machine->ccfsm_current_insn = NULL_RTX;
+
+      if (!INSN_ADDRESSES_SET_P ())
+	fatal_error ("Insn addresses not set after shorten_branches");
+
+      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+	{
+	  rtx label;
+	  enum attr_type insn_type;
+
+	  /* If a non-jump insn (or a casesi jump table), continue.  */
+	  if (GET_CODE (insn) != JUMP_INSN
+	      || GET_CODE (PATTERN (insn)) == ADDR_VEC
+	      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
+	    continue;
+
+	  /* If we already have a brcc, note if it is suitable for brcc_s.
+	     Be a bit generous with the brcc_s range so that we can take
+	     advantage of any code shortening from delay slot scheduling.  */
+	  if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch)
+	    {
+	      rtx pat = PATTERN (insn);
+	      rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0);
+	      rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0);
+
+	      offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
+	      if ((offset >= -140 && offset < 140)
+		  && rtx_equal_p (XEXP (op, 1), const0_rtx)
+		  && compact_register_operand (XEXP (op, 0), VOIDmode)
+		  && equality_comparison_operator (op, VOIDmode))
+		PUT_MODE (*ccp, CC_Zmode);
+	      else if (GET_MODE (*ccp) == CC_Zmode)
+		PUT_MODE (*ccp, CC_ZNmode);
+	      continue;
+	    }
+	  if ((insn_type = get_attr_type (insn)) == TYPE_BRCC
+	      || insn_type == TYPE_BRCC_NO_DELAY_SLOT)
+	    continue;
+
+	  /* OK.  So we have a jump insn.  */
+	  /* We need to check that it is a bcc.  */
+	  /* Bcc => set (pc) (if_then_else ) */
+	  pattern = PATTERN (insn);
+	  if (GET_CODE (pattern) != SET
+	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
+	      || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1)))
+	    continue;
+
+	  /* Now check if the jump is beyond the s9 range.  */
+	  if (find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX))
+	    continue;
+	  offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
+
+	  if (offset > 253 || offset < -254)
+	    continue;
+
+	  pc_target = SET_SRC (pattern);
+
+	  /* Now go back and search for the set cc insn.  */
+
+	  label = XEXP (pc_target, 1);
+
+	  {
+	    rtx pat, scan, link_insn = NULL;
+
+	    for (scan = PREV_INSN (insn);
+		 scan && GET_CODE (scan) != CODE_LABEL;
+		 scan = PREV_INSN (scan))
+	      {
+		if (! INSN_P (scan))
+		  continue;
+		pat = PATTERN (scan);
+		if (GET_CODE (pat) == SET
+		    && cc_register (SET_DEST (pat), VOIDmode))
+		  {
+		    link_insn = scan;
+		    break;
+		  }
+	      }
+	    if (! link_insn)
+	      continue;
+	    else
+	      /* Check if this is a data dependency.  */
+	      {
+		rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
+		rtx cmp0, cmp1;
+
+		/* OK, this is the set cc.  Copy args here.  */
+		op = XEXP (pc_target, 0);
+
+		op0 = cmp0 = XEXP (SET_SRC (pat), 0);
+		op1 = cmp1 = XEXP (SET_SRC (pat), 1);
+		if (GET_CODE (op0) == ZERO_EXTRACT
+		    && XEXP (op0, 1) == const1_rtx
+		    && (GET_CODE (op) == EQ
+			|| GET_CODE (op) == NE))
+		  {
+		    /* btst / b{eq,ne} -> bbit{0,1} */
+		    op0 = XEXP (cmp0, 0);
+		    op1 = XEXP (cmp0, 2);
+		  }
+		else if (!register_operand (op0, VOIDmode)
+			 || !general_operand (op1, VOIDmode))
+		  continue;
+		/* Be careful not to break what cmpsfpx_raw is
+		   trying to create for checking equality of
+		   single-precision floats.  */
+		else if (TARGET_SPFP
+			 && GET_MODE (op0) == SFmode
+			 && GET_MODE (op1) == SFmode)
+		  continue;
+
+		/* Neither of the two cmp operands should be set between the
+		   cmp and the branch.  */
+		if (reg_set_between_p (op0, link_insn, insn))
+		  continue;
+
+		if (reg_set_between_p (op1, link_insn, insn))
+		  continue;
+
+		/* Since the MODE check does not work, check that this is
+		   the CC reg's last set location before insn, and also that
+		   no instruction between the cmp and branch uses the
+		   condition codes.  */
+		if ((reg_set_between_p (SET_DEST (pat), link_insn, insn))
+		    || (reg_used_between_p (SET_DEST (pat), link_insn, insn)))
+		  continue;
+
+		/* The CC reg should be dead after insn.  */
+		if (!find_regno_note (insn, REG_DEAD, CC_REG))
+		  continue;
+
+		op = gen_rtx_fmt_ee (GET_CODE (op),
+				     GET_MODE (op), cmp0, cmp1);
+		/* If we create a LIMM where there was none before,
+		   we only benefit if we can avoid a scheduling bubble
+		   for the ARC600.  Otherwise, we'd only forgo chances
+		   at short insn generation, and risk out-of-range
+		   branches.  */
+		if (!brcc_nolimm_operator (op, VOIDmode)
+		    && !long_immediate_operand (op1, VOIDmode)
+		    && (TARGET_ARC700
+			|| next_active_insn (link_insn) != insn))
+		  continue;
+
+		/* Emit bbit / brcc (or brcc_s if possible).
+		   CC_Zmode indicates that brcc_s is possible.  */
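+		/* As an illustration (schematic, hand-written assembly),
+		   the combination replaces a two-insn sequence such as
+
+		     cmp  r0,r1
+		     beq  @target
+
+		   with a single compare-and-branch
+
+		     breq r0,r1,@target
+
+		   and, via the bbit{0,1} forms, fuses a single-bit test
+		   with the branch.  */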
+
+		if (op0 != cmp0)
+		  cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG);
+		else if ((offset >= -140 && offset < 140)
+			 && rtx_equal_p (op1, const0_rtx)
+			 && compact_register_operand (op0, VOIDmode)
+			 && (GET_CODE (op) == EQ
+			     || GET_CODE (op) == NE))
+		  cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG);
+		else
+		  cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG);
+
+		brcc_insn
+		  = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx);
+		brcc_insn = gen_rtx_SET (VOIDmode, pc_rtx, brcc_insn);
+		cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx);
+		brcc_insn
+		  = gen_rtx_PARALLEL
+		      (VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx));
+		brcc_insn = emit_jump_insn_before (brcc_insn, insn);
+
+		JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn);
+		note = find_reg_note (insn, REG_BR_PROB, 0);
+		if (note)
+		  {
+		    XEXP (note, 1) = REG_NOTES (brcc_insn);
+		    REG_NOTES (brcc_insn) = note;
+		  }
+		note = find_reg_note (link_insn, REG_DEAD, op0);
+		if (note)
+		  {
+		    remove_note (link_insn, note);
+		    XEXP (note, 1) = REG_NOTES (brcc_insn);
+		    REG_NOTES (brcc_insn) = note;
+		  }
+		note = find_reg_note (link_insn, REG_DEAD, op1);
+		if (note)
+		  {
+		    XEXP (note, 1) = REG_NOTES (brcc_insn);
+		    REG_NOTES (brcc_insn) = note;
+		  }
+
+		changed = 1;
+
+		/* Delete the bcc insn.  */
+		set_insn_deleted (insn);
+
+		/* Delete the cmp insn.  */
+		set_insn_deleted (link_insn);
+	      }
+	  }
+	}
+      /* Clear out insn_addresses.  */
+      INSN_ADDRESSES_FREE ();
+
+    } while (changed);
+
+  if (INSN_ADDRESSES_SET_P ())
+    fatal_error ("insn addresses not freed");
+
+  arc_reorg_in_progress = 0;
+}
+
+/* Check if the operands are valid for BRcc.d generation.
+   Valid BRcc.d patterns are
+	Brcc.d b, c, s9
+	Brcc.d b, u6, s9
+
+   For cc={GT, LE, GTU, LEU}, u6=63 cannot be allowed,
+   since they are encoded by the assembler as {GE, LT, HS, LS} 64, which
+   does not have a delay slot.
+
+   Assumed precondition: Second operand is either a register or a u6 value.  */
+
+bool
+valid_brcc_with_delay_p (rtx *operands)
+{
+  if (optimize_size && GET_MODE (operands[4]) == CC_Zmode)
+    return false;
+  return brcc_nolimm_operator (operands[0], VOIDmode);
+}
+
+/* ??? Hack.  This should not really be here.  See PR32143.  */
+static bool
+arc_decl_anon_ns_mem_p (const_tree decl)
+{
+  while (1)
+    {
+      if (decl == NULL_TREE || decl == error_mark_node)
+	return false;
+      if (TREE_CODE (decl) == NAMESPACE_DECL
+	  && DECL_NAME (decl) == NULL_TREE)
+	return true;
+      /* Classes and namespaces inside anonymous namespaces have
+	 TREE_PUBLIC == 0, so we can shortcut the search.  */
+      else if (TYPE_P (decl))
+	return (TREE_PUBLIC (TYPE_NAME (decl)) == 0);
+      else if (TREE_CODE (decl) == NAMESPACE_DECL)
+	return (TREE_PUBLIC (decl) == 0);
+      else
+	decl = DECL_CONTEXT (decl);
+    }
+}
+
+/* Implement TARGET_IN_SMALL_DATA_P.  Return true if it would be safe to
+   access DECL using %gp_rel(...)($gp).  */
+
+static bool
+arc_in_small_data_p (const_tree decl)
+{
+  HOST_WIDE_INT size;
+
+  if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
+    return false;
+
+  /* We don't yet generate small-data references for -mabicalls.  See related
+     -G handling in override_options.  */
+  if (TARGET_NO_SDATA_SET)
+    return false;
+
+  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
+    {
+      const char *name;
+
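+      /* Illustration (hypothetical user code): the check below accepts a
+	 variable placed explicitly in small data, e.g.
+
+	   int counter __attribute__ ((section (".sdata")));
+
+	 so that accesses to it can be emitted gp-relative.  */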
+      /* Reject anything that isn't in a known small-data section.  */
+      name = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
+      if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
+	return false;
+
+      /* If a symbol is defined externally, the assembler will use the
+	 usual -G rules when deciding how to implement macros.  */
+      if (!DECL_EXTERNAL (decl))
+	return true;
+    }
+  /* Only global variables go into the sdata section for now.  */
+  else if (1)
+    {
+      /* Don't put constants into the small data section: we want them
+	 to be in ROM rather than RAM.  */
+      if (TREE_CODE (decl) != VAR_DECL)
+	return false;
+
+      if (TREE_READONLY (decl)
+	  && !TREE_SIDE_EFFECTS (decl)
+	  && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl))))
+	return false;
+
+      /* TREE_PUBLIC might change after the first call, because of the patch
+	 for PR19238.  */
+      if (default_binds_local_p_1 (decl, 1)
+	  || arc_decl_anon_ns_mem_p (decl))
+	return false;
+
+      /* To ensure -mvolatile-cache works, since ld.di does not have a
+	 gp-relative variant.  */
+      if (TREE_THIS_VOLATILE (decl))
+	return false;
+    }
+
+  /* Disable sdata references to weak variables.  */
+  if (DECL_WEAK (decl))
+    return false;
+
+  size = int_size_in_bytes (TREE_TYPE (decl));
+
+/*   if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) */
+/*     return false; */
+
+  /* Allow only <=4B long data types into sdata.  */
+  return (size > 0 && size <= 4);
+}
+
+/* Return true if X is a small data address that can be rewritten
+   as a gp+symref.  */
+
+static bool
+arc_rewrite_small_data_p (rtx x)
+{
+  if (GET_CODE (x) == CONST)
+    x = XEXP (x, 0);
+
+  if (GET_CODE (x) == PLUS)
+    {
+      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+	x = XEXP (x, 0);
+    }
+
+  return (GET_CODE (x) == SYMBOL_REF
+	  && SYMBOL_REF_SMALL_P (x));
+}
+
+/* A for_each_rtx callback, used by arc_rewrite_small_data.  */
+
+static int
+arc_rewrite_small_data_1 (rtx *loc, void *data)
+{
+  if (arc_rewrite_small_data_p (*loc))
+    {
+      rtx top;
+
+      gcc_assert (SDATA_BASE_REGNUM == PIC_OFFSET_TABLE_REGNUM);
+      *loc = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, *loc);
+      if (loc == data)
+	return -1;
+      top = *(rtx*) data;
+      if (GET_CODE (top) == MEM && &XEXP (top, 0) == loc)
+	; /* OK.  */
+      else if (GET_CODE (top) == MEM
+	       && GET_CODE (XEXP (top, 0)) == PLUS
+	       && GET_CODE (XEXP (XEXP (top, 0), 0)) == MULT)
+	*loc = force_reg (Pmode, *loc);
+      else
+	gcc_unreachable ();
+      return -1;
+    }
+
+  if (GET_CODE (*loc) == PLUS
+      && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx))
+    return -1;
+
+  return 0;
+}
+
+/* If possible, rewrite OP so that it refers to small data using
+   explicit relocations.  */
+
+rtx
+arc_rewrite_small_data (rtx op)
+{
+  op = copy_insn (op);
+  for_each_rtx (&op, arc_rewrite_small_data_1, &op);
+  return op;
+}
+
+/* A for_each_rtx callback for small_data_pattern.  */
+
+static int
+small_data_pattern_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
+{
+  if (GET_CODE (*loc) == PLUS
+      && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx))
+    return -1;
+
+  return arc_rewrite_small_data_p (*loc);
+}
+
+/* Return true if OP refers to small data symbols directly, not through
+   a PLUS.  */
+
+bool
+small_data_pattern (rtx op, enum machine_mode)
+{
+  return (GET_CODE (op) != SEQUENCE
+	  && for_each_rtx (&op, small_data_pattern_1, 0));
+}
+
+/* Return true if OP is an acceptable memory operand for ARCompact
+   16-bit gp-relative load instructions.
+   OP should look like:  [r26, symref@sda]
+   i.e.  (mem (plus (reg 26) (symref with smalldata flag set)))  */
+/* volatile cache option still to be handled.
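+
+   An illustrative instance of the accepted operand shape (schematic,
+   hand-written assembly): for an "int x" living in .sdata, a 16-bit
+   load can be emitted as
+
+     ld_s r0,[gp,x@sda]
+
+   with gp (r26) holding the small-data base address.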
*/ + +bool +compact_sda_memory_operand (rtx op, enum machine_mode mode) +{ + rtx addr; + int size; + + /* Eliminate non-memory operations. */ + if (GET_CODE (op) != MEM) + return false; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + size = GET_MODE_SIZE (mode); + + /* dword operations really put out 2 instructions, so eliminate them. */ + if (size > UNITS_PER_WORD) + return false; + + /* Decode the address now. */ + addr = XEXP (op, 0); + + return LEGITIMATE_SMALL_DATA_ADDRESS_P (addr); +} + +/* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL. */ + +void +arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name, + unsigned HOST_WIDE_INT size, + unsigned HOST_WIDE_INT align, + unsigned HOST_WIDE_INT globalize_p) +{ + int in_small_data = arc_in_small_data_p (decl); + + if (in_small_data) + switch_to_section (get_named_section (NULL, ".sbss", 0)); + /* named_section (0,".sbss",0); */ + else + switch_to_section (bss_section); + + if (globalize_p) + (*targetm.asm_out.globalize_label) (stream, name); + + ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT)); + ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); + ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); + ASM_OUTPUT_LABEL (stream, name); + + if (size != 0) + ASM_OUTPUT_SKIP (stream, size); +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +/* SIMD builtins support. */ +enum simd_insn_args_type { + Va_Vb_Vc, + Va_Vb_rlimm, + Va_Vb_Ic, + Va_Vb_u6, + Va_Vb_u8, + Va_rlimm_u8, + + Va_Vb, + + void_rlimm, + void_u6, + + Da_u3_rlimm, + Da_rlimm_rlimm, + + Va_Ib_u8, + void_Va_Ib_u8, + + Va_Vb_Ic_u8, + void_Va_u3_Ib_u8 +}; + +struct builtin_description +{ + enum simd_insn_args_type args_type; + const enum insn_code icode; + const char * const name; + const enum arc_builtins code; +}; + +static const struct builtin_description arc_simd_builtin_desc_list[] = +{ + /* VVV builtins go first. 
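+
+   For illustration, the first SIMD_BUILTIN entry below expands to
+
+     { Va_Vb_Vc, CODE_FOR_vaddaw_insn, "__builtin_arc_vaddaw",
+       ARC_SIMD_BUILTIN_VADDAW },
+
+   i.e. one initializer of the table per entry.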
*/ +#define SIMD_BUILTIN(type, code, string, builtin) \ + { type,CODE_FOR_##code, "__builtin_arc_" string, \ + ARC_SIMD_BUILTIN_##builtin }, + + SIMD_BUILTIN (Va_Vb_Vc, vaddaw_insn, "vaddaw", VADDAW) + SIMD_BUILTIN (Va_Vb_Vc, vaddw_insn, "vaddw", VADDW) + SIMD_BUILTIN (Va_Vb_Vc, vavb_insn, "vavb", VAVB) + SIMD_BUILTIN (Va_Vb_Vc, vavrb_insn, "vavrb", VAVRB) + SIMD_BUILTIN (Va_Vb_Vc, vdifaw_insn, "vdifaw", VDIFAW) + SIMD_BUILTIN (Va_Vb_Vc, vdifw_insn, "vdifw", VDIFW) + SIMD_BUILTIN (Va_Vb_Vc, vmaxaw_insn, "vmaxaw", VMAXAW) + SIMD_BUILTIN (Va_Vb_Vc, vmaxw_insn, "vmaxw", VMAXW) + SIMD_BUILTIN (Va_Vb_Vc, vminaw_insn, "vminaw", VMINAW) + SIMD_BUILTIN (Va_Vb_Vc, vminw_insn, "vminw", VMINW) + SIMD_BUILTIN (Va_Vb_Vc, vmulaw_insn, "vmulaw", VMULAW) + SIMD_BUILTIN (Va_Vb_Vc, vmulfaw_insn, "vmulfaw", VMULFAW) + SIMD_BUILTIN (Va_Vb_Vc, vmulfw_insn, "vmulfw", VMULFW) + SIMD_BUILTIN (Va_Vb_Vc, vmulw_insn, "vmulw", VMULW) + SIMD_BUILTIN (Va_Vb_Vc, vsubaw_insn, "vsubaw", VSUBAW) + SIMD_BUILTIN (Va_Vb_Vc, vsubw_insn, "vsubw", VSUBW) + SIMD_BUILTIN (Va_Vb_Vc, vsummw_insn, "vsummw", VSUMMW) + SIMD_BUILTIN (Va_Vb_Vc, vand_insn, "vand", VAND) + SIMD_BUILTIN (Va_Vb_Vc, vandaw_insn, "vandaw", VANDAW) + SIMD_BUILTIN (Va_Vb_Vc, vbic_insn, "vbic", VBIC) + SIMD_BUILTIN (Va_Vb_Vc, vbicaw_insn, "vbicaw", VBICAW) + SIMD_BUILTIN (Va_Vb_Vc, vor_insn, "vor", VOR) + SIMD_BUILTIN (Va_Vb_Vc, vxor_insn, "vxor", VXOR) + SIMD_BUILTIN (Va_Vb_Vc, vxoraw_insn, "vxoraw", VXORAW) + SIMD_BUILTIN (Va_Vb_Vc, veqw_insn, "veqw", VEQW) + SIMD_BUILTIN (Va_Vb_Vc, vlew_insn, "vlew", VLEW) + SIMD_BUILTIN (Va_Vb_Vc, vltw_insn, "vltw", VLTW) + SIMD_BUILTIN (Va_Vb_Vc, vnew_insn, "vnew", VNEW) + SIMD_BUILTIN (Va_Vb_Vc, vmr1aw_insn, "vmr1aw", VMR1AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr1w_insn, "vmr1w", VMR1W) + SIMD_BUILTIN (Va_Vb_Vc, vmr2aw_insn, "vmr2aw", VMR2AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr2w_insn, "vmr2w", VMR2W) + SIMD_BUILTIN (Va_Vb_Vc, vmr3aw_insn, "vmr3aw", VMR3AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr3w_insn, "vmr3w", VMR3W) + SIMD_BUILTIN (Va_Vb_Vc, vmr4aw_insn, "vmr4aw", VMR4AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr4w_insn, "vmr4w", VMR4W) + SIMD_BUILTIN (Va_Vb_Vc, vmr5aw_insn, "vmr5aw", VMR5AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr5w_insn, "vmr5w", VMR5W) + SIMD_BUILTIN (Va_Vb_Vc, vmr6aw_insn, "vmr6aw", VMR6AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr6w_insn, "vmr6w", VMR6W) + SIMD_BUILTIN (Va_Vb_Vc, vmr7aw_insn, "vmr7aw", VMR7AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr7w_insn, "vmr7w", VMR7W) + SIMD_BUILTIN (Va_Vb_Vc, vmrb_insn, "vmrb", VMRB) + SIMD_BUILTIN (Va_Vb_Vc, vh264f_insn, "vh264f", VH264F) + SIMD_BUILTIN (Va_Vb_Vc, vh264ft_insn, "vh264ft", VH264FT) + SIMD_BUILTIN (Va_Vb_Vc, vh264fw_insn, "vh264fw", VH264FW) + SIMD_BUILTIN (Va_Vb_Vc, vvc1f_insn, "vvc1f", VVC1F) + SIMD_BUILTIN (Va_Vb_Vc, vvc1ft_insn, "vvc1ft", VVC1FT) + + SIMD_BUILTIN (Va_Vb_rlimm, vbaddw_insn, "vbaddw", VBADDW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmaxw_insn, "vbmaxw", VBMAXW) + SIMD_BUILTIN (Va_Vb_rlimm, vbminw_insn, "vbminw", VBMINW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmulaw_insn, "vbmulaw", VBMULAW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmulfw_insn, "vbmulfw", VBMULFW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmulw_insn, "vbmulw", VBMULW) + SIMD_BUILTIN (Va_Vb_rlimm, vbrsubw_insn, "vbrsubw", VBRSUBW) + SIMD_BUILTIN (Va_Vb_rlimm, vbsubw_insn, "vbsubw", VBSUBW) + + /* Va, Vb, Ic instructions. */ + SIMD_BUILTIN (Va_Vb_Ic, vasrw_insn, "vasrw", VASRW) + SIMD_BUILTIN (Va_Vb_Ic, vsr8_insn, "vsr8", VSR8) + SIMD_BUILTIN (Va_Vb_Ic, vsr8aw_insn, "vsr8aw", VSR8AW) + + /* Va, Vb, u6 instructions. 
*/ + SIMD_BUILTIN (Va_Vb_u6, vasrrwi_insn, "vasrrwi", VASRRWi) + SIMD_BUILTIN (Va_Vb_u6, vasrsrwi_insn, "vasrsrwi", VASRSRWi) + SIMD_BUILTIN (Va_Vb_u6, vasrwi_insn, "vasrwi", VASRWi) + SIMD_BUILTIN (Va_Vb_u6, vasrpwbi_insn, "vasrpwbi", VASRPWBi) + SIMD_BUILTIN (Va_Vb_u6, vasrrpwbi_insn,"vasrrpwbi", VASRRPWBi) + SIMD_BUILTIN (Va_Vb_u6, vsr8awi_insn, "vsr8awi", VSR8AWi) + SIMD_BUILTIN (Va_Vb_u6, vsr8i_insn, "vsr8i", VSR8i) + + /* Va, Vb, u8 (simm) instructions. */ + SIMD_BUILTIN (Va_Vb_u8, vmvaw_insn, "vmvaw", VMVAW) + SIMD_BUILTIN (Va_Vb_u8, vmvw_insn, "vmvw", VMVW) + SIMD_BUILTIN (Va_Vb_u8, vmvzw_insn, "vmvzw", VMVZW) + SIMD_BUILTIN (Va_Vb_u8, vd6tapf_insn, "vd6tapf", VD6TAPF) + + /* Va, rlimm, u8 (simm) instructions. */ + SIMD_BUILTIN (Va_rlimm_u8, vmovaw_insn, "vmovaw", VMOVAW) + SIMD_BUILTIN (Va_rlimm_u8, vmovw_insn, "vmovw", VMOVW) + SIMD_BUILTIN (Va_rlimm_u8, vmovzw_insn, "vmovzw", VMOVZW) + + /* Va, Vb instructions. */ + SIMD_BUILTIN (Va_Vb, vabsaw_insn, "vabsaw", VABSAW) + SIMD_BUILTIN (Va_Vb, vabsw_insn, "vabsw", VABSW) + SIMD_BUILTIN (Va_Vb, vaddsuw_insn, "vaddsuw", VADDSUW) + SIMD_BUILTIN (Va_Vb, vsignw_insn, "vsignw", VSIGNW) + SIMD_BUILTIN (Va_Vb, vexch1_insn, "vexch1", VEXCH1) + SIMD_BUILTIN (Va_Vb, vexch2_insn, "vexch2", VEXCH2) + SIMD_BUILTIN (Va_Vb, vexch4_insn, "vexch4", VEXCH4) + SIMD_BUILTIN (Va_Vb, vupbaw_insn, "vupbaw", VUPBAW) + SIMD_BUILTIN (Va_Vb, vupbw_insn, "vupbw", VUPBW) + SIMD_BUILTIN (Va_Vb, vupsbaw_insn, "vupsbaw", VUPSBAW) + SIMD_BUILTIN (Va_Vb, vupsbw_insn, "vupsbw", VUPSBW) + + /* DIb, rlimm, rlimm instructions. */ + SIMD_BUILTIN (Da_rlimm_rlimm, vdirun_insn, "vdirun", VDIRUN) + SIMD_BUILTIN (Da_rlimm_rlimm, vdorun_insn, "vdorun", VDORUN) + + /* DIb, limm, rlimm instructions. */ + SIMD_BUILTIN (Da_u3_rlimm, vdiwr_insn, "vdiwr", VDIWR) + SIMD_BUILTIN (Da_u3_rlimm, vdowr_insn, "vdowr", VDOWR) + + /* rlimm instructions. */ + SIMD_BUILTIN (void_rlimm, vrec_insn, "vrec", VREC) + SIMD_BUILTIN (void_rlimm, vrun_insn, "vrun", VRUN) + SIMD_BUILTIN (void_rlimm, vrecrun_insn, "vrecrun", VRECRUN) + SIMD_BUILTIN (void_rlimm, vendrec_insn, "vendrec", VENDREC) + + /* Va, [Ib,u8] instructions. */ + SIMD_BUILTIN (Va_Vb_Ic_u8, vld32wh_insn, "vld32wh", VLD32WH) + SIMD_BUILTIN (Va_Vb_Ic_u8, vld32wl_insn, "vld32wl", VLD32WL) + SIMD_BUILTIN (Va_Vb_Ic_u8, vld64_insn, "vld64", VLD64) + SIMD_BUILTIN (Va_Vb_Ic_u8, vld32_insn, "vld32", VLD32) + + SIMD_BUILTIN (Va_Ib_u8, vld64w_insn, "vld64w", VLD64W) + SIMD_BUILTIN (Va_Ib_u8, vld128_insn, "vld128", VLD128) + SIMD_BUILTIN (void_Va_Ib_u8, vst128_insn, "vst128", VST128) + SIMD_BUILTIN (void_Va_Ib_u8, vst64_insn, "vst64", VST64) + + /* Va, [Ib, u8] instructions. 
*/ + SIMD_BUILTIN (void_Va_u3_Ib_u8, vst16_n_insn, "vst16_n", VST16_N) + SIMD_BUILTIN (void_Va_u3_Ib_u8, vst32_n_insn, "vst32_n", VST32_N) + + SIMD_BUILTIN (void_u6, vinti_insn, "vinti", VINTI) +}; + +static void +arc_init_simd_builtins (void) +{ + int i; + tree endlink = void_list_node; + tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); + + tree v8hi_ftype_v8hi_v8hi + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + endlink))); + tree v8hi_ftype_v8hi_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree v8hi_ftype_v8hi_int_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + + tree void_ftype_v8hi_int_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + + tree void_ftype_v8hi_int_int_int + = (build_function_type + (void_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))))); + + tree v8hi_ftype_int_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree void_ftype_int_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree void_ftype_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink)); + + tree v8hi_ftype_v8hi + = build_function_type (V8HI_type_node, tree_cons (NULL_TREE, V8HI_type_node, + endlink)); + + /* These asserts have been introduced to ensure that the order of builtins + does not get messed up, else the initialization goes wrong. 
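+
+   A hypothetical user-level sketch of the vector builtins registered
+   here (the typedef mirrors V8HImode, eight 16-bit elements):
+
+     typedef short v8hi __attribute__ ((vector_size (16)));
+     v8hi a, b, c;
+     c = __builtin_arc_vaddw (a, b);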
*/ + gcc_assert (arc_simd_builtin_desc_list [0].args_type == Va_Vb_Vc); + for (i=0; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Vc; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_v8hi, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, + void_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == void_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8); + for (; arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, + void_ftype_v8hi_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert 
(arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8); + for (; arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_v8hi_int_int_int, + arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_u6); + for (; arc_simd_builtin_desc_list [i].args_type == void_u6; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert(i == ARRAY_SIZE (arc_simd_builtin_desc_list)); +} + +/* Helper function of arc_expand_builtin; has the same parameters, + except that EXP is now known to be a call to a simd builtin. */ + +static rtx +arc_expand_simd_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0; + tree arg1; + tree arg2; + tree arg3; + rtx op0; + rtx op1; + rtx op2; + rtx op3; + rtx op4; + rtx pat; + unsigned int i; + int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + enum machine_mode mode0; + enum machine_mode mode1; + enum machine_mode mode2; + enum machine_mode mode3; + enum machine_mode mode4; + const struct builtin_description * d; + + for (i = 0, d = arc_simd_builtin_desc_list; + i < ARRAY_SIZE (arc_simd_builtin_desc_list); i++, d++) + if (d->code == (const enum arc_builtins) fcode) + break; + + /* We must get an entry here. */ + gcc_assert (i < ARRAY_SIZE (arc_simd_builtin_desc_list)); + + switch (d->args_type) + { + case Va_Vb_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb_u6: + case Va_Vb_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1) + || (d->args_type == Va_Vb_u6 && !UNSIGNED_INT6 (INTVAL (op1))) + || (d->args_type == Va_Vb_u8 && !UNSIGNED_INT8 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned %d-bit value", + d->name, + (d->args_type == Va_Vb_u6)? 6: 8); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! 
pat) + return 0; + + emit_insn (pat); + return target; + + case Va_rlimm_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + || !(UNSIGNED_INT8 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 8-bit value", + d->name); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb_Ic: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + || !(UNSIGNED_INT3 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb_Vc: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, V8HImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Da_rlimm_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + + if (icode == CODE_FOR_vdirun_insn) + target = gen_rtx_REG (SImode, 131); + else if (icode == CODE_FOR_vdorun_insn) + target = gen_rtx_REG (SImode, 139); + else + gcc_unreachable (); + + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + + pat = GEN_FCN (icode) (target, op0, op1); + if (! 
pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case Da_u3_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + + if (! (GET_CODE (op0) == CONST_INT) + || !(UNSIGNED_INT3 (INTVAL (op0)))) + error ("operand 1 of %s instruction should be an unsigned 3-bit value (DR0-DR7)", + d->name); + + mode1 = insn_data[icode].operand[1].mode; + + if (icode == CODE_FOR_vdiwr_insn) + target = gen_rtx_REG (SImode, + ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0)); + else if (icode == CODE_FOR_vdowr_insn) + target = gen_rtx_REG (SImode, + ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0)); + else + gcc_unreachable (); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op1); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case void_u6: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[0].mode; + + /* op0 should be u6. */ + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0) + || !(UNSIGNED_INT6 (INTVAL (op0)))) + error ("operand of %s instruction should be an unsigned 6-bit value", + d->name); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case void_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[0].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case Va_Vb_Ic_u8: + { + rtx src_vreg; + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */ + arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */ + + src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* [I]0-7 */ + op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */ + + /* target <- src vreg */ + emit_insn (gen_move_insn (target, src_vreg)); + + /* target <- vec_concat: target, mem(Ib, u8) */ + mode0 = insn_data[icode].operand[3].mode; + mode1 = insn_data[icode].operand[1].mode; + + if ( (!(*insn_data[icode].operand[3].predicate) (op0, mode0)) + || !(UNSIGNED_INT3 (INTVAL (op0)))) + error ("operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1)) + || !(UNSIGNED_INT8 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 8-bit value", + d->name); + + pat = GEN_FCN (icode) (target, op1, op2, op0); + if (! 
pat) + return 0; + + emit_insn (pat); + return target; + } + + case void_Va_Ib_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */ + arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */ + + op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */ + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* I[0-7] */ + op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); /* Vdest */ + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + + if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1)) + || !(UNSIGNED_INT3 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2)) + || !(UNSIGNED_INT8 (INTVAL (op2)))) + error ("operand 3 of %s instruction should be an unsigned 8-bit value", + d->name); + + if (!(*insn_data[icode].operand[3].predicate) (op3, mode3)) + op3 = copy_to_mode_reg (mode3, op3); + + pat = GEN_FCN (icode) (op0, op1, op2, op3); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case Va_Ib_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */ + + op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */ + op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); /* I[0-7] */ + op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + + /* target <- src vreg */ + target = gen_reg_rtx (V8HImode); + + /* target <- vec_concat: target, mem(Ib, u8) */ + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + || !(UNSIGNED_INT3 (INTVAL (op1)))) + error ("operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if ( (!(*insn_data[icode].operand[3].predicate) (op2, mode2)) + || !(UNSIGNED_INT8 (INTVAL (op2)))) + error ("operand 2 of %s instruction should be an unsigned 8-bit value", + d->name); + + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case void_Va_u3_Ib_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* u3 */ + arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7 */ + arg3 = CALL_EXPR_ARG (exp, 3); /* u8 */ + + op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR */ + op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* [I]0-7 */ + op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);/* vreg to be stored */ + op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* vreg 0-7 subreg no. */ + + mode0 = insn_data[icode].operand[0].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + mode4 = insn_data[icode].operand[4].mode; + + /* Do some correctness checks for the operands. 
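+	 Out-of-range immediates are reported with error () so the user
+	 gets a diagnostic rather than an unrecognizable insn.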
*/ + if ( (!(*insn_data[icode].operand[0].predicate) (op0, mode0)) + || !(UNSIGNED_INT8 (INTVAL (op0)))) + error ("operand 4 of %s instruction should be an unsigned 8-bit value (0-255)", + d->name); + + if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2)) + || !(UNSIGNED_INT3 (INTVAL (op2)))) + error ("operand 3 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if (!(*insn_data[icode].operand[3].predicate) (op3, mode3)) + op3 = copy_to_mode_reg (mode3, op3); + + if ( (!(*insn_data[icode].operand[4].predicate) (op4, mode4)) + || !(UNSIGNED_INT3 (INTVAL (op4)))) + error ("operand 2 of %s instruction should be an unsigned 3-bit value (subreg 0-7)", + d->name); + else if (icode == CODE_FOR_vst32_n_insn + && ((INTVAL(op4) % 2 ) != 0)) + error ("operand 2 of %s instruction should be an even 3-bit value (subreg 0,2,4,6)", + d->name); + + pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + default: + gcc_unreachable (); + } + return NULL_RTX; +} + +static bool +arc_preserve_reload_p (rtx in) +{ + return (GET_CODE (in) == PLUS + && RTX_OK_FOR_BASE_P (XEXP (in, 0), true) + && CONST_INT_P (XEXP (in, 1)) + && !((INTVAL (XEXP (in, 1)) & 511))); +} + +int +arc_register_move_cost (enum machine_mode, + enum reg_class from_class, enum reg_class to_class) +{ + /* The ARC600 has no bypass for extension registers, hence a nop might be + needed to be inserted after a write so that reads are safe. */ + if (TARGET_ARC600) + { + if (to_class == MPY_WRITABLE_CORE_REGS) + return 3; + /* Instructions modifying LP_COUNT need 4 additional cycles before + the register will actually contain the value. */ + else if (to_class == LPCOUNT_REG) + return 6; + else if (to_class == WRITABLE_CORE_REGS) + return 6; + } + + /* The ARC700 stalls for 3 cycles when *reading* from lp_count. */ + if (TARGET_ARC700 + && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS + || from_class == WRITABLE_CORE_REGS)) + return 8; + + /* Force an attempt to 'mov Dy,Dx' to spill. */ + if (TARGET_ARC700 && TARGET_DPFP + && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS) + return 100; + + return 2; +} + +/* Emit code for an addsi3 instruction with OPERANDS. + COND_P indicates if this will use conditional execution. + Return the length of the instruction. + If OUTPUT_P is false, don't actually output the instruction, just return + its length. */ +int +arc_output_addsi (rtx *operands, bool cond_p, bool output_p) +{ + char format[32]; + + int match = operands_match_p (operands[0], operands[1]); + int match2 = operands_match_p (operands[0], operands[2]); + int intval = (REG_P (operands[2]) ? 1 + : CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057); + int neg_intval = -intval; + int short_0 = satisfies_constraint_Rcq (operands[0]); + int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1])); + int ret = 0; + +#define ADDSI_OUTPUT1(FORMAT) do {\ + if (output_p) \ + output_asm_insn (FORMAT, operands);\ + return ret; \ +} while (0) +#define ADDSI_OUTPUT(LIST) do {\ + if (output_p) \ + sprintf LIST;\ + ADDSI_OUTPUT1 (format);\ + return ret; \ +} while (0) + + /* First try to emit a 16 bit insn. */ + ret = 2; + if (!cond_p + /* If we are actually about to output this insn, don't try a 16 bit + variant if we already decided that we don't want that + (I.e. we upsized this insn to align some following insn.) + E.g. add_s r0,sp,70 is 16 bit, but add r0,sp,70 requires a LIMM - + but add1 r0,sp,35 doesn't. 
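+     (With a destination distinct from the source register, the 32 bit
+     add only has a u6 immediate field; 70 does not fit, whereas add1's
+     operand 35, scaled by two, does.)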
+     */
+      && (!output_p || (get_attr_length (current_output_insn) & 2)))
+    {
+      if (short_p
+	  && (REG_P (operands[2])
+	      ? (match || satisfies_constraint_Rcq (operands[2]))
+	      : (unsigned) intval <= (match ? 127 : 7)))
+	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
+      if (short_0 && REG_P (operands[1]) && match2)
+	ADDSI_OUTPUT1 ("add%? %0,%2,%1");
+      if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM)
+	  && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124))
+	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
+
+      if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7))
+	  || (REGNO (operands[0]) == STACK_POINTER_REGNUM
+	      && match && !(neg_intval & ~124)))
+	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
+    }
+
+  /* Now try to emit a 32 bit insn without long immediate.  */
+  ret = 4;
+  if (!match && match2 && REG_P (operands[1]))
+    ADDSI_OUTPUT1 ("add%? %0,%2,%1");
+  if (match || !cond_p)
+    {
+      int limit = (match && !cond_p) ? 0x7ff : 0x3f;
+      int range_factor = neg_intval & intval;
+      int shift;
+
+      if (intval == -1 << 31)
+	ADDSI_OUTPUT1 ("bxor%? %0,%1,31");
+
+      /* If we can use a straight add / sub instead of a {add,sub}[123] of
+	 the same size, do so - the insn latency is lower.  */
+      /* -0x800 is a 12-bit constant for add / add3 / sub / sub3, but
+	 0x800 is not.  */
+      if ((intval >= 0 && intval <= limit)
+	  || (intval == -0x800 && limit == 0x7ff))
+	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
+      else if ((intval < 0 && neg_intval <= limit)
+	       || (intval == 0x800 && limit == 0x7ff))
+	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
+      shift = range_factor >= 8 ? 3 : (range_factor >> 1);
+      gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3);
+      gcc_assert ((((1 << shift) - 1) & intval) == 0);
+      if (((intval < 0 && intval != -0x4000)
+	   /* sub[123] is slower than add_s / sub, only use it if it
+	      avoids a long immediate.  */
+	   && neg_intval <= limit << shift)
+	  || (intval == 0x4000 && limit == 0x7ff))
+	ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d",
+		       shift, neg_intval >> shift));
+      else if ((intval >= 0 && intval <= limit << shift)
+	       || (intval == -0x4000 && limit == 0x7ff))
+	ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift));
+    }
+  /* Try to emit a 16 bit opcode with long immediate.  */
+  ret = 6;
+  if (short_p && match)
+    ADDSI_OUTPUT1 ("add%? %0,%1,%S2");
+
+  /* We have to use a 32 bit opcode, and with a long immediate.  */
+  ret = 8;
+  ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%S2");
+}
+
+/* Emit code for a commutative_cond_exec instruction with OPERANDS.
+   Return the length of the instruction.
+   If OUTPUT_P is false, don't actually output the instruction, just return
+   its length.  */
+int
+arc_output_commutative_cond_exec (rtx *operands, bool output_p)
+{
+  enum rtx_code commutative_op = GET_CODE (operands[3]);
+  const char *pat = NULL;
+
+  /* Canonical rtl should not have a constant in the first operand
+     position.  */
+  gcc_assert (!CONSTANT_P (operands[1]));
+
+  switch (commutative_op)
+    {
+    case AND:
+      if (satisfies_constraint_C1p (operands[2]))
+	pat = "bmsk%? %0,%1,%Z2";
+      else if (satisfies_constraint_Ccp (operands[2]))
+	pat = "bclr%? %0,%1,%M2";
+      else if (satisfies_constraint_CnL (operands[2]))
+	pat = "bic%? %0,%1,%n2-1";
+      break;
+    case IOR:
+      if (satisfies_constraint_C0p (operands[2]))
+	pat = "bset%? %0,%1,%z2";
+      break;
+    case XOR:
+      if (satisfies_constraint_C0p (operands[2]))
+	pat = "bxor%? %0,%1,%z2";
+      break;
+    case PLUS:
+      return arc_output_addsi (operands, true, output_p);
+    default:
+      break;
+    }
+  if (output_p)
+    output_asm_insn (pat ? pat : "%O3.%d5 %0,%1,%2", operands);
+  if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2]))
+    return 4;
+  return 8;
+}
+
+/* Helper function of arc_expand_movmem.  ADDR points to a chunk of memory.
+   Emit code and return a potentially modified address such that offsets
+   up to SIZE can be added to yield a legitimate address.
+   If REUSE is set, ADDR is a register that may be modified.  */
+
+static rtx
+force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
+{
+  rtx base = addr;
+  rtx offs = const0_rtx;
+
+  if (GET_CODE (base) == PLUS)
+    {
+      offs = XEXP (base, 1);
+      base = XEXP (base, 0);
+    }
+  if (!REG_P (base)
+      || (REGNO (base) != STACK_POINTER_REGNUM
+	  && REGNO_PTR_FRAME_P (REGNO (base)))
+      || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs))
+      || !SMALL_INT (INTVAL (offs) + size))
+    {
+      if (reuse)
+	emit_insn (gen_add2_insn (addr, offs));
+      else
+	addr = copy_to_mode_reg (Pmode, addr);
+    }
+  return addr;
+}
+
+/* Like move_by_pieces, but take account of load latency, and actual
+   offset ranges.  Return true on success.  */
+
+bool
+arc_expand_movmem (rtx *operands)
+{
+  rtx dst = operands[0];
+  rtx src = operands[1];
+  rtx dst_addr, src_addr;
+  HOST_WIDE_INT size;
+  int align = INTVAL (operands[3]);
+  unsigned n_pieces;
+  int piece = align;
+  rtx store[2];
+  rtx tmpx[2];
+  int i;
+
+  if (!CONST_INT_P (operands[2]))
+    return false;
+  size = INTVAL (operands[2]);
+  /* move_by_pieces_ninsns is static, so we can't use it.  */
+  if (align >= 4)
+    n_pieces = (size + 2) / 4U + (size & 1);
+  else if (align == 2)
+    n_pieces = (size + 1) / 2U;
+  else
+    n_pieces = size;
+  if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
+    return false;
+  if (piece > 4)
+    piece = 4;
+  dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
+  src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
+  store[0] = store[1] = NULL_RTX;
+  tmpx[0] = tmpx[1] = NULL_RTX;
+  for (i = 0; size > 0; i ^= 1, size -= piece)
+    {
+      rtx tmp;
+      enum machine_mode mode;
+
+      if (piece > size)
+	piece = size & -size;
+      mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
+      /* If we don't re-use temporaries, the scheduler gets carried away,
+	 and the register pressure gets unnecessarily high.  */
+      if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode)
+	tmp = tmpx[i];
+      else
+	tmpx[i] = tmp = gen_reg_rtx (mode);
+      dst_addr = force_offsettable (dst_addr, piece, 1);
+      src_addr = force_offsettable (src_addr, piece, 1);
+      if (store[i])
+	emit_insn (store[i]);
+      emit_move_insn (tmp, change_address (src, mode, src_addr));
+      store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp);
+      dst_addr = plus_constant (Pmode, dst_addr, piece);
+      src_addr = plus_constant (Pmode, src_addr, piece);
+    }
+  if (store[i])
+    emit_insn (store[i]);
+  if (store[i^1])
+    emit_insn (store[i^1]);
+  return true;
+}
+
+/* Prepare operands for move in MODE.  Return true iff the move has
+   been emitted.  */
+
+bool
+prepare_move_operands (rtx *operands, enum machine_mode mode)
+{
+  /* We used to do this only for MODE_INT modes, but addresses to floating
+     point variables may well be in the small data section.  */
+  if (1)
+    {
+      if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode))
+	operands[0] = arc_rewrite_small_data (operands[0]);
+      else if (mode == SImode && flag_pic && SYMBOLIC_CONST (operands[1]))
+	{
+	  emit_pic_move (operands, SImode);
+
+	  /* Disable any REG_EQUALs associated with the symref
+	     otherwise the optimization pass undoes the work done
+	     here and references the variable directly.
*/ + } + else if (GET_CODE (operands[0]) != MEM + && !TARGET_NO_SDATA_SET + && small_data_pattern (operands[1], Pmode)) + { + /* This is to take care of address calculations involving sdata + variables. */ + operands[1] = arc_rewrite_small_data (operands[1]); + + emit_insn (gen_rtx_SET (mode, operands[0],operands[1])); + /* ??? This note is useless, since it only restates the set itself. + We should rather use the original SYMBOL_REF. However, there is + the problem that we are lying to the compiler about these + SYMBOL_REFs to start with. symbol@sda should be encoded specially + so that we can tell it apart from an actual symbol. */ + set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]); + + /* Take care of the REG_EQUAL note that will be attached to mark the + output reg equal to the initial symbol_ref after this code is + executed. */ + emit_move_insn (operands[0], operands[0]); + return true; + } + } + + if (MEM_P (operands[0]) + && !(reload_in_progress || reload_completed)) + { + operands[1] = force_reg (mode, operands[1]); + if (!move_dest_operand (operands[0], mode)) + { + rtx addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + /* This is like change_address_1 (operands[0], mode, 0, 1) , + except that we can't use that function because it is static. */ + rtx pat = change_address (operands[0], mode, addr); + MEM_COPY_ATTRIBUTES (pat, operands[0]); + operands[0] = pat; + } + if (!cse_not_expected) + { + rtx pat = XEXP (operands[0], 0); + + pat = arc_legitimize_address_0 (pat, pat, mode); + if (pat) + { + pat = change_address (operands[0], mode, pat); + MEM_COPY_ATTRIBUTES (pat, operands[0]); + operands[0] = pat; + } + } + } + + if (MEM_P (operands[1]) && !cse_not_expected) + { + rtx pat = XEXP (operands[1], 0); + + pat = arc_legitimize_address_0 (pat, pat, mode); + if (pat) + { + pat = change_address (operands[1], mode, pat); + MEM_COPY_ATTRIBUTES (pat, operands[1]); + operands[1] = pat; + } + } + + return false; +} + +/* Prepare OPERANDS for an extension using CODE to OMODE. + Return true iff the move has been emitted. */ + +bool +prepare_extend_operands (rtx *operands, enum rtx_code code, + enum machine_mode omode) +{ + if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode)) + { + /* This is to take care of address calculations involving sdata + variables. */ + operands[1] + = gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1])); + emit_insn (gen_rtx_SET (omode, operands[0], operands[1])); + set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]); + + /* Take care of the REG_EQUAL note that will be attached to mark the + output reg equal to the initial extension after this code is + executed. */ + emit_move_insn (operands[0], operands[0]); + return true; + } + return false; +} + +/* Output a library call to a function called FNAME that has been arranged + to be local to any dso. */ + +const char * +arc_output_libcall (const char *fname) +{ + unsigned len = strlen (fname); + static char buf[64]; + + gcc_assert (len < sizeof buf - 35); + if (TARGET_LONG_CALLS_SET + || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ())) + { + if (flag_pic) + sprintf (buf, "add r12,pcl,@%s-(.&-4)\n\tjl%%!%%* [r12]", fname); + else + sprintf (buf, "jl%%! @%s", fname); + } + else + sprintf (buf, "bl%%!%%* @%s", fname); + return buf; +} + +/* Return the SImode highpart of the DImode value IN. */ + +rtx +disi_highpart (rtx in) +{ + return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 
+				0 : 4);
+}
+
+/* Called by arc600_corereg_hazard via for_each_rtx.
+   If a hazard is found, return a conservative estimate of the required
+   length adjustment to accommodate a nop.  */
+
+static int
+arc600_corereg_hazard_1 (rtx *xp, void *data)
+{
+  rtx x = *xp;
+  rtx dest;
+  rtx pat = (rtx) data;
+
+  switch (GET_CODE (x))
+    {
+    case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
+      break;
+    default:
+      /* This is also fine for PRE/POST_MODIFY, because they contain
+	 a SET.  */
+      return 0;
+    }
+  dest = XEXP (x, 0);
+  /* Check if this sets an extension register.  N.B. we use 61 for the
+     condition codes, which is definitely not an extension register.  */
+  if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
+      /* Check if the same register is used by the PAT.  */
+      && (refers_to_regno_p
+	  (REGNO (dest),
+	   REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, pat, 0)))
+    return 4;
+
+  return 0;
+}
+
+/* Return length adjustment for INSN.
+   For ARC600:
+   A write to a core reg greater than or equal to 32 must not be immediately
+   followed by a use.  Anticipate the length requirement to insert a nop
+   between PRED and SUCC to prevent a hazard.  */
+
+static int
+arc600_corereg_hazard (rtx pred, rtx succ)
+{
+  if (!TARGET_ARC600)
+    return 0;
+  /* If SUCC is a doloop_end_i with a preceding label, we must output a nop
+     in front of SUCC anyway, so there will be separation between PRED and
+     SUCC.  */
+  if (recog_memoized (succ) == CODE_FOR_doloop_end_i
+      && LABEL_P (prev_nonnote_insn (succ)))
+    return 0;
+  if (recog_memoized (succ) == CODE_FOR_doloop_begin_i)
+    return 0;
+  if (GET_CODE (PATTERN (pred)) == SEQUENCE)
+    pred = XVECEXP (PATTERN (pred), 0, 1);
+  if (GET_CODE (PATTERN (succ)) == SEQUENCE)
+    succ = XVECEXP (PATTERN (succ), 0, 0);
+  if (recog_memoized (pred) == CODE_FOR_mulsi_600
+      || recog_memoized (pred) == CODE_FOR_umul_600
+      || recog_memoized (pred) == CODE_FOR_mac_600
+      || recog_memoized (pred) == CODE_FOR_mul64_600
+      || recog_memoized (pred) == CODE_FOR_mac64_600
+      || recog_memoized (pred) == CODE_FOR_umul64_600
+      || recog_memoized (pred) == CODE_FOR_umac64_600)
+    return 0;
+  return for_each_rtx (&PATTERN (pred), arc600_corereg_hazard_1,
+		       PATTERN (succ));
+}
+
+/* For ARC600:
+   A write to a core reg greater than or equal to 32 must not be immediately
+   followed by a use.  Anticipate the length requirement to insert a nop
+   between PRED and SUCC to prevent a hazard.  */
+
+int
+arc_hazard (rtx pred, rtx succ)
+{
+  if (!TARGET_ARC600)
+    return 0;
+  if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
+    return 0;
+  /* We might have a CALL to a non-returning function before a loop end.
+     ??? Although the manual says that's OK (the target is outside the loop,
+     and the loop counter unused there), the assembler barfs on this, so we
+     must insert a nop before such a call too.  */
+  if (recog_memoized (succ) == CODE_FOR_doloop_end_i
+      && (JUMP_P (pred) || CALL_P (pred)
+	  || GET_CODE (PATTERN (pred)) == SEQUENCE))
+    return 4;
+  return arc600_corereg_hazard (pred, succ);
+}
+
+/* Return length adjustment for INSN.  */
+
+int
+arc_adjust_insn_length (rtx insn, int len, bool)
+{
+  if (!INSN_P (insn))
+    return len;
+  /* We already handle sequences by ignoring the delay sequence flag.  */
+  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+    return len;
+
+  /* It is impossible to jump to the very end of a Zero-Overhead Loop, as
+     the ZOL mechanism only triggers when advancing to the end address,
+     so if there's a label at the end of a ZOL, we need to insert a nop.
+     The ARC600 ZOL also has extra restrictions on jumps at the end of a
+     loop.  */
+  if (recog_memoized (insn) == CODE_FOR_doloop_end_i)
+    {
+      rtx prev = prev_nonnote_insn (insn);
+
+      return ((LABEL_P (prev)
+	       || (TARGET_ARC600
+		   && (JUMP_P (prev)
+		       || CALL_P (prev) /* Could be a noreturn call.  */
+		       || (NONJUMP_INSN_P (prev)
+			   && GET_CODE (PATTERN (prev)) == SEQUENCE))))
+	      ? len + 4 : len);
+    }
+
+  /* Check for return with but one preceding insn since function
+     start / call.  */
+  if (TARGET_PAD_RETURN
+      && JUMP_P (insn)
+      && GET_CODE (PATTERN (insn)) != ADDR_VEC
+      && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
+      && get_attr_type (insn) == TYPE_RETURN)
+    {
+      rtx prev = prev_active_insn (insn);
+
+      if (!prev || !(prev = prev_active_insn (prev))
+	  || ((NONJUMP_INSN_P (prev)
+	       && GET_CODE (PATTERN (prev)) == SEQUENCE)
+	      ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL)
+	      : CALL_ATTR (prev, NON_SIBCALL)))
+	return len + 4;
+    }
+  if (TARGET_ARC600)
+    {
+      rtx succ = next_real_insn (insn);
+
+      /* On the ARC600, a write to an extension register must be separated
+	 from a read.  */
+      if (succ && INSN_P (succ))
+	len += arc600_corereg_hazard (insn, succ);
+    }
+
+  /* Restore extracted operands - otherwise splitters like the addsi3_mixed
+     one can go awry.  */
+  extract_constrain_insn_cached (insn);
+
+  return len;
+}
+
+/* Values for length_sensitive.  */
+enum
+{
+  ARC_LS_NONE, // Jcc
+  ARC_LS_25,   // 25 bit offset, B
+  ARC_LS_21,   // 21 bit offset, Bcc
+  ARC_LS_U13,  // 13 bit unsigned offset, LP
+  ARC_LS_10,   // 10 bit offset, B_s, Beq_s, Bne_s
+  ARC_LS_9,    // 9 bit offset, BRcc
+  ARC_LS_8,    // 8 bit offset, BRcc_s
+  ARC_LS_U7,   // 7 bit unsigned offset, LPcc
+  ARC_LS_7     // 7 bit offset, Bcc_s
+};
+
+/* While the infrastructure patch is waiting for review, duplicate the
+   struct definitions, to allow this file to compile.  */
+#if 1
+typedef struct
+{
+  unsigned align_set;
+  /* Cost as a branch / call target or call return address.  */
+  int target_cost;
+  int fallthrough_cost;
+  int branch_cost;
+  int length;
+  /* 0 for not length sensitive, 1 for largest offset range,
+     2 for next smaller etc.  */
+  unsigned length_sensitive : 8;
+  bool enabled;
+} insn_length_variant_t;
+
+typedef struct insn_length_parameters_s
+{
+  int align_unit_log;
+  int align_base_log;
+  int max_variants;
+  int (*get_variants) (rtx, int, bool, bool, insn_length_variant_t *);
+} insn_length_parameters_t;
+
+static void
+arc_insn_length_parameters (insn_length_parameters_t *ilp) ATTRIBUTE_UNUSED;
+#endif
+
+static int
+arc_get_insn_variants (rtx insn, int len, bool, bool target_p,
+		       insn_length_variant_t *ilv)
+{
+  if (!NONDEBUG_INSN_P (insn))
+    return 0;
+  enum attr_type type;
+  /* shorten_branches doesn't take optimize_size into account yet for the
+     get_variants mechanism, so turn this off for now.  */
+  if (optimize_size)
+    return 0;
+  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+    {
+      /* The interaction of a short delay slot insn with a short branch is
+	 too weird for shorten_branches to piece together, so describe the
+	 entire SEQUENCE.  */
+      rtx pat, inner;
+      if (TARGET_UPSIZE_DBR
+	  && get_attr_length (XVECEXP ((pat = PATTERN (insn)), 0, 1)) <= 2
+	  && (((type = get_attr_type (inner = XVECEXP (pat, 0, 0)))
+	       == TYPE_UNCOND_BRANCH)
+	      || type == TYPE_BRANCH)
+	  && get_attr_delay_slot_filled (inner) == DELAY_SLOT_FILLED_YES)
+	{
+	  int n_variants
+	    = arc_get_insn_variants (inner, get_attr_length (inner), true,
+				     target_p, ilv+1);
+	  /* The short variant gets split into a higher-cost aligned
+	     and a lower cost unaligned variant.
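+	     (align_set acts as a mask here: 1 allows only the word-aligned
+	     placement, 2 only the mid-word one, 3 either.)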
*/ + gcc_assert (n_variants); + gcc_assert (ilv[1].length_sensitive == ARC_LS_7 + || ilv[1].length_sensitive == ARC_LS_10); + gcc_assert (ilv[1].align_set == 3); + ilv[0] = ilv[1]; + ilv[0].align_set = 1; + ilv[0].branch_cost += 1; + ilv[1].align_set = 2; + n_variants++; + for (int i = 0; i < n_variants; i++) + ilv[i].length += 2; + /* In case an instruction with aligned size is wanted, and + the short variants are unavailable / too expensive, add + versions of long branch + long delay slot. */ + for (int i = 2, end = n_variants; i < end; i++, n_variants++) + { + ilv[n_variants] = ilv[i]; + ilv[n_variants].length += 2; + } + return n_variants; + } + return 0; + } + insn_length_variant_t *first_ilv = ilv; + type = get_attr_type (insn); + bool delay_filled + = (get_attr_delay_slot_filled (insn) == DELAY_SLOT_FILLED_YES); + int branch_align_cost = delay_filled ? 0 : 1; + int branch_unalign_cost = delay_filled ? 0 : TARGET_UNALIGN_BRANCH ? 0 : 1; + /* If the previous instruction is an sfunc call, this insn is always + a target, even though the middle-end is unaware of this. */ + bool force_target = false; + rtx prev = prev_active_insn (insn); + if (prev && arc_next_active_insn (prev, 0) == insn + && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE) + ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL) + : (CALL_ATTR (prev, NON_SIBCALL) + && NEXT_INSN (PREV_INSN (prev)) == prev))) + force_target = true; + + switch (type) + { + case TYPE_BRCC: + /* Short BRCC only comes in no-delay-slot version, and without limm */ + if (!delay_filled) + { + ilv->align_set = 3; + ilv->length = 2; + ilv->branch_cost = 1; + ilv->enabled = (len == 2); + ilv->length_sensitive = ARC_LS_8; + ilv++; + } + /* Fall through. */ + case TYPE_BRCC_NO_DELAY_SLOT: + /* doloop_fallback* patterns are TYPE_BRCC_NO_DELAY_SLOT for + (delay slot) scheduling purposes, but they are longer. */ + if (GET_CODE (PATTERN (insn)) == PARALLEL + && GET_CODE (XVECEXP (PATTERN (insn), 0, 1)) == SET) + return 0; + /* Standard BRCC: 4 bytes, or 8 bytes with limm. */ + ilv->length = ((type == TYPE_BRCC) ? 4 : 8); + ilv->align_set = 3; + ilv->branch_cost = branch_align_cost; + ilv->enabled = (len <= ilv->length); + ilv->length_sensitive = ARC_LS_9; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->branch_cost = branch_unalign_cost; + } + ilv++; + + rtx op, op0; + op = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0); + op0 = XEXP (op, 0); + + if (GET_CODE (op0) == ZERO_EXTRACT + && satisfies_constraint_L (XEXP (op0, 2))) + op0 = XEXP (op0, 0); + if (satisfies_constraint_Rcq (op0)) + { + ilv->length = ((type == TYPE_BRCC) ? 6 : 10); + ilv->align_set = 3; + ilv->branch_cost = 1 + branch_align_cost; + ilv->fallthrough_cost = 1; + ilv->enabled = true; + ilv->length_sensitive = ARC_LS_21; + if (!delay_filled && TARGET_UNALIGN_BRANCH) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->branch_cost = 1 + branch_unalign_cost; + } + ilv++; + } + ilv->length = ((type == TYPE_BRCC) ? 
8 : 12); + ilv->align_set = 3; + ilv->branch_cost = 1 + branch_align_cost; + ilv->fallthrough_cost = 1; + ilv->enabled = true; + ilv->length_sensitive = ARC_LS_21; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->branch_cost = 1 + branch_unalign_cost; + } + ilv++; + break; + + case TYPE_SFUNC: + ilv->length = 12; + goto do_call; + case TYPE_CALL_NO_DELAY_SLOT: + ilv->length = 8; + goto do_call; + case TYPE_CALL: + ilv->length = 4; + ilv->length_sensitive + = GET_CODE (PATTERN (insn)) == COND_EXEC ? ARC_LS_21 : ARC_LS_25; + do_call: + ilv->align_set = 3; + ilv->fallthrough_cost = branch_align_cost; + ilv->enabled = true; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->fallthrough_cost = branch_unalign_cost; + } + ilv++; + break; + case TYPE_UNCOND_BRANCH: + /* Strictly speaking, this should be ARC_LS_10 for equality comparisons, + but that makes no difference at the moment. */ + ilv->length_sensitive = ARC_LS_7; + ilv[1].length_sensitive = ARC_LS_25; + goto do_branch; + case TYPE_BRANCH: + ilv->length_sensitive = ARC_LS_10; + ilv[1].length_sensitive = ARC_LS_21; + do_branch: + ilv->align_set = 3; + ilv->length = 2; + ilv->branch_cost = branch_align_cost; + ilv->enabled = (len == ilv->length); + ilv++; + ilv->length = 4; + ilv->align_set = 3; + ilv->branch_cost = branch_align_cost; + ilv->enabled = true; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->branch_cost = branch_unalign_cost; + } + ilv++; + break; + case TYPE_JUMP: + return 0; + default: + /* For every short insn, there is generally also a long insn. + trap_s is an exception. */ + if ((len & 2) == 0 || recog_memoized (insn) == CODE_FOR_trap_s) + return 0; + ilv->align_set = 3; + ilv->length = len; + ilv->enabled = 1; + ilv++; + ilv->align_set = 3; + ilv->length = len + 2; + ilv->enabled = 1; + if (target_p || force_target) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + } + ilv++; + } + /* If the previous instruction is an sfunc call, this insn is always + a target, even though the middle-end is unaware of this. + Therefore, if we have a call predecessor, transfer the target cost + to the fallthrough and branch costs. */ + if (force_target) + { + for (insn_length_variant_t *p = first_ilv; p < ilv; p++) + { + p->fallthrough_cost += p->target_cost; + p->branch_cost += p->target_cost; + p->target_cost = 0; + } + } + + return ilv - first_ilv; +} + +static void +arc_insn_length_parameters (insn_length_parameters_t *ilp) +{ + ilp->align_unit_log = 1; + ilp->align_base_log = 1; + ilp->max_variants = 7; + ilp->get_variants = arc_get_insn_variants; +} + +/* Return a copy of COND from *STATEP, inverted if that is indicated by the + CC field of *STATEP. 
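+   E.g. a plain reverse_condition of UNEQ yields UNKNOWN, and for the
+   CC_FP_GTmode / CC_FP_GEmode compares the reverse must include the
+   unordered case; both therefore go through
+   reverse_condition_maybe_unordered.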
*/ + +static rtx +arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse) +{ + rtx cond = statep->cond; + int raw_cc = get_arc_condition_code (cond); + if (reverse) + raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc); + + if (statep->cc == raw_cc) + return copy_rtx (cond); + + gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc); + + enum machine_mode ccm = GET_MODE (XEXP (cond, 0)); + enum rtx_code code = reverse_condition (GET_CODE (cond)); + if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode) + code = reverse_condition_maybe_unordered (GET_CODE (cond)); + + return gen_rtx_fmt_ee (code, GET_MODE (cond), + copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1))); +} + +/* Use the ccfsm machinery to do if conversion. */ + +static unsigned +arc_ifcvt (void) +{ + struct arc_ccfsm *statep = &cfun->machine->ccfsm_current; + basic_block merge_bb = 0; + + memset (statep, 0, sizeof *statep); + for (rtx insn = get_insns (); insn; insn = next_insn (insn)) + { + arc_ccfsm_advance (insn, statep); + + switch (statep->state) + { + case 0: + if (JUMP_P (insn)) + merge_bb = 0; + break; + case 1: case 2: + { + /* Deleted branch. */ + gcc_assert (!merge_bb); + merge_bb = BLOCK_FOR_INSN (insn); + basic_block succ_bb + = BLOCK_FOR_INSN (NEXT_INSN (NEXT_INSN (PREV_INSN (insn)))); + arc_ccfsm_post_advance (insn, statep); + rtx seq = NEXT_INSN (PREV_INSN (insn)); + if (seq != insn) + { + rtx slot = XVECEXP (PATTERN (seq), 0, 1); + rtx pat = PATTERN (slot); + if (INSN_ANNULLED_BRANCH_P (insn)) + { + rtx cond + = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot)); + pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat); + } + if (!validate_change (seq, &PATTERN (seq), pat, 0)) + gcc_unreachable (); + PUT_CODE (slot, NOTE); + NOTE_KIND (slot) = NOTE_INSN_DELETED; + if (merge_bb && succ_bb) + merge_blocks (merge_bb, succ_bb); + } + else if (merge_bb && succ_bb) + { + set_insn_deleted (insn); + merge_blocks (merge_bb, succ_bb); + } + else + { + PUT_CODE (insn, NOTE); + NOTE_KIND (insn) = NOTE_INSN_DELETED; + } + continue; + } + case 3: + if (LABEL_P (insn) + && statep->target_label == CODE_LABEL_NUMBER (insn)) + { + arc_ccfsm_post_advance (insn, statep); + basic_block succ_bb = BLOCK_FOR_INSN (insn); + if (merge_bb && succ_bb) + merge_blocks (merge_bb, succ_bb); + else if (--LABEL_NUSES (insn) == 0) + { + const char *name = LABEL_NAME (insn); + PUT_CODE (insn, NOTE); + NOTE_KIND (insn) = NOTE_INSN_DELETED_LABEL; + NOTE_DELETED_LABEL_NAME (insn) = name; + } + merge_bb = 0; + continue; + } + /* Fall through. */ + case 4: case 5: + if (!NONDEBUG_INSN_P (insn)) + break; + + /* Conditionalized insn. */ + + rtx prev, pprev, *patp, pat, cond; + + /* If this is a delay slot insn in a non-annulled branch, + don't conditionalize it. N.B., this should be fine for + conditional return too. However, don't do this for + unconditional branches, as these would be encountered when + processing an 'else' part. */ + prev = PREV_INSN (insn); + pprev = PREV_INSN (prev); + if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn) + && JUMP_P (prev) && get_attr_cond (prev) == COND_USE + && !INSN_ANNULLED_BRANCH_P (prev)) + break; + + patp = &PATTERN (insn); + pat = *patp; + cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn)); + if (NONJUMP_INSN_P (insn) || CALL_P (insn)) + { + /* ??? don't conditionalize if all side effects are dead + in the not-execute case. */ + /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know + what to do with COND_EXEC. 
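+	     We therefore record the unconditional pattern in a
+	     REG_FRAME_RELATED_EXPR note below so that the CFI machinery
+	     still sees a plain SET.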
+	     */
+	  if (RTX_FRAME_RELATED_P (insn))
+	    {
+	      /* If this is the delay slot insn of an annulled branch,
+		 dwarf2out.c:scan_trace understands the annulling semantics
+		 without the COND_EXEC.  */
+	      gcc_assert
+		(pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn)
+		 && JUMP_P (prev) && get_attr_cond (prev) == COND_USE
+		 && INSN_ANNULLED_BRANCH_P (prev));
+	      rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
+					 REG_NOTES (insn));
+	      validate_change (insn, &REG_NOTES (insn), note, 1);
+	    }
+	  pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
+	}
+      else if (simplejump_p (insn))
+	{
+	  patp = &SET_SRC (pat);
+	  pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx);
+	}
+      else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
+	{
+	  pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx);
+	  pat = gen_rtx_SET (VOIDmode, pc_rtx, pat);
+	}
+      else
+	gcc_unreachable ();
+      validate_change (insn, patp, pat, 1);
+      if (!apply_change_group ())
+	gcc_unreachable ();
+      if (JUMP_P (insn))
+	{
+	  rtx next = next_nonnote_insn (insn);
+	  if (GET_CODE (next) == BARRIER)
+	    delete_insn (next);
+	  if (statep->state == 3)
+	    continue;
+	}
+      break;
+    default:
+      gcc_unreachable ();
+    }
+      arc_ccfsm_post_advance (insn, statep);
+    }
+  return 0;
+}
+
+/* For ARC600: If a write to a core reg >=32 appears in a delay slot
+   (other than of a forward brcc), it creates a hazard when there is a read
+   of the same register at the branch target.  We can't know what is at the
+   branch target of calls, and for branches, we don't really know before the
+   end of delay slot scheduling, either.  Not only can an individual
+   instruction be hoisted out into a delay slot, a basic block can also be
+   emptied this way, and branch and/or fall through targets be redirected.
+   Hence we don't want such writes in a delay slot.  */
+/* Called by arc_write_ext_corereg via for_each_rtx.  */
+
+static int
+write_ext_corereg_1 (rtx *xp, void *data ATTRIBUTE_UNUSED)
+{
+  rtx x = *xp;
+  rtx dest;
+
+  switch (GET_CODE (x))
+    {
+    case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
+      break;
+    default:
+      /* This is also fine for PRE/POST_MODIFY, because they contain
+	 a SET.  */
+      return 0;
+    }
+  dest = XEXP (x, 0);
+  if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61)
+    return 1;
+  return 0;
+}
+
+/* Return nonzero iff INSN writes to an extension core register.  */
+
+int
+arc_write_ext_corereg (rtx insn)
+{
+  return for_each_rtx (&PATTERN (insn), write_ext_corereg_1, 0);
+}
+
+/* This is like the hook, but returns NULL when it can't / won't generate
+   a legitimate address.  */
+
+static rtx
+arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+			  enum machine_mode mode)
+{
+  rtx addr, inner;
+
+  if (flag_pic && SYMBOLIC_CONST (x))
+    x = arc_legitimize_pic_address (x, 0);
+  addr = x;
+  if (GET_CODE (addr) == CONST)
+    addr = XEXP (addr, 0);
+  if (GET_CODE (addr) == PLUS
+      && CONST_INT_P (XEXP (addr, 1))
+      && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
+	   && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0)))
+	  || (REG_P (XEXP (addr, 0))
+	      && (INTVAL (XEXP (addr, 1)) & 252))))
+    {
+      HOST_WIDE_INT offs, upper;
+      int size = GET_MODE_SIZE (mode);
+
+      offs = INTVAL (XEXP (addr, 1));
+      upper = (offs + 256 * size) & ~511 * size;
+      inner = plus_constant (Pmode, XEXP (addr, 0), upper);
+#if 0 /* ???
this produces worse code for EEMBC idctrn01 */ + if (GET_CODE (x) == CONST) + inner = gen_rtx_CONST (Pmode, inner); +#endif + addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper); + x = addr; + } + else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr)) + x = force_reg (Pmode, x); + if (memory_address_p ((enum machine_mode) mode, x)) + return x; + return NULL_RTX; +} + +static rtx +arc_legitimize_address (rtx orig_x, rtx oldx, enum machine_mode mode) +{ + rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode); + + if (new_x) + return new_x; + return orig_x; +} + +static rtx +arc_delegitimize_address_0 (rtx x) +{ + rtx u, gp; + + if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC) + { + if (XINT (u, 1) == ARC_UNSPEC_GOT) + return XVECEXP (u, 0, 0); + } + else if (GET_CODE (x) == PLUS + && ((REG_P (gp = XEXP (x, 0)) + && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM) + || (GET_CODE (gp) == CONST + && GET_CODE (u = XEXP (gp, 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOT + && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF + && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC"))) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOTOFF) + return XVECEXP (u, 0, 0); + else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS + && ((REG_P (gp = XEXP (XEXP (x, 0), 1)) + && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM) + || (GET_CODE (gp) == CONST + && GET_CODE (u = XEXP (gp, 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOT + && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF + && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC"))) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOTOFF) + return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0), + XVECEXP (u, 0, 0)); + else if (GET_CODE (x) == PLUS + && (u = arc_delegitimize_address_0 (XEXP (x, 1)))) + return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u); + return NULL_RTX; +} + +static rtx +arc_delegitimize_address (rtx x) +{ + rtx orig_x = x = delegitimize_mem_from_attrs (x); + if (GET_CODE (x) == MEM) + x = XEXP (x, 0); + x = arc_delegitimize_address_0 (x); + if (x) + { + if (MEM_P (orig_x)) + x = replace_equiv_address_nv (orig_x, x); + return x; + } + return orig_x; +} + +/* Return a REG rtx for acc1. N.B. the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of acc1 and acc2. */ + +rtx +gen_acc1 (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57); +} + +/* Return a REG rtx for acc2. N.B. the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of acc1 and acc2. */ + +rtx +gen_acc2 (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56); +} + +/* Return a REG rtx for mlo. N.B. the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of mhi and mlo. */ + +rtx +gen_mlo (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 59: 58); +} + +/* Return a REG rtx for mhi. N.B. the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of mhi and mlo. */ + +rtx +gen_mhi (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 
58: 59); +} + +/* FIXME: a parameter should be added, and code added to final.c, + to reproduce this functionality in shorten_branches. */ +#if 0 +/* Return nonzero iff BRANCH should be unaligned if possible by upsizing + a previous instruction. */ +int +arc_unalign_branch_p (rtx branch) +{ + rtx note; + + if (!TARGET_UNALIGN_BRANCH) + return 0; + /* Do not do this if we have a filled delay slot. */ + if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES + && !INSN_DELETED_P (NEXT_INSN (branch))) + return 0; + note = find_reg_note (branch, REG_BR_PROB, 0); + return (!note + || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note)) + || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold); +} +#endif + +/* When estimating sizes during arc_reorg, when optimizing for speed, there + are three reasons why we need to consider branches to be length 6: + - annull-false delay slot insns are implemented using conditional execution, + thus preventing short insn formation where used. + - for ARC600: annul-true delay slot insns are implemented where possible + using conditional execution, preventing short insn formation where used. + - for ARC700: likely or somewhat likely taken branches are made long and + unaligned if possible to avoid branch penalty. */ + +bool +arc_branch_size_unknown_p (void) +{ + return !optimize_size && arc_reorg_in_progress; +} + +/* We are about to output a return insn. Add padding if necessary to avoid + a mispredict. A return could happen immediately after the function + start, but after a call we know that there will be at least a blink + restore. */ + +void +arc_pad_return (void) +{ + rtx insn = current_output_insn; + rtx prev = prev_active_insn (insn); + int want_long; + + if (!prev) + { + fputs ("\tnop_s\n", asm_out_file); + cfun->machine->unalign ^= 2; + want_long = 1; + } + /* If PREV is a sequence, we know it must be a branch / jump or a tailcall, + because after a call, we'd have to restore blink first. */ + else if (GET_CODE (PATTERN (prev)) == SEQUENCE) + return; + else + { + want_long = (get_attr_length (prev) == 2); + prev = prev_active_insn (prev); + } + if (!prev + || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE) + ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL) + : CALL_ATTR (prev, NON_SIBCALL))) + { + if (want_long) + cfun->machine->size_reason + = "call/return and return/return must be 6 bytes apart to avoid mispredict"; + else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign) + { + cfun->machine->size_reason + = "Long unaligned jump avoids non-delay slot penalty"; + want_long = 1; + } + /* Disgorge delay insn, if there is any, and it may be moved. */ + if (final_sequence + /* ??? Annulled would be OK if we can and do conditionalize + the delay slot insn accordingly. 
+	 */
+      && !INSN_ANNULLED_BRANCH_P (insn)
+      && (get_attr_cond (insn) != COND_USE
+	  || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
+			 XVECEXP (final_sequence, 0, 1))))
+	{
+	  prev = XVECEXP (final_sequence, 0, 1);
+	  gcc_assert (!prev_real_insn (insn)
+		      || !arc_hazard (prev_real_insn (insn), prev));
+	  cfun->machine->force_short_suffix = !want_long;
+	  rtx save_pred = current_insn_predicate;
+	  final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
+	  cfun->machine->force_short_suffix = -1;
+	  INSN_DELETED_P (prev) = 1;
+	  current_output_insn = insn;
+	  current_insn_predicate = save_pred;
+	}
+      else if (want_long)
+	fputs ("\tnop\n", asm_out_file);
+      else
+	{
+	  fputs ("\tnop_s\n", asm_out_file);
+	  cfun->machine->unalign ^= 2;
+	}
+    }
+  return;
+}
+
+/* The usual; we set up our machine_function data.  */
+
+static struct machine_function *
+arc_init_machine_status (void)
+{
+  struct machine_function *machine;
+  machine = ggc_alloc_cleared_machine_function ();
+  machine->fn_type = ARC_FUNCTION_UNKNOWN;
+  machine->force_short_suffix = -1;
+
+  return machine;
+}
+
+/* Implements INIT_EXPANDERS.  We just set up to call the above
+   function.  */
+
+void
+arc_init_expanders (void)
+{
+  init_machine_status = arc_init_machine_status;
+}
+
+/* Check if OP is a proper parallel of a millicode call pattern.  OFFSET
+   indicates a number of elements to ignore - that allows us to have a
+   sibcall pattern that starts with (return).  LOAD_P is zero for store
+   multiple (for prologues), and one for load multiples (for epilogues),
+   and two for load multiples where no final clobber of blink is required.
+   We also skip the first load / store element since this is supposed to
+   be checked in the instruction pattern.  */
+
+int
+arc_check_millicode (rtx op, int offset, int load_p)
+{
+  int len = XVECLEN (op, 0) - offset;
+  int i;
+
+  if (load_p == 2)
+    {
+      if (len < 2 || len > 13)
+	return 0;
+      load_p = 1;
+    }
+  else
+    {
+      rtx elt = XVECEXP (op, 0, --len);
+
+      if (GET_CODE (elt) != CLOBBER
+	  || !REG_P (XEXP (elt, 0))
+	  || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM
+	  || len < 3 || len > 13)
+	return 0;
+    }
+  for (i = 1; i < len; i++)
+    {
+      rtx elt = XVECEXP (op, 0, i + offset);
+      rtx reg, mem, addr;
+
+      if (GET_CODE (elt) != SET)
+	return 0;
+      mem = XEXP (elt, load_p);
+      reg = XEXP (elt, 1-load_p);
+      if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem))
+	return 0;
+      addr = XEXP (mem, 0);
+      if (GET_CODE (addr) != PLUS
+	  || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
+	  || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4)
+	return 0;
+    }
+  return 1;
+}
+
+/* Accessor functions for cfun->machine->unalign.  */
+
+int
+arc_get_unalign (void)
+{
+  return cfun->machine->unalign;
+}
+
+void
+arc_clear_unalign (void)
+{
+  if (cfun)
+    cfun->machine->unalign = 0;
+}
+
+void
+arc_toggle_unalign (void)
+{
+  cfun->machine->unalign ^= 2;
+}
+
+/* Operands 0..2 are the operands of an addsi which uses a 12 bit
+   constant in operand 2, but which would require a LIMM because of
+   operand mismatch.
+   Operands 3 and 4 are new SET_SRCs for operand 0.  */
+
+void
+split_addsi (rtx *operands)
+{
+  int val = INTVAL (operands[2]);
+
+  /* Try for two short insns first.  Lengths being equal, we prefer
+     expansions with shorter register lifetimes.
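+     E.g. for r0 = r1 + 200 with a short-reg destination, this emits
+     mov_s r0,200 followed by add_s r0,r0,r1.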
*/ + if (val > 127 && val <= 255 + && satisfies_constraint_Rcq (operands[0])) + { + operands[3] = operands[2]; + operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]); + } + else + { + operands[3] = operands[1]; + operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]); + } +} + +/* Operands 0..2 are the operands of a subsi which uses a 12 bit + constant in operand 1, but which would require a LIMM because of + operand mismatch. + operands 3 and 4 are new SET_SRCs for operands 0. */ + +void +split_subsi (rtx *operands) +{ + int val = INTVAL (operands[1]); + + /* Try for two short insns first. Lengths being equal, we prefer + expansions with shorter register lifetimes. */ + if (satisfies_constraint_Rcq (operands[0]) + && satisfies_constraint_Rcq (operands[2])) + { + if (val >= -31 && val <= 127) + { + operands[3] = gen_rtx_NEG (SImode, operands[2]); + operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]); + return; + } + else if (val >= 0 && val < 255) + { + operands[3] = operands[1]; + operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]); + return; + } + } + /* If the destination is not an ARCompact16 register, we might + still have a chance to make a short insn if the source is; + we need to start with a reg-reg move for this. */ + operands[3] = operands[2]; + operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]); +} + +/* Handle DOUBLE_REGS uses. + Operand 0: destination register + Operand 1: source register */ + +static rtx +arc_process_double_reg_moves (rtx *operands) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx val; + + enum usesDxState { none, srcDx, destDx, maxDx }; + enum usesDxState state = none; + + if (refers_to_regno_p (40, 44, src, 0)) + state = srcDx; + if (refers_to_regno_p (40, 44, dest, 0)) + { + /* Via arc_register_move_cost, we should never see D,D moves. */ + gcc_assert (state == none); + state = destDx; + } + + if (state == none) + return NULL_RTX; + + start_sequence (); + + if (state == srcDx) + { + /* Without the LR insn, we need to split this into a + sequence of insns which will use the DEXCLx and DADDHxy + insns to be able to read the Dx register in question. */ + if (TARGET_DPFP_DISABLE_LRSR) + { + /* gen *movdf_insn_nolrsr */ + rtx set = gen_rtx_SET (VOIDmode, dest, src); + rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1))); + } + else + { + /* When we have 'mov D, r' or 'mov D, D' then get the target + register pair for use with LR insn. */ + rtx destHigh = simplify_gen_subreg(SImode, dest, DFmode, 4); + rtx destLow = simplify_gen_subreg(SImode, dest, DFmode, 0); + + /* Produce the two LR insns to get the high and low parts. */ + emit_insn (gen_rtx_SET (VOIDmode, + destHigh, + gen_rtx_UNSPEC_VOLATILE (Pmode, gen_rtvec (1, src), + VUNSPEC_LR_HIGH))); + emit_insn (gen_rtx_SET (VOIDmode, + destLow, + gen_rtx_UNSPEC_VOLATILE (Pmode, gen_rtvec (1, src), + VUNSPEC_LR))); + } + } + else if (state == destDx) + { + /* When we have 'mov r, D' or 'mov D, D' and we have access to the + LR insn get the target register pair. */ + rtx srcHigh = simplify_gen_subreg(SImode, src, DFmode, 4); + rtx srcLow = simplify_gen_subreg(SImode, src, DFmode, 0); + + emit_insn (gen_rtx_UNSPEC_VOLATILE (Pmode, + gen_rtvec (3, dest, srcHigh, srcLow), + VUNSPEC_DEXCL_NORES)); + + } + else + gcc_unreachable (); + + val = get_insns (); + end_sequence (); + return val; +} + +/* operands 0..1 are the operands of a 64 bit move instruction. 
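+   Auto-increment addresses are rewritten first into explicit
+   PRE_MODIFY / POST_MODIFY form so that each half updates the pointer
+   correctly; then we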
+ split it into two moves with operands 2/3 and 4/5. */ + +rtx +arc_split_move (rtx *operands) +{ + enum machine_mode mode = GET_MODE (operands[0]); + int i; + int swap = 0; + rtx xop[4]; + rtx val; + + if (TARGET_DPFP) + { + val = arc_process_double_reg_moves (operands); + if (val) + return val; + } + + for (i = 0; i < 2; i++) + { + if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0))) + { + rtx addr = XEXP (operands[i], 0); + rtx r, o; + enum rtx_code code; + + gcc_assert (!reg_overlap_mentioned_p (operands[0], addr)); + switch (GET_CODE (addr)) + { + case PRE_DEC: o = GEN_INT (-8); goto pre_modify; + case PRE_INC: o = GEN_INT (8); goto pre_modify; + case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1); + pre_modify: + code = PRE_MODIFY; + break; + case POST_DEC: o = GEN_INT (-8); goto post_modify; + case POST_INC: o = GEN_INT (8); goto post_modify; + case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1); + post_modify: + code = POST_MODIFY; + swap = 2; + break; + default: + gcc_unreachable (); + } + r = XEXP (addr, 0); + xop[0+i] = adjust_automodify_address_nv + (operands[i], SImode, + gen_rtx_fmt_ee (code, Pmode, r, + gen_rtx_PLUS (Pmode, r, o)), + 0); + xop[2+i] = adjust_automodify_address_nv + (operands[i], SImode, plus_constant (Pmode, r, 4), 4); + } + else + { + xop[0+i] = operand_subword (operands[i], 0, 0, mode); + xop[2+i] = operand_subword (operands[i], 1, 0, mode); + } + } + if (reg_overlap_mentioned_p (xop[0], xop[3])) + { + swap = 2; + gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1])); + } + operands[2+swap] = xop[0]; + operands[3+swap] = xop[1]; + operands[4-swap] = xop[2]; + operands[5-swap] = xop[3]; + + start_sequence (); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[3])); + emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[5])); + val = get_insns (); + end_sequence (); + + return val; +} + +/* Select between the instruction output templates s_tmpl (for short INSNs) + and l_tmpl (for long INSNs). */ + +const char * +arc_short_long (rtx insn, const char *s_tmpl, const char *l_tmpl) +{ + int is_short = arc_verify_short (insn, cfun->machine->unalign, -1); + + extract_constrain_insn_cached (insn); + return is_short ? s_tmpl : l_tmpl; +} + +/* Searches X for any reference to REGNO, returning the rtx of the + reference found if any. Otherwise, returns NULL_RTX. */ + +rtx +arc_regno_use_in (unsigned int regno, rtx x) +{ + const char *fmt; + int i, j; + rtx tem; + + if (REG_P (x) && refers_to_regno_p (regno, regno+1, x, (rtx *) 0)) + return x; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + { + if ((tem = regno_use_in (regno, XEXP (x, i)))) + return tem; + } + else if (fmt[i] == 'E') + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if ((tem = regno_use_in (regno , XVECEXP (x, i, j)))) + return tem; + } + + return NULL_RTX; +} + +/* Return the integer value of the "type" attribute for INSN, or -1 if + INSN can't have attributes. */ + +int +arc_attr_type (rtx insn) +{ + if (NONJUMP_INSN_P (insn) + ? (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + : JUMP_P (insn) + ? (GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + : !CALL_P (insn)) + return -1; + return get_attr_type (insn); +} + +/* Return true if insn sets the condition codes. 
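+   For a delay slot SEQUENCE, the last insn in the sequence makes the
+   decision.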
*/ + +bool +arc_sets_cc_p (rtx insn) +{ + if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE) + insn = XVECEXP (PATTERN (insn), 0, XVECLEN (PATTERN (insn), 0) - 1); + return arc_attr_type (insn) == TYPE_COMPARE; +} + +/* Return true if INSN is an instruction with a delay slot we may want + to fill. */ + +bool +arc_need_delay (rtx insn) +{ + rtx next; + + if (!flag_delayed_branch) + return false; + /* The return at the end of a function needs a delay slot. */ + if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE + && (!(next = next_active_insn (insn)) + || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE) + && arc_attr_type (next) == TYPE_RETURN)) + && (!TARGET_PAD_RETURN + || (prev_active_insn (insn) + && prev_active_insn (prev_active_insn (insn)) + && prev_active_insn (prev_active_insn (prev_active_insn (insn)))))) + return true; + if (NONJUMP_INSN_P (insn) + ? (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == SEQUENCE) + : JUMP_P (insn) + ? (GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + : !CALL_P (insn)) + return false; + return num_delay_slots (insn) != 0; +} + +/* Return true if the scheduling pass(es) has/have already run, + i.e. where possible, we should try to mitigate high latencies + by different instruction selection. */ + +bool +arc_scheduling_not_expected (void) +{ + return cfun->machine->arc_reorg_started; +} + +/* Oddly enough, sometimes we get a zero overhead loop that branch + shortening doesn't think is a loop - observed with compile/pr24883.c + -O3 -fomit-frame-pointer -funroll-loops. Make sure to include the + alignment visible for branch shortening (we actually align the loop + insn before it, but that is equivalent since the loop insn is 4 byte + long.) */ + +int +arc_label_align (rtx label) +{ + int loop_align = LOOP_ALIGN (LABEL); + + if (loop_align > align_labels_log) + { + rtx prev = prev_nonnote_insn (label); + + if (prev && NONJUMP_INSN_P (prev) + && GET_CODE (PATTERN (prev)) == PARALLEL + && recog_memoized (prev) == CODE_FOR_doloop_begin_i) + return loop_align; + } + /* Code has a minimum p2 alignment of 1, which we must restore after an + ADDR_DIFF_VEC. */ + if (align_labels_log < 1) + { + rtx next = next_nonnote_nondebug_insn (label); + if (INSN_P (next) && recog_memoized (next) >= 0) + return 1; + } + return align_labels_log; +} + +/* Return true if LABEL is in executable code. */ + +bool +arc_text_label (rtx label) +{ + rtx next; + + /* ??? We use deleted labels like they were still there, see + gcc.c-torture/compile/20000326-2.c . */ + gcc_assert (GET_CODE (label) == CODE_LABEL + || (GET_CODE (label) == NOTE + && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL)); + next = next_nonnote_insn (label); + if (next) + return (!JUMP_TABLE_DATA_P (next) + || GET_CODE (PATTERN (next)) != ADDR_VEC); + else if (!PREV_INSN (label)) + /* ??? sometimes text labels get inserted very late, see + gcc.dg/torture/stackalign/comp-goto-1.c */ + return true; + return false; +} + +/* Return the size of the pretend args for DECL. */ + +int +arc_decl_pretend_args (tree decl) +{ + /* struct function is in DECL_STRUCT_FUNCTION (decl), but no + pretend_args there... See PR38391. 
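+     Hence we assert that DECL is the current function and read the
+     value from crtl instead.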
*/
+  gcc_assert (decl == current_function_decl);
+  return crtl->args.pretend_args_size;
+}
+
+/* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble
+   when compiling with -O2 -freorder-blocks-and-partition -fprofile-use
+   -D_PROFILE_USE; delay branch scheduling then follows a REG_CROSSING_JUMP
+   to redirect two breqs.  */
+
+static bool
+arc_can_follow_jump (const_rtx follower, const_rtx followee)
+{
+  /* ??? get_attr_type is declared to take an rtx.  */
+  union { const_rtx c; rtx r; } u;
+
+  u.c = follower;
+  if (find_reg_note (followee, REG_CROSSING_JUMP, NULL_RTX))
+    switch (get_attr_type (u.r))
+      {
+      case TYPE_BRCC:
+      case TYPE_BRCC_NO_DELAY_SLOT:
+        return false;
+      default:
+        return true;
+      }
+  return true;
+}
+
+/* Implement EPILOGUE_USES.
+   Return true if REGNO should be added to the deemed uses of the epilogue.
+
+   We use the return address
+   arc_return_address_regs[arc_compute_function_type (cfun)].
+   But also, we have to make sure all the register restore instructions
+   are known to be live in interrupt functions.  */
+
+bool
+arc_epilogue_uses (int regno)
+{
+  if (reload_completed)
+    {
+      if (ARC_INTERRUPT_P (cfun->machine->fn_type))
+        {
+          if (!fixed_regs[regno])
+            return true;
+          return regno == arc_return_address_regs[cfun->machine->fn_type];
+        }
+      else
+        return regno == RETURN_ADDR_REGNUM;
+    }
+  else
+    return regno == arc_return_address_regs[arc_compute_function_type (cfun)];
+}
+
+#ifndef TARGET_NO_LRA
+#define TARGET_NO_LRA !TARGET_LRA
+#endif
+
+static bool
+arc_lra_p (void)
+{
+  return !TARGET_NO_LRA;
+}
+
+/* ??? Should we define TARGET_REGISTER_PRIORITY?  We might prefer to use
+   Rcq registers, because some insns are shorter with them.  OTOH we already
+   have separate alternatives for this purpose, and other insns don't
+   mind, so maybe we should rather prefer the other registers?
+   We need more data, and we can only get that if we allow people to
+   try all options.  */
+static int
+arc_register_priority (int r)
+{
+  switch (arc_lra_priority_tag)
+    {
+    case ARC_LRA_PRIORITY_NONE:
+      return 0;
+    case ARC_LRA_PRIORITY_NONCOMPACT:
+      return ((((r & 7) ^ 4) - 4) & 15) != r;
+    case ARC_LRA_PRIORITY_COMPACT:
+      return ((((r & 7) ^ 4) - 4) & 15) == r;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+static reg_class_t
+arc_spill_class (reg_class_t /* orig_class */, enum machine_mode)
+{
+  return GENERAL_REGS;
+}
+
+bool
+arc_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
+                               int itype)
+{
+  rtx x = *p;
+  enum reload_type type = (enum reload_type) itype;
+
+  if (GET_CODE (x) == PLUS
+      && CONST_INT_P (XEXP (x, 1))
+      && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true)
+          || (REG_P (XEXP (x, 0))
+              && reg_equiv_constant (REGNO (XEXP (x, 0))))))
+    {
+      int scale = GET_MODE_SIZE (mode);
+      int shift;
+      rtx index_rtx = XEXP (x, 1);
+      HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
+      rtx reg, sum, sum2;
+
+      if (scale > 4)
+        scale = 4;
+      if ((scale - 1) & offset)
+        scale = 1;
+      shift = scale >> 1;
+      offset_base = (offset + (256 << shift)) & (-512 << shift);
+      /* Sometimes the normal form does not suit DImode.  We
+         could avoid that by using smaller ranges, but that
+         would give less optimized code when SImode is
+         prevalent.
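+
+         As a worked example (illustrative numbers, not taken from the
+         sources): for an SImode access, scale is 4 and shift is 2, so an
+         offset of 0x1234 gives
+           offset_base = (0x1234 + 0x400) & -0x800 = 0x1000;
+         the address is then rewritten as (reg + 0x1000) + 0x234, the
+         inner sum is pushed as a reload into a base register, and the
+         residual 0x234 passes the range check below.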
*/ + if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift)) + { + int regno; + + reg = XEXP (x, 0); + regno = REGNO (reg); + sum2 = sum = plus_constant (Pmode, reg, offset_base); + + if (reg_equiv_constant (regno)) + { + sum2 = plus_constant (Pmode, reg_equiv_constant (regno), + offset_base); + if (GET_CODE (sum2) == PLUS) + sum2 = gen_rtx_CONST (Pmode, sum2); + } + *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base)); + push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, + type); + return true; + } + } + /* We must re-recognize what we created before. */ + else if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (x, 1))) + { + /* Because this address is so complex, we know it must have + been created by LEGITIMIZE_RELOAD_ADDRESS before; thus, + it is already unshared, and needs no further unsharing. */ + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); + return true; + } + return false; +} + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-arc.h" diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h new file mode 100644 index 00000000000..637f7b66acc --- /dev/null +++ b/gcc/config/arc/arc.h @@ -0,0 +1,1683 @@ +/* Definitions of target machine for GNU compiler, Synopsys DesignWare ARC cpu. + Copyright (C) 1994, 1995, 1997, 1998, 2007-2013 + Free Software Foundation, Inc. + + Sources derived from work done by Sankhya Technologies (www.sankhya.com) on + behalf of Synopsys Inc. + + Position Independent Code support added,Code cleaned up, + Comments and Support For ARC700 instructions added by + Saurabh Verma (saurabh.verma@codito.com) + Ramana Radhakrishnan(ramana.radhakrishnan@codito.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_ARC_H +#define GCC_ARC_H + +/* Things to do: + + - incscc, decscc? 
+ +*/ + +#define SYMBOL_FLAG_SHORT_CALL (SYMBOL_FLAG_MACH_DEP << 0) +#define SYMBOL_FLAG_MEDIUM_CALL (SYMBOL_FLAG_MACH_DEP << 1) +#define SYMBOL_FLAG_LONG_CALL (SYMBOL_FLAG_MACH_DEP << 2) + +/* Check if this symbol has a long_call attribute in its declaration */ +#define SYMBOL_REF_LONG_CALL_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_LONG_CALL) != 0) + +/* Check if this symbol has a medium_call attribute in its declaration */ +#define SYMBOL_REF_MEDIUM_CALL_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_MEDIUM_CALL) != 0) + +/* Check if this symbol has a short_call attribute in its declaration */ +#define SYMBOL_REF_SHORT_CALL_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_SHORT_CALL) != 0) + +#undef ASM_SPEC +#undef LINK_SPEC +#undef STARTFILE_SPEC +#undef ENDFILE_SPEC +#undef SIZE_TYPE +#undef PTRDIFF_TYPE +#undef WCHAR_TYPE +#undef WCHAR_TYPE_SIZE +#undef ASM_APP_ON +#undef ASM_APP_OFF +#undef CC1_SPEC + +/* Names to predefine in the preprocessor for this target machine. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do { \ + builtin_define ("__arc__"); \ + if (TARGET_A5) \ + builtin_define ("__A5__"); \ + else if (TARGET_ARC600) \ + { \ + builtin_define ("__A6__"); \ + builtin_define ("__ARC600__"); \ + } \ + else if (TARGET_ARC601) \ + { \ + builtin_define ("__ARC601__"); \ + } \ + else if (TARGET_ARC700) \ + { \ + builtin_define ("__A7__"); \ + builtin_define ("__ARC700__"); \ + } \ + if (TARGET_NORM) \ + { \ + builtin_define ("__ARC_NORM__");\ + builtin_define ("__Xnorm"); \ + } \ + if (TARGET_MUL64_SET) \ + builtin_define ("__ARC_MUL64__");\ + if (TARGET_MULMAC_32BY16_SET) \ + builtin_define ("__ARC_MUL32BY16__");\ + if (TARGET_SIMD_SET) \ + builtin_define ("__ARC_SIMD__"); \ + if (TARGET_BARREL_SHIFTER) \ + builtin_define ("__Xbarrel_shifter");\ + builtin_assert ("cpu=arc"); \ + builtin_assert ("machine=arc"); \ + builtin_define (TARGET_BIG_ENDIAN \ + ? "__BIG_ENDIAN__" : "__LITTLE_ENDIAN__"); \ + if (TARGET_BIG_ENDIAN) \ + builtin_define ("__big_endian__"); \ +} while(0) + +#if DEFAULT_LIBC == LIBC_UCLIBC + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS (); \ + } \ + while (0) +#endif + +/* Match the macros used in the assembler. */ +#define CPP_SPEC "\ +%{msimd:-D__Xsimd} %{mno-mpy:-D__Xno_mpy} %{mswap:-D__Xswap} \ +%{mmin-max:-D__Xmin_max} %{mEA:-D__Xea} \ +%{mspfp*:-D__Xspfp} %{mdpfp*:-D__Xdpfp} \ +%{mmac-d16:-D__Xxmac_d16} %{mmac-24:-D__Xxmac_24} \ +%{mdsp-packa:-D__Xdsp_packa} %{mcrc:-D__Xcrc} %{mdvbf:-D__Xdvbf} \ +%{mtelephony:-D__Xtelephony} %{mxy:-D__Xxy} %{mmul64: -D__Xmult32} \ +%{mlock:-D__Xlock} %{mswape:-D__Xswape} %{mrtsc:-D__Xrtsc} \ +" + +#define CC1_SPEC "\ +%{EB:%{EL:%emay not use both -EB and -EL}} \ +%{EB:-mbig-endian} %{EL:-mlittle-endian} \ +" + +#define ASM_DEFAULT "-mARC700 -mEA" + +#define ASM_SPEC "\ +%{mbig-endian|EB:-EB} %{EL} \ +%{mcpu=A5|mcpu=a5|mA5:-mA5} \ +%{mcpu=ARC600:-mARC600} \ +%{mcpu=ARC601:-mARC601} \ +%{mcpu=ARC700:-mARC700} \ +%{mcpu=ARC700:-mEA} \ +%{!mcpu=*:" ASM_DEFAULT "} \ +%{mbarrel-shifter} %{mno-mpy} %{mmul64} %{mmul32x16:-mdsp-packa} %{mnorm} \ +%{mswap} %{mEA} %{mmin-max} %{mspfp*} %{mdpfp*} \ +%{msimd} \ +%{mmac-d16} %{mmac-24} %{mdsp-packa} %{mcrc} %{mdvbf} %{mtelephony} %{mxy} \ +%{mcpu=ARC700|!mcpu=*:%{mlock}} \ +%{mcpu=ARC700|!mcpu=*:%{mswape}} \ +%{mcpu=ARC700|!mcpu=*:%{mrtsc}} \ +" + +#if DEFAULT_LIBC == LIBC_UCLIBC +/* Note that the default is to link against dynamic libraries, if they are + available. Override with -static. 
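+   For example (reading the spec strings below), a default link passes
+   "-dynamic-linker /lib/ld-uClibc.so.0" to the linker, while adding
+   -static to the driver command line also passes -Bstatic, producing a
+   statically linked executable.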
*/ +#define LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{symbolic:-Bsymbolic} \ + %{rdynamic:-export-dynamic}\ + -dynamic-linker /lib/ld-uClibc.so.0 \ + -X %{mbig-endian:-EB} \ + %{EB} %{EL} \ + %{marclinux*} \ + %{!marclinux*: %{pg|p|profile:-marclinux_prof;: -marclinux}} \ + %{!z:-z max-page-size=0x2000 -z common-page-size=0x2000} \ + %{shared:-shared}" +/* Like the standard LINK_COMMAND_SPEC, but add %G when building + a shared library with -nostdlib, so that the hidden functions of libgcc + will be incorporated. + N.B., we don't want a plain -lgcc, as this would lead to re-exporting + non-hidden functions, so we have to consider libgcc_s.so.* first, which in + turn should be wrapped with --as-needed. */ +#define LINK_COMMAND_SPEC "\ +%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\ + %(linker) %l " LINK_PIE_SPEC "%X %{o*} %{A} %{d} %{e*} %{m} %{N} %{n} %{r}\ + %{s} %{t} %{u*} %{x} %{z} %{Z} %{!A:%{!nostdlib:%{!nostartfiles:%S}}}\ + %{static:} %{L*} %(mfwrap) %(link_libgcc) %o\ + %{fopenmp:%:include(libgomp.spec)%(link_gomp)} %(mflib)\ + %{fprofile-arcs|fprofile-generate|coverage:-lgcov}\ + %{!nostdlib:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}\ + %{!A:%{!nostdlib:%{!nostartfiles:%E}}} %{T*} }}}}}}" + +#else +#define LINK_SPEC "%{mbig-endian:-EB} %{EB} %{EL}\ + %{pg|p:-marcelf_prof;mA7|mARC700|mcpu=arc700|mcpu=ARC700: -marcelf}" +#endif + +#if DEFAULT_LIBC != LIBC_UCLIBC +#define STARTFILE_SPEC "%{!shared:crt0.o%s} crti%O%s %{pg|p:crtg.o%s} crtbegin.o%s" +#else +#define STARTFILE_SPEC "%{!shared:%{!mkernel:crt1.o%s}} crti.o%s \ + %{!shared:%{pg|p|profile:crtg.o%s} crtbegin.o%s} %{shared:crtbeginS.o%s}" + +#endif + +#if DEFAULT_LIBC != LIBC_UCLIBC +#define ENDFILE_SPEC "%{pg|p:crtgend.o%s} crtend.o%s crtn%O%s" +#else +#define ENDFILE_SPEC "%{!shared:%{pg|p|profile:crtgend.o%s} crtend.o%s} \ + %{shared:crtendS.o%s} crtn.o%s" + +#endif + +#if DEFAULT_LIBC == LIBC_UCLIBC +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{pg|p|profile:-lgmon -u profil --defsym __profil=profil} -lc}" +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack +#else +#undef LIB_SPEC +/* -lc_p not present for arc-elf32-* : ashwin */ +#define LIB_SPEC "%{!shared:%{g*:-lg} %{pg|p:-lgmon} -lc}" +#endif + +#ifndef DRIVER_ENDIAN_SELF_SPECS +#define DRIVER_ENDIAN_SELF_SPECS "" +#endif +#ifndef TARGET_SDATA_DEFAULT +#define TARGET_SDATA_DEFAULT 1 +#endif +#ifndef TARGET_MMEDIUM_CALLS_DEFAULT +#define TARGET_MMEDIUM_CALLS_DEFAULT 0 +#endif + +#define DRIVER_SELF_SPECS DRIVER_ENDIAN_SELF_SPECS \ + "%{mARC5|mA5: -mcpu=A5 %<mARC5 %<mA5}" \ + "%{mARC600|mA6: -mcpu=ARC600 %<mARC600 %<mA6}" \ + "%{mARC601: -mcpu=ARC601 %<mARC601}" \ + "%{mARC700|mA7: -mcpu=ARC700 %<mARC700 %<mA7}" \ + "%{mbarrel_shifte*: -mbarrel-shifte%* %<mbarrel_shifte*}" \ + "%{mEA: -mea %<mEA}" \ + "%{mspfp_*: -mspfp-%* %<mspfp_*}" \ + "%{mdpfp_*: -mdpfp-%* %<mdpfp_*}" \ + "%{mdsp_pack*: -mdsp-pack%* %<mdsp_pack*}" \ + "%{mmac_*: -mmac-%* %<mmac_*}" \ + "%{multcost=*: -mmultcost=%* %<multcost=*}" + +/* Run-time compilation parameters selecting different hardware subsets. */ + +#define TARGET_MIXED_CODE (TARGET_MIXED_CODE_SET) + +#define TARGET_SPFP (TARGET_SPFP_FAST_SET || TARGET_SPFP_COMPACT_SET) +#define TARGET_DPFP (TARGET_DPFP_FAST_SET || TARGET_DPFP_COMPACT_SET) + +#define SUBTARGET_SWITCHES + +/* Instruction set characteristics. + These are internal macros, set by the appropriate -m option. */ + +/* Non-zero means the cpu supports norm instruction. 
This flag is set by
+   default for A7, and only for pre A7 cores when -mnorm is given.  */
+#define TARGET_NORM (TARGET_ARC700 || TARGET_NORM_SET)
+/* Indicate if an optimized floating point emulation library is available.  */
+#define TARGET_OPTFPE \
+ (TARGET_ARC700 \
+  /* We need a barrel shifter and NORM.  */ \
+  || (TARGET_ARC600 && TARGET_NORM_SET))
+
+/* Non-zero means the cpu supports swap instruction.  This flag is set by
+   default for A7, and only for pre A7 cores when -mswap is given.  */
+#define TARGET_SWAP (TARGET_ARC700 || TARGET_SWAP_SET)
+
+/* Provide some macros for size / scheduling features of the ARC700, so
+   that we can pick & choose features if we get a new cpu family member.  */
+
+/* Should we try to unalign likely taken branches without a delay slot?  */
+#define TARGET_UNALIGN_BRANCH (TARGET_ARC700 && !optimize_size)
+
+/* Should we upsize short delayed branches with a short delay insn?  */
+#define TARGET_UPSIZE_DBR (TARGET_ARC700 && !optimize_size)
+
+/* Should we add padding before a return insn to avoid mispredict?  */
+#define TARGET_PAD_RETURN (TARGET_ARC700 && !optimize_size)
+
+/* For an annulled-true delay slot insn for a delayed branch, should we only
+   use conditional execution?  */
+#define TARGET_AT_DBR_CONDEXEC (!TARGET_ARC700)
+
+#define TARGET_A5 (arc_cpu == PROCESSOR_A5)
+#define TARGET_ARC600 (arc_cpu == PROCESSOR_ARC600)
+#define TARGET_ARC601 (arc_cpu == PROCESSOR_ARC601)
+#define TARGET_ARC700 (arc_cpu == PROCESSOR_ARC700)
+
+/* Recast the cpu class to be the cpu attribute.  */
+#define arc_cpu_attr ((enum attr_cpu)arc_cpu)
+
+#ifndef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { "mARC700" }
+#endif
+
+/* Target machine storage layout.  */
+
+/* We want zero_extract to mean the same
+   no matter what the byte endianness is.  */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered.  */
+#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN)
+
+/* Define this if most significant word of a multiword number is the lowest
+   numbered.  */
+#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN)
+
+/* Number of bits in an addressable storage unit.  */
+#define BITS_PER_UNIT 8
+
+/* Width in bits of a "word", which is the contents of a machine register.
+   Note that this is not necessarily the width of data type `int';
+   if using 16-bit ints on a 68000, this would still be 32.
+   But on a machine with 16-bit registers, this would be 16.  */
+#define BITS_PER_WORD 32
+
+/* Width of a word, in units (bytes).  */
+#define UNITS_PER_WORD 4
+
+/* Define this macro if it is advisable to hold scalars in registers
+   in a wider mode than that declared by the program.  In such cases,
+   the value is constrained to be within the bounds of the declared
+   type, but kept valid in the wider mode.  The signedness of the
+   extension may differ from that of the type.  */
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+if (GET_MODE_CLASS (MODE) == MODE_INT \
+    && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+{ \
+  (MODE) = SImode; \
+}
+
+/* Width in bits of a pointer.
+   See also the macro `Pmode' defined below.  */
+#define POINTER_SIZE 32
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list.  */
+#define PARM_BOUNDARY 32
+
+/* Boundary (in *bits*) on which stack pointer should be aligned.  */
+/* TOCHECK: Changed from 64 to 32 */
+#define STACK_BOUNDARY 32
+
+/* ALIGN FRAMES on word boundaries.
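+   A worked example (an illustration, not from the sources): STACK_BOUNDARY
+   is 32 bits, i.e. 4 bytes, so ARC_STACK_ALIGN (13) computes
+   (13 + 3) & -4 = 16.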
*/ +#define ARC_STACK_ALIGN(LOC) \ + (((LOC) + STACK_BOUNDARY / BITS_PER_UNIT - 1) & -STACK_BOUNDARY/BITS_PER_UNIT) + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 32 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bitfield declared as `int' forces `int' alignment for the struct. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* An expression for the alignment of a structure field FIELD if the + alignment computed in the usual way (including applying of + `BIGGEST_ALIGNMENT' and `BIGGEST_FIELD_ALIGNMENT' to the + alignment) is COMPUTED. It overrides alignment only if the field + alignment has not been set by the `__attribute__ ((aligned (N)))' + construct. +*/ + +#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ +(TYPE_MODE (strip_array_types (TREE_TYPE (FIELD))) == DFmode \ + ? MIN ((COMPUTED), 32) : (COMPUTED)) + + + +/* No data type wants to be aligned rounder than this. */ +/* This is bigger than currently necessary for the ARC. If 8 byte floats are + ever added it's not clear whether they'll need such alignment or not. For + now we assume they will. We can always relax it if necessary but the + reverse isn't true. */ +/* TOCHECK: Changed from 64 to 32 */ +#define BIGGEST_ALIGNMENT 32 + +/* The best alignment to use in cases where we have a choice. */ +#define FASTEST_ALIGNMENT 32 + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < FASTEST_ALIGNMENT) \ + ? FASTEST_ALIGNMENT : (ALIGN)) + + +/* Make arrays of chars word-aligned for the same reasons. */ +#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN)) + +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && arc_size_opt_level < 3 \ + && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +/* On the ARC the lower address bits are masked to 0 as necessary. The chip + won't croak when given an unaligned address, but the insn will still fail + to produce the correct result. */ +#define STRICT_ALIGNMENT 1 + +/* Layout of source language data types. */ + +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 0 + +#define SIZE_TYPE "long unsigned int" +#define PTRDIFF_TYPE "long int" +#define WCHAR_TYPE "int" +#define WCHAR_TYPE_SIZE 32 + + +/* ashwin : shifted from arc.c:102 */ +#define PROGRAM_COUNTER_REGNO 63 + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + Registers 61, 62, and 63 are not really registers and we needn't treat + them as such. We still need a register for the condition code and + argument pointer. 
*/
+
+/* r63 is pc, r64-r127 = simd vregs, r128-r143 = simd dma config regs,
+   r144, r145 = lp_start, lp_end,
+   and therefore the pseudo registers start from r146.  */
+#define FIRST_PSEUDO_REGISTER 146
+
+/* 1 for registers that have pervasive standard uses
+   and are not available for the register allocator.
+
+   0-28  - general purpose registers
+   29    - ilink1 (interrupt link register)
+   30    - ilink2 (interrupt link register)
+   31    - blink (branch link register)
+   32-59 - reserved for extensions
+   60    - LP_COUNT
+   61    - condition code
+   62    - argument pointer
+   63    - program counter
+
+   FWIW, this is how the 61-63 encodings are used by the hardware:
+   61    - reserved
+   62    - long immediate data indicator
+   63    - PCL (program counter aligned to 32 bit, read-only)
+
+   The general purpose registers are further broken down into:
+
+   0-7   - arguments/results
+   8-12  - call used (r11 - static chain pointer)
+   13-25 - call saved
+   26    - global pointer
+   27    - frame pointer
+   28    - stack pointer
+   29    - ilink1
+   30    - ilink2
+   31    - return address register
+
+   By default, the extension registers are not available.  */
+/* Present implementations have only VR0-VR23.  */
+/* ??? FIXME: r27 and r31 should not be fixed registers.  */
+#define FIXED_REGISTERS \
+{ 0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 1, 1, 1, 1, 1, 1, \
+ \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  0, 0, 0, 0, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 0, 1, 1, 1, \
+ \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  1, 1}
+
+/* 1 for registers not available across function calls.
+   These must include the FIXED_REGISTERS and also any
+   registers that can be used without being saved.
+   The latter must include the registers where values are returned
+   and the register where structure-value addresses are passed.
+   Aside from that, you can include as many other registers as you like.  */
+#define CALL_USED_REGISTERS \
+{ \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 1, 1, 1, 1, 1, 1, \
+ \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, \
+  1, 1}
+
+/* If defined, an initializer for a vector of integers, containing the
+   numbers of hard registers in the order in which GCC should
+   prefer to use them (from most preferred to least).  */
+#define REG_ALLOC_ORDER \
+{ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, \
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, \
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, \
+  27, 28, 29, 30, 31, 63}
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+   to hold something of mode MODE.
+   This is ordinarily the length in words of a value of mode MODE
+   but can be less for certain modes in special long registers.
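+   For example, DImode in core registers needs (8 + 4 - 1) / 4 = 2 hard
+   registers, whereas a 16-byte SIMD vector held in a VR register
+   (r64-r127) occupies just one; the special case in the definition below
+   encodes exactly that.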
*/ +#define HARD_REGNO_NREGS(REGNO, MODE) \ +((GET_MODE_SIZE (MODE) == 16 \ + && REGNO >= ARC_FIRST_SIMD_VR_REG && REGNO <= ARC_LAST_SIMD_VR_REG) ? 1 \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ +extern unsigned int arc_hard_regno_mode_ok[]; +extern unsigned int arc_mode_class[]; +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ +((arc_hard_regno_mode_ok[REGNO] & arc_mode_class[MODE]) != 0) + +/* A C expression that is nonzero if it is desirable to choose + register allocation so as to avoid move instructions between a + value of mode MODE1 and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, + MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1, + MODE2)' must be zero. */ + +/* Tie QI/HI/SI modes together. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ +(GET_MODE_CLASS (MODE1) == MODE_INT \ + && GET_MODE_CLASS (MODE2) == MODE_INT \ + && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \ + && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD) + +/* Internal macros to classify a register number as to whether it's a + general purpose register for compact insns (r0-r3,r12-r15), or + stack pointer (r28). */ + +#define COMPACT_GP_REG_P(REGNO) \ + (((signed)(REGNO) >= 0 && (REGNO) <= 3) || ((REGNO) >= 12 && (REGNO) <= 15)) +#define SP_REG_P(REGNO) ((REGNO) == 28) + + + +/* Register classes and constants. */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. + + It is important that any condition codes have class NO_REGS. + See `register_operand'. */ + +enum reg_class +{ + NO_REGS, + R0_REGS, /* 'x' */ + GP_REG, /* 'Rgp' */ + FP_REG, /* 'f' */ + SP_REGS, /* 'b' */ + LPCOUNT_REG, /* 'l' */ + LINK_REGS, /* 'k' */ + DOUBLE_REGS, /* D0, D1 */ + SIMD_VR_REGS, /* VR00-VR63 */ + SIMD_DMA_CONFIG_REGS, /* DI0-DI7,DO0-DO7 */ + ARCOMPACT16_REGS, /* 'q' */ + AC16_BASE_REGS, /* 'e' */ + SIBCALL_REGS, /* "Rsc" */ + GENERAL_REGS, /* 'r' */ + MPY_WRITABLE_CORE_REGS, /* 'W' */ + WRITABLE_CORE_REGS, /* 'w' */ + CHEAP_CORE_REGS, /* 'c' */ + ALL_CORE_REGS, /* 'Rac' */ + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "R0_REGS", \ + "GP_REG", \ + "FP_REG", \ + "SP_REGS", \ + "LPCOUNT_REG", \ + "LINK_REGS", \ + "DOUBLE_REGS", \ + "SIMD_VR_REGS", \ + "SIMD_DMA_CONFIG_REGS", \ + "ARCOMPACT16_REGS", \ + "AC16_BASE_REGS", \ + "SIBCALL_REGS", \ + "GENERAL_REGS", \ + "MPY_WRITABLE_CORE_REGS", \ + "WRITABLE_CORE_REGS", \ + "CHEAP_CORE_REGS", \ + "ALL_CORE_REGS", \ + "ALL_REGS" \ +} + +/* Define which registers fit in which classes. 
+ This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ \ + {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* No Registers */ \ + {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'x', r0 register , r0 */ \ + {0x04000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'Rgp', Global Pointer, r26 */ \ + {0x08000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'f', Frame Pointer, r27 */ \ + {0x10000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'b', Stack Pointer, r28 */ \ + {0x00000000, 0x10000000, 0x00000000, 0x00000000, 0x00000000}, /* 'l', LPCOUNT Register, r60 */ \ + {0xe0000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'k', LINK Registers, r29-r31 */ \ + {0x00000000, 0x00000f00, 0x00000000, 0x00000000, 0x00000000}, /* 'D', D1, D2 Registers */ \ + {0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000}, /* 'V', VR00-VR63 Registers */ \ + {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ffff}, /* 'V', DI0-7,DO0-7 Registers */ \ + {0x0000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'q', r0-r3, r12-r15 */ \ + {0x1000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'e', r0-r3, r12-r15, sp */ \ + {0x1c001fff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* "Rsc", r0-r12 */ \ + {0x9fffffff, 0xc0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'r', r0-r28, blink, ap and pcl */ \ + {0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'W', r0-r31 */ \ + /* Include ap / pcl in WRITABLE_CORE_REGS for sake of symmetry. As these \ + registers are fixed, it does not affect the literal meaning of the \ + constraints, but it makes it a superset of GENERAL_REGS, thus \ + enabling some operations that would otherwise not be possible. */ \ + {0xffffffff, 0xd0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'w', r0-r31, r60 */ \ + {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'c', r0-r60, ap, pcl */ \ + {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'Rac', r0-r60, ap, pcl */ \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0003ffff} /* All Registers */ \ +} + +/* Local macros to mark the first and last regs of different classes. */ +#define ARC_FIRST_SIMD_VR_REG 64 +#define ARC_LAST_SIMD_VR_REG 127 + +#define ARC_FIRST_SIMD_DMA_CONFIG_REG 128 +#define ARC_FIRST_SIMD_DMA_CONFIG_IN_REG 128 +#define ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG 136 +#define ARC_LAST_SIMD_DMA_CONFIG_REG 143 + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +extern enum reg_class arc_regno_reg_class[]; + +#define REGNO_REG_CLASS(REGNO) (arc_regno_reg_class[REGNO]) + +/* The class value for valid index registers. An index register is + one used in an address where its value is either multiplied by + a scale factor or added to another register (as well as added to a + displacement). */ + +#define INDEX_REG_CLASS (TARGET_MIXED_CODE ? ARCOMPACT16_REGS : GENERAL_REGS) + +/* The class value for valid base registers. A base register is one used in + an address which is the register value plus a displacement. */ + +#define BASE_REG_CLASS (TARGET_MIXED_CODE ? AC16_BASE_REGS : GENERAL_REGS) + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. 
+ Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ +#define REGNO_OK_FOR_BASE_P(REGNO) \ +((REGNO) < 29 || ((REGNO) == ARG_POINTER_REGNUM) || ((REGNO) == 63) ||\ + (unsigned) reg_renumber[REGNO] < 29) + +#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO) + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ + +#define PREFERRED_RELOAD_CLASS(X, CLASS) \ + arc_preferred_reload_class((X), (CLASS)) + + extern enum reg_class arc_preferred_reload_class (rtx, enum reg_class); + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ + +#define CLASS_MAX_NREGS(CLASS, MODE) \ +(( GET_MODE_SIZE (MODE) == 16 && CLASS == SIMD_VR_REGS) ? 1: \ +((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +#define SMALL_INT(X) ((unsigned) ((X) + 0x100) < 0x200) +#define SMALL_INT_RANGE(X, OFFSET, SHIFT) \ + ((unsigned) (((X) >> (SHIFT)) + 0x100) \ + < 0x200 - ((unsigned) (OFFSET) >> (SHIFT))) +#define SIGNED_INT12(X) ((unsigned) ((X) + 0x800) < 0x1000) +#define LARGE_INT(X) \ +(((X) < 0) \ + ? (X) >= (-(HOST_WIDE_INT) 0x7fffffff - 1) \ + : (unsigned HOST_WIDE_INT) (X) <= (unsigned HOST_WIDE_INT) 0xffffffff) +#define UNSIGNED_INT3(X) ((unsigned) (X) < 0x8) +#define UNSIGNED_INT5(X) ((unsigned) (X) < 0x20) +#define UNSIGNED_INT6(X) ((unsigned) (X) < 0x40) +#define UNSIGNED_INT7(X) ((unsigned) (X) < 0x80) +#define UNSIGNED_INT8(X) ((unsigned) (X) < 0x100) +#define IS_ONE(X) ((X) == 1) +#define IS_ZERO(X) ((X) == 0) + +/* Stack layout and stack pointer usage. */ + +/* Define this macro if pushing a word onto the stack moves the stack + pointer to a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* Offset from the stack pointer register to the first location at which + outgoing arguments are placed. */ +#define STACK_POINTER_OFFSET (0) + +/* Offset of first parameter from the argument pointer register value. */ +#define FIRST_PARM_OFFSET(FNDECL) (0) + +/* A C expression whose value is RTL representing the address in a + stack frame where the pointer to the caller's frame is stored. + Assume that FRAMEADDR is an RTL expression for the address of the + stack frame itself. + + If you don't define this macro, the default is to return the value + of FRAMEADDR--that is, the stack frame address is also the address + of the stack word that points to the previous frame. */ +/* ??? unfinished */ +/*define DYNAMIC_CHAIN_ADDRESS (FRAMEADDR)*/ + +/* A C expression whose value is RTL representing the value of the + return address for the frame COUNT steps up from the current frame. + FRAMEADDR is the frame pointer of the COUNT frame, or the frame + pointer of the COUNT - 1 frame if `RETURN_ADDR_IN_PREVIOUS_FRAME' + is defined. 
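+   So, for instance, __builtin_return_address (0) can be satisfied from
+   blink (r31) directly, while deeper counts have to be loaded from the
+   stack; see the note on [%fp,4] below.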
*/ +/* The current return address is in r31. The return address of anything + farther back is at [%fp,4]. */ + +#define RETURN_ADDR_RTX(COUNT, FRAME) \ +arc_return_addr_rtx(COUNT,FRAME) + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 28 + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 27 + +/* Base register for access to arguments of the function. This register + will be eliminated into either fp or sp. */ +#define ARG_POINTER_REGNUM 62 + +#define RETURN_ADDR_REGNUM 31 + +/* TODO - check usage of STATIC_CHAIN_REGNUM with a testcase */ +/* Register in which static-chain is passed to a function. This must + not be a register used by the prologue. */ +#define STATIC_CHAIN_REGNUM 11 + +/* Function argument passing. */ + +/* If defined, the maximum amount of space required for outgoing + arguments will be computed and placed into the variable + `crtl->outgoing_args_size'. No space will be pushed + onto the stack for each call; instead, the function prologue should + increase the stack frame size by this amount. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. */ +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ +#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT,N_NAMED_ARGS) \ +((CUM) = 0) + +/* The number of registers used for parameter passing. Local to this file. */ +#define MAX_ARC_PARM_REGS 8 + +/* 1 if N is a possible register number for function argument passing. */ +#define FUNCTION_ARG_REGNO_P(N) \ +((unsigned) (N) < MAX_ARC_PARM_REGS) + +/* The ROUND_ADVANCE* macros are local to this file. */ +/* Round SIZE up to a word boundary. */ +#define ROUND_ADVANCE(SIZE) \ +(((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Round arg MODE/TYPE up to the next word boundary. */ +#define ROUND_ADVANCE_ARG(MODE, TYPE) \ +((MODE) == BLKmode \ + ? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \ + : ROUND_ADVANCE (GET_MODE_SIZE (MODE))) + +#define ARC_FUNCTION_ARG_BOUNDARY(MODE,TYPE) PARM_BOUNDARY +/* Round CUM up to the necessary point for argument MODE/TYPE. */ +/* N.B. Vectors have alignment exceeding BIGGEST_ALIGNMENT. + ARC_FUNCTION_ARG_BOUNDARY reduces this to no more than 32 bit. */ +#define ROUND_ADVANCE_CUM(CUM, MODE, TYPE) \ + ((((CUM) - 1) | (ARC_FUNCTION_ARG_BOUNDARY ((MODE), (TYPE)) - 1)/BITS_PER_WORD)\ + + 1) + +/* Return boolean indicating arg of type TYPE and mode MODE will be passed in + a reg. This includes arguments that have to be passed by reference as the + pointer to them is passed in a reg if one is available (and that is what + we're given). + When passing arguments NAMED is always 1. When receiving arguments NAMED + is 1 for each argument except the last in a stdarg/varargs function. In + a stdarg function we want to treat the last named arg as named. In a + varargs function we want to treat the last named arg (which is + `__builtin_va_alist') as unnamed. + This macro is only used in this file. */ +#define PASS_IN_REG_P(CUM, MODE, TYPE) \ +((CUM) < MAX_ARC_PARM_REGS) + + +/* Function results. 
*/ + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ +#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, 0) + +/* 1 if N is a possible register number for a function value + as seen by the caller. */ +/* ??? What about r1 in DI/DF values. */ +#define FUNCTION_VALUE_REGNO_P(N) ((N) == 0) + +/* Tell GCC to use RETURN_IN_MEMORY. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Register in which address to store a structure value + is passed to a function, or 0 to use `invisible' first argument. */ +#define STRUCT_VALUE 0 + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ +#define EXIT_IGNORE_STACK 0 + +#define EPILOGUE_USES(REGNO) arc_epilogue_uses ((REGNO)) + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + We have two registers that can be eliminated on the ARC. First, the + argument pointer register can always be eliminated in favor of the stack + pointer register or frame pointer register. Secondly, the frame pointer + register can often be eliminated in favor of the stack pointer register. +*/ + +#define ELIMINABLE_REGS \ +{{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ +extern int arc_initial_elimination_offset(int from, int to); +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = arc_initial_elimination_offset ((FROM), (TO)) + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. + We actually emit the profiler code at the call site, so leave this one + empty. */ +#define FUNCTION_PROFILER(FILE, LABELNO) \ + if (TARGET_UCB_MCOUNT) \ + fprintf (FILE, "\t%s\n", arc_output_libcall ("__mcount")) + +#define NO_PROFILE_COUNTERS 1 + +/* Trampolines. */ + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE 20 + +/* Alignment required for a trampoline in bits . */ +/* For actual data alignment we just need 32, no more than the stack; + however, to reduce cache coherency issues, we want to make sure that + trampoline instructions always appear the same in any given cache line. */ +#define TRAMPOLINE_ALIGNMENT 256 + +/* Library calls. */ + +/* Addressing modes, and classification of registers for them. */ + +/* Maximum number of registers that can appear in a valid memory address. */ +/* The `ld' insn allows 2, but the `st' insn only allows 1. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* We have pre inc/dec (load/store with update). */ +#define HAVE_PRE_INCREMENT 1 +#define HAVE_PRE_DECREMENT 1 +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_MODIFY_DISP 1 +#define HAVE_POST_MODIFY_DISP 1 +#define HAVE_PRE_MODIFY_REG 1 +#define HAVE_POST_MODIFY_REG 1 +/* ??? should also do PRE_MODIFY_REG / POST_MODIFY_REG, but that requires + a special predicate for the memory operand of stores, like for the SH. */ + +/* Recognize any constant value that is a valid address. 
*/ +#define CONSTANT_ADDRESS_P(X) \ +(flag_pic?arc_legitimate_pic_addr_p (X): \ +(GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST)) + +/* Is the argument a const_int rtx, containing an exact power of 2 */ +#define IS_POWEROF2_P(X) (! ( (X) & ((X) - 1)) && (X)) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The *_NONSTRICT definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +#define REG_OK_FOR_INDEX_P_NONSTRICT(X) \ +((unsigned) REGNO (X) >= FIRST_PSEUDO_REGISTER || \ + (unsigned) REGNO (X) < 29 || \ + (unsigned) REGNO (X) == 63 || \ + (unsigned) REGNO (X) == ARG_POINTER_REGNUM) +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define REG_OK_FOR_BASE_P_NONSTRICT(X) \ +((unsigned) REGNO (X) >= FIRST_PSEUDO_REGISTER || \ + (unsigned) REGNO (X) < 29 || \ + (unsigned) REGNO (X) == 63 || \ + (unsigned) REGNO (X) == ARG_POINTER_REGNUM) + +/* Nonzero if X is a hard reg that can be used as an index. */ +#define REG_OK_FOR_INDEX_P_STRICT(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define REG_OK_FOR_BASE_P_STRICT(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression + that is a valid memory address for an instruction. + The MODE argument is the machine mode for the MEM expression + that wants to use this address. */ +/* The `ld' insn allows [reg],[reg+shimm],[reg+limm],[reg+reg],[limm] + but the `st' insn only allows [reg],[reg+shimm],[limm]. + The only thing we can do is only allow the most strict case `st' and hope + other parts optimize out the restrictions for `ld'. */ + +#define RTX_OK_FOR_BASE_P(X, STRICT) \ +(REG_P (X) \ + && ((STRICT) ? REG_OK_FOR_BASE_P_STRICT (X) : REG_OK_FOR_BASE_P_NONSTRICT (X))) + +#define RTX_OK_FOR_INDEX_P(X, STRICT) \ +(REG_P (X) \ + && ((STRICT) ? REG_OK_FOR_INDEX_P_STRICT (X) : REG_OK_FOR_INDEX_P_NONSTRICT (X))) + +/* A C compound statement that attempts to replace X, which is an address + that needs reloading, with a valid memory address for an operand of + mode MODE. WIN is a C statement label elsewhere in the code. + + We try to get a normal form + of the address. That will allow inheritance of the address reloads. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \ + do { \ + if (arc_legitimize_reload_address (&(X), (MODE), (OPNUM), (TYPE))) \ + goto WIN; \ + } while (0) + +/* Reading lp_count for anything but the lp instruction is very slow on the + ARC700. */ +#define DONT_REALLOC(REGNO,MODE) \ + (TARGET_ARC700 && (REGNO) == 60) + + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. 
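+   For instance (a generic illustration rather than something lifted from
+   arc-modes.def): a compare feeding only an EQ/NE test can use a mode
+   that promises no more than the Z flag, while an unsigned LTU/GEU
+   comparison needs the carry flag as well; recording this choice tells
+   the optimizers which flag bits a comparison result really carries.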
*/ +/*extern enum machine_mode arc_select_cc_mode ();*/ +#define SELECT_CC_MODE(OP, X, Y) \ +arc_select_cc_mode (OP, X, Y) + +/* Return non-zero if SELECT_CC_MODE will never return MODE for a + floating point inequality comparison. */ +#define REVERSIBLE_CC_MODE(MODE) 1 /*???*/ + +/* Costs. */ + +/* Compute extra cost of moving data between one register class + and another. */ +#define REGISTER_MOVE_COST(MODE, CLASS, TO_CLASS) \ + arc_register_move_cost ((MODE), (CLASS), (TO_CLASS)) + +/* Compute the cost of moving data between registers and memory. */ +/* Memory is 3 times as expensive as registers. + ??? Is that the right way to look at it? */ +#define MEMORY_MOVE_COST(MODE,CLASS,IN) \ +(GET_MODE_SIZE (MODE) <= UNITS_PER_WORD ? 6 : 12) + +/* The cost of a branch insn. */ +/* ??? What's the right value here? Branches are certainly more + expensive than reg->reg moves. */ +#define BRANCH_COST(speed_p, predictable_p) 2 + +/* Nonzero if access to memory by bytes is slow and undesirable. + For RISC chips, it means that access to memory by bytes is no + better than access by words when possible, so grab a whole word + and maybe make use of that. */ +#define SLOW_BYTE_ACCESS 0 + +/* Define this macro if it is as good or better to call a constant + function address than to call an address kept in a register. */ +/* On the ARC, calling through registers is slow. */ +#define NO_FUNCTION_CSE + +/* Section selection. */ +/* WARNING: These section names also appear in dwarfout.c. */ + +#define TEXT_SECTION_ASM_OP "\t.section\t.text" +#define DATA_SECTION_ASM_OP "\t.section\t.data" + +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#define SDATA_SECTION_ASM_OP "\t.section\t.sdata" +#define SBSS_SECTION_ASM_OP "\t.section\t.sbss" + +/* Expression whose value is a string, including spacing, containing the + assembler operation to identify the following data as initialization/termination + code. If not defined, GCC will assume such a section does not exist. */ +#define INIT_SECTION_ASM_OP "\t.section\t.init" +#define FINI_SECTION_ASM_OP "\t.section\t.fini" + +/* Define this macro if jump tables (for tablejump insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. + This macro is irrelevant if there is no separate readonly data section. */ +#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic || CASE_VECTOR_PC_RELATIVE) + +/* For DWARF. Marginally different than default so output is "prettier" + (and consistent with above). */ +#define PUSHSECTION_FORMAT "\t%s %s\n" + +/* Tell crtstuff.c we're using ELF. */ +#define OBJECT_FORMAT_ELF + +/* PIC */ + +/* The register number of the register used to address a table of static + data addresses in memory. In some cases this register is defined by a + processor's ``application binary interface'' (ABI). When this macro + is defined, RTL is generated for this register once, as with the stack + pointer and frame pointer registers. If this macro is not defined, it + is up to the machine-dependent files to allocate such a register (if + necessary). */ +#define PIC_OFFSET_TABLE_REGNUM 26 + +/* Define this macro if the register defined by PIC_OFFSET_TABLE_REGNUM is + clobbered by calls. Do not define this macro if PIC_OFFSET_TABLE_REGNUM + is not defined. */ +/* This register is call-saved on the ARC. */ +/*#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED*/ + +/* A C expression that is nonzero if X is a legitimate immediate + operand on the target machine when generating position independent code. 
+ You can assume that X satisfies CONSTANT_P, so you need not + check this. You can also assume `flag_pic' is true, so you need not + check it either. You need not define this macro if all constants + (including SYMBOL_REF) can be immediate operands when generating + position independent code. */ +#define LEGITIMATE_PIC_OPERAND_P(X) (arc_legitimate_pic_operand_p(X)) + +/* PIC and small data don't mix on ARC because they use the same register. */ +#define SDATA_BASE_REGNUM 26 + +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (flag_pic \ + ? (GLOBAL ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4 \ + : DW_EH_PE_absptr) + +/* Control the assembler format that we output. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will + end at the end of the line. */ +/* Gas needs this to be "#" in order to recognize line directives. */ +#define ASM_COMMENT_START "#" + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON "" + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#define ASM_APP_OFF "" + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* This is how to output an assembler line defining a `char' constant. */ +#define ASM_OUTPUT_CHAR(FILE, VALUE) \ +( fprintf (FILE, "\t.byte\t"), \ + output_addr_const (FILE, (VALUE)), \ + fprintf (FILE, "\n")) + +/* This is how to output an assembler line defining a `short' constant. */ +#define ASM_OUTPUT_SHORT(FILE, VALUE) \ +( fprintf (FILE, "\t.hword\t"), \ + output_addr_const (FILE, (VALUE)), \ + fprintf (FILE, "\n")) + +/* This is how to output an assembler line defining an `int' constant. + We also handle symbol output here. Code addresses must be right shifted + by 2 because that's how the jump instruction wants them. */ +#define ASM_OUTPUT_INT(FILE, VALUE) \ +do { \ + fprintf (FILE, "\t.word\t"); \ + if (GET_CODE (VALUE) == LABEL_REF) \ + { \ + fprintf (FILE, "%%st(@"); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, ")"); \ + } \ + else \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining a `float' constant. */ +#define ASM_OUTPUT_FLOAT(FILE, VALUE) \ +{ \ + long t; \ + char str[30]; \ + REAL_VALUE_TO_TARGET_SINGLE ((VALUE), t); \ + REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", str); \ + fprintf (FILE, "\t.word\t0x%lx %s %s\n", \ + t, ASM_COMMENT_START, str); \ +} + +/* This is how to output an assembler line defining a `double' constant. */ +#define ASM_OUTPUT_DOUBLE(FILE, VALUE) \ +{ \ + long t[2]; \ + char str[30]; \ + REAL_VALUE_TO_TARGET_DOUBLE ((VALUE), t); \ + REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", str); \ + fprintf (FILE, "\t.word\t0x%lx %s %s\n\t.word\t0x%lx\n", \ + t[0], ASM_COMMENT_START, str, t[1]); \ +} + +/* This is how to output the definition of a user-level label named NAME, + such as the label on a static function or variable NAME. */ +#define ASM_OUTPUT_LABEL(FILE, NAME) \ +do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0) + +#define ASM_NAME_P(NAME) ( NAME[0]=='*') + +/* This is how to output a reference to a user-level label named NAME. + `assemble_name' uses this. */ +/* We work around a dwarfout.c deficiency by watching for labels from it and + not adding the '_' prefix. 
There is a comment in
+   dwarfout.c that says it should be using ASM_OUTPUT_INTERNAL_LABEL.  */
+#define ASM_OUTPUT_LABELREF(FILE, NAME1) \
+do { \
+  const char *NAME; \
+  NAME = (*targetm.strip_name_encoding) (NAME1); \
+  if ((NAME)[0] == '.' && (NAME)[1] == 'L') \
+    fprintf (FILE, "%s", NAME); \
+  else \
+    { \
+      if (!ASM_NAME_P (NAME1)) \
+        fprintf (FILE, "%s", user_label_prefix); \
+      fprintf (FILE, "%s", NAME); \
+    } \
+} while (0)
+
+/* This is how to output a reference to a symbol_ref / label_ref as
+   (part of) an operand.  To disambiguate from register names like
+   a1 / a2 / status etc, symbols are preceded by '@'.  */
+#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \
+  ASM_OUTPUT_LABEL_REF ((FILE), XSTR ((SYM), 0))
+#define ASM_OUTPUT_LABEL_REF(FILE,STR) \
+  do \
+    { \
+      fputc ('@', (FILE)); \
+      assemble_name ((FILE), (STR)); \
+    } \
+  while (0)
+
+/* Store in OUTPUT a string (made with alloca) containing
+   an assembler-name for a local static variable named NAME.
+   LABELNO is an integer which is different for each call.  */
+#define ASM_FORMAT_PRIVATE_NAME(OUTPUT, NAME, LABELNO) \
+( (OUTPUT) = (char *) alloca (strlen ((NAME)) + 10), \
+  sprintf ((OUTPUT), "%s.%d", (NAME), (LABELNO)))
+
+/* The following macro defines the format used to output the second
+   operand of the .type assembler directive.  Different svr4 assemblers
+   expect various different forms for this operand.  The one given here
+   is just a default.  You may need to override it in your machine-
+   specific tm.h file (depending upon the particulars of your assembler).  */
+
+#undef TYPE_OPERAND_FMT
+#define TYPE_OPERAND_FMT "@%s"
+
+/* A C string containing the appropriate assembler directive to
+   specify the size of a symbol, without any arguments.  On systems
+   that use ELF, the default (in `config/elfos.h') is `"\t.size\t"';
+   on other systems, the default is not to define this macro.  */
+#undef SIZE_ASM_OP
+#define SIZE_ASM_OP "\t.size\t"
+
+/* Assembler pseudo-op to equate one value with another.  */
+/* ??? This is needed because dwarfout.c provides a default definition too
+   late for defaults.h (which contains the default definition of
+   ASM_OUTPUT_DEF that we use).  */
+#ifdef SET_ASM_OP
+#undef SET_ASM_OP
+#endif
+#define SET_ASM_OP "\t.set\t"
+
+extern char rname56[], rname57[], rname58[], rname59[];
+/* How to refer to registers in assembler output.
+   This sequence is indexed by compiler's hard-register-number (see above).
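+   For example, hard register 26 prints as "gp" and register 60 as
+   "lp_count", while entries 56-59 are the writable rname56..rname59
+   arrays declared just above, so those names can be adjusted at run time
+   (compare the multiply-extension handling in DBX_REGISTER_NUMBER
+   further down).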
*/ +#define REGISTER_NAMES \ +{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \ + "r24", "r25", "gp", "fp", "sp", "ilink1", "ilink2", "blink", \ + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \ + "d1", "d1", "d2", "d2", "r44", "r45", "r46", "r47", \ + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \ + rname56,rname57,rname58,rname59,"lp_count", "cc", "ap", "pcl", \ + "vr0", "vr1", "vr2", "vr3", "vr4", "vr5", "vr6", "vr7", \ + "vr8", "vr9", "vr10", "vr11", "vr12", "vr13", "vr14", "vr15", \ + "vr16", "vr17", "vr18", "vr19", "vr20", "vr21", "vr22", "vr23", \ + "vr24", "vr25", "vr26", "vr27", "vr28", "vr29", "vr30", "vr31", \ + "vr32", "vr33", "vr34", "vr35", "vr36", "vr37", "vr38", "vr39", \ + "vr40", "vr41", "vr42", "vr43", "vr44", "vr45", "vr46", "vr47", \ + "vr48", "vr49", "vr50", "vr51", "vr52", "vr53", "vr54", "vr55", \ + "vr56", "vr57", "vr58", "vr59", "vr60", "vr61", "vr62", "vr63", \ + "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", \ + "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", \ + "lp_start", "lp_end" \ +} + +/* Entry to the insn conditionalizer. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + arc_final_prescan_insn (INSN, OPVEC, NOPERANDS) + +/* A C expression which evaluates to true if CODE is a valid + punctuation character for use in the `PRINT_OPERAND' macro. */ +extern char arc_punct_chars[]; +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \ +arc_punct_chars[(unsigned char) (CHAR)] + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ +#define PRINT_OPERAND(FILE, X, CODE) \ +arc_print_operand (FILE, X, CODE) + +/* A C compound statement to output to stdio stream STREAM the + assembler syntax for an instruction operand that is a memory + reference whose address is ADDR. ADDR is an RTL expression. + + On some machines, the syntax for a symbolic address depends on + the section that the address refers to. On these machines, + define the macro `ENCODE_SECTION_INFO' to store the information + into the `symbol_ref', and then check for it here. */ +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ +arc_print_operand_address (FILE, ADDR) + +/* This is how to output an element of a case-vector that is absolute. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { \ + char label[30]; \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \ + fprintf (FILE, "\t.word "); \ + assemble_name (FILE, label); \ + fprintf(FILE, "\n"); \ +} while (0) + +/* This is how to output an element of a case-vector that is relative. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ +do { \ + char label[30]; \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \ + switch (GET_MODE (BODY)) \ + { \ + case QImode: fprintf (FILE, "\t.byte "); break; \ + case HImode: fprintf (FILE, "\t.hword "); break; \ + case SImode: fprintf (FILE, "\t.word "); break; \ + default: gcc_unreachable (); \ + } \ + assemble_name (FILE, label); \ + fprintf (FILE, "-"); \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", REL); \ + assemble_name (FILE, label); \ + if (TARGET_COMPACT_CASESI) \ + fprintf (FILE, " + %d", 4 + arc_get_unalign ()); \ + fprintf(FILE, "\n"); \ +} while (0) + +/* ADDR_DIFF_VECs are in the text section and thus can affect the + current alignment. 
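+   A worked example of the parity test below (illustrative numbers only):
+   a QImode table with 5 entries is 1 * 5 = 5 bytes, (5 + 1) & 2 == 2, so
+   the unalign state is toggled; with 4 entries, (4 + 1) & 2 == 0 and it
+   is left alone.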
*/
+#define ASM_OUTPUT_CASE_END(FILE, NUM, JUMPTABLE) \
+  do \
+    { \
+      if (GET_CODE (PATTERN (JUMPTABLE)) == ADDR_DIFF_VEC \
+	  && ((GET_MODE_SIZE (GET_MODE (PATTERN (JUMPTABLE))) \
+	       * XVECLEN (PATTERN (JUMPTABLE), 1) + 1) \
+	      & 2)) \
+	arc_toggle_unalign (); \
+    } \
+  while (0)
+
+#define JUMP_ALIGN(LABEL) (arc_size_opt_level < 2 ? 2 : 0)
+#define LABEL_ALIGN_AFTER_BARRIER(LABEL) \
+  (JUMP_ALIGN(LABEL) \
+   ? JUMP_ALIGN(LABEL) \
+   : GET_CODE (PATTERN (prev_active_insn (LABEL))) == ADDR_DIFF_VEC \
+   ? 1 : 0)
+/* The desired alignment for the location counter at the beginning
+   of a loop.  */
+/* On the ARC, align loops to 4 byte boundaries unless doing all-out size
+   optimization.  */
+#define LOOP_ALIGN JUMP_ALIGN
+
+#define LABEL_ALIGN(LABEL) (arc_label_align (LABEL))
+
+/* This is how to output an assembler line
+   that says to advance the location counter
+   to a multiple of 2**LOG bytes.  */
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+do { \
+  if ((LOG) != 0) fprintf (FILE, "\t.align %d\n", 1 << (LOG)); \
+  if ((LOG) > 1) \
+    arc_clear_unalign (); \
+} while (0)
+
+/* ASM_OUTPUT_ALIGNED_DECL_LOCAL (STREAM, DECL, NAME, SIZE, ALIGNMENT)
+   Define this macro when you need to see the variable's decl in order to
+   choose what to output.  */
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGNMENT) \
+  arc_asm_output_aligned_decl_local (STREAM, DECL, NAME, SIZE, ALIGNMENT, 0)
+
+/* To translate the return value of arc_function_type into a register number
+   to jump through for function return.  */
+extern int arc_return_address_regs[4];
+
+/* Debugging information.  */
+
+/* Generate DBX and DWARF debugging information.  */
+#ifdef DBX_DEBUGGING_INFO
+#undef DBX_DEBUGGING_INFO
+#endif
+#define DBX_DEBUGGING_INFO
+
+#ifdef DWARF2_DEBUGGING_INFO
+#undef DWARF2_DEBUGGING_INFO
+#endif
+#define DWARF2_DEBUGGING_INFO
+
+/* Prefer DWARF2 (for now).  */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* How to renumber registers for dbx and gdb.  */
+#define DBX_REGISTER_NUMBER(REGNO) \
+  ((TARGET_MULMAC_32BY16_SET && (REGNO) >= 56 && (REGNO) <= 57) \
+   ? ((REGNO) ^ !TARGET_BIG_ENDIAN) \
+   : (TARGET_MUL64_SET && (REGNO) >= 57 && (REGNO) <= 59) \
+   ? ((REGNO) == 57 \
+      ? 58 /* MMED */ \
+      : ((REGNO) & 1) ^ TARGET_BIG_ENDIAN \
+      ? 59 /* MHI */ \
+      : 57 + !!TARGET_MULMAC_32BY16_SET) /* MLO */ \
+   : (REGNO))
+
+#define DWARF_FRAME_REGNUM(REG) (REG)
+
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (31)
+
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 31)
+
+/* Frame info.  */
+
+/* Define this macro to 0 if your target supports DWARF 2 frame unwind
+   information, but it does not yet work with exception handling.  */
+/* N.B. the below test is valid in an #if, but not in a C expression.  */
+#if DEFAULT_LIBC == LIBC_UCLIBC
+#define DWARF2_UNWIND_INFO 1
+#else
+#define DWARF2_UNWIND_INFO 0
+#endif
+
+#define EH_RETURN_DATA_REGNO(N) \
+  ((N) < 4 ? (N) : INVALID_REGNUM)
+
+/* Turn off splitting of long stabs.  */
+#define DBX_CONTIN_LENGTH 0
+
+/* Miscellaneous.  */
+
+/* Specify the machine mode that this machine uses
+   for the index in the tablejump instruction.
+   If we have pc relative case vectors, we start the case vector shortening
+   with QImode.  */
+#define CASE_VECTOR_MODE \
+  ((optimize && (CASE_VECTOR_PC_RELATIVE || flag_pic)) ? QImode : Pmode)
+
+/* Define as a C expression which evaluates to nonzero if the tablejump
+   instruction expects the table to contain offsets from the address of the
+   table.
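+   For example (an editorial sketch using hypothetical labels): with a
+   pc-relative vector whose offsets all lie in [0,255],
+   CASE_VECTOR_SHORTEN_MODE below picks unsigned QImode, and
+   ASM_OUTPUT_ADDR_DIFF_ELT above then emits the vector as byte-sized
+   label differences:
+
+	.byte .L4-.L2
+	.byte .L5-.L2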
+ Do not define this if the table should contain absolute addresses. */ +#define CASE_VECTOR_PC_RELATIVE TARGET_CASE_VECTOR_PC_RELATIVE + +#define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \ + CASE_VECTOR_SHORTEN_MODE_1 \ + (MIN_OFFSET, TARGET_COMPACT_CASESI ? MAX_OFFSET + 6 : MAX_OFFSET, BODY) + +#define CASE_VECTOR_SHORTEN_MODE_1(MIN_OFFSET, MAX_OFFSET, BODY) \ +((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 255 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, QImode) \ + : (MIN_OFFSET) >= -128 && (MAX_OFFSET) <= 127 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, QImode) \ + : (MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 65535 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, HImode) \ + : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, HImode) \ + : SImode) + +#define ADDR_VEC_ALIGN(VEC_INSN) \ + (exact_log2 (GET_MODE_SIZE (GET_MODE (PATTERN (VEC_INSN))))) +#undef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \ + ASM_OUTPUT_ALIGN ((FILE), ADDR_VEC_ALIGN (TABLE)); + +#define INSN_LENGTH_ALIGNMENT(INSN) \ + ((JUMP_P (INSN) \ + && GET_CODE (PATTERN (INSN)) == ADDR_DIFF_VEC \ + && GET_MODE (PATTERN (INSN)) == QImode) \ + ? 0 : length_unit_log) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, NIL if none. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 + +/* Let the movmem expander handle small block moves. */ +#define MOVE_BY_PIECES_P(LEN, ALIGN) 0 +#define CAN_MOVE_BY_PIECES(SIZE, ALIGN) \ + (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ + < (unsigned int) MOVE_RATIO (!optimize_size)) + +/* Undo the effects of the movmem pattern presence on STORE_BY_PIECES_P . */ +#define MOVE_RATIO(SPEED) ((SPEED) ? 15 : 3) + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. Changed from 1 to 0 for rotate pattern testcases + (e.g. 20020226-1.c). This change truncates the upper 27 bits of a word + while rotating a word. Came to notice through a combine phase + optimization viz. a << (32-b) is equivalent to a << (-b). +*/ +#define SHIFT_COUNT_TRUNCATED 0 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* We assume that the store-condition-codes instructions store 0 for false + and some other value for true. This is the value stored for true. */ +#define STORE_FLAG_VALUE 1 + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +/* ARCompact has full 32-bit pointers. */ +#define Pmode SImode + +/* A function address in a call instruction. */ +#define FUNCTION_MODE SImode + +/* Define the information needed to generate branch and scc insns. This is + stored from the compare operation. Note that we can't use "rtx" here + since it hasn't been defined! 
*/ +extern struct rtx_def *arc_compare_op0, *arc_compare_op1; + +/* ARC function types. */ +enum arc_function_type { + ARC_FUNCTION_UNKNOWN, ARC_FUNCTION_NORMAL, + /* These are interrupt handlers. The name corresponds to the register + name that contains the return address. */ + ARC_FUNCTION_ILINK1, ARC_FUNCTION_ILINK2 +}; +#define ARC_INTERRUPT_P(TYPE) \ +((TYPE) == ARC_FUNCTION_ILINK1 || (TYPE) == ARC_FUNCTION_ILINK2) + +/* Compute the type of a function from its DECL. Needed for EPILOGUE_USES. */ +struct function; +extern enum arc_function_type arc_compute_function_type (struct function *); + +/* Called by crtstuff.c to make calls to function FUNCTION that are defined in + SECTION_OP, and then to switch back to text section. */ +#undef CRT_CALL_STATIC_FUNCTION +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\t" \ + "bl @" USER_LABEL_PREFIX #FUNC "\n" \ + TEXT_SECTION_ASM_OP); + +/* This macro expands to the name of the scratch register r12, used for + temporary calculations according to the ABI. */ +#define ARC_TEMP_SCRATCH_REG "r12" + +/* The C++ compiler must use one bit to indicate whether the function + that will be called through a pointer-to-member-function is + virtual. Normally, we assume that the low-order bit of a function + pointer must always be zero. Then, by ensuring that the + vtable_index is odd, we can distinguish which variant of the union + is in use. But, on some platforms function pointers can be odd, + and so this doesn't work. In that case, we use the low-order bit + of the `delta' field, and shift the remainder of the `delta' field + to the left. We needed to do this for A4 because the address was always + shifted and thus could be odd. */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION \ + (ptrmemfunc_vbit_in_pfn) + +#define INSN_SETS_ARE_DELAYED(X) \ + (GET_CODE (X) == INSN \ + && GET_CODE (PATTERN (X)) != SEQUENCE \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER \ + && (get_attr_type (X) == TYPE_CALL || get_attr_type (X) == TYPE_SFUNC)) + +#define INSN_REFERENCES_ARE_DELAYED(insn) INSN_SETS_ARE_DELAYED (insn) + +#define CALL_ATTR(X, NAME) \ + ((CALL_P (X) || NONJUMP_INSN_P (X)) \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER \ + && get_attr_is_##NAME (X) == IS_##NAME##_YES) \ + +#define REVERSE_CONDITION(CODE,MODE) \ + (((MODE) == CC_FP_GTmode || (MODE) == CC_FP_GEmode \ + || (MODE) == CC_FP_UNEQmode || (MODE) == CC_FP_ORDmode \ + || (MODE) == CC_FPXmode) \ + ? reverse_condition_maybe_unordered ((CODE)) \ + : reverse_condition ((CODE))) + +#define ADJUST_INSN_LENGTH(X, LENGTH) \ + ((LENGTH) \ + = (GET_CODE (PATTERN (X)) == SEQUENCE \ + ? 
((LENGTH) \
+	 + arc_adjust_insn_length (XVECEXP (PATTERN (X), 0, 0), \
+				   get_attr_length (XVECEXP (PATTERN (X), \
+							     0, 0)), \
+				   true) \
+	 - get_attr_length (XVECEXP (PATTERN (X), 0, 0)) \
+	 + arc_adjust_insn_length (XVECEXP (PATTERN (X), 0, 1), \
+				   get_attr_length (XVECEXP (PATTERN (X), \
+							     0, 1)), \
+				   true) \
+	 - get_attr_length (XVECEXP (PATTERN (X), 0, 1))) \
+      : arc_adjust_insn_length ((X), (LENGTH), false)))
+
+#define IS_ASM_LOGICAL_LINE_SEPARATOR(C,STR) ((C) == '`')
+
+#define INIT_EXPANDERS arc_init_expanders ()
+
+#define CFA_FRAME_BASE_OFFSET(FUNDECL) (-arc_decl_pretend_args ((FUNDECL)))
+
+#define ARG_POINTER_CFA_OFFSET(FNDECL) \
+  (FIRST_PARM_OFFSET (FNDECL) + arc_decl_pretend_args ((FNDECL)))
+
+enum
+{
+  ARC_LRA_PRIORITY_NONE, ARC_LRA_PRIORITY_NONCOMPACT, ARC_LRA_PRIORITY_COMPACT
+};
+
+/* The define_cond_exec construct is rather crude, as we can't have
+   different ones with different conditions apply to different sets
+   of instructions.  We can't use an attribute test inside the condition,
+   because that would lead to infinite recursion as the attribute test
+   needs to recognize the insn.  So, instead we have a clause for
+   the pattern condition of all sfunc patterns which is only relevant for
+   the predicated variant.  */
+#define SFUNC_CHECK_PREDICABLE \
+  (GET_CODE (PATTERN (insn)) != COND_EXEC || !flag_pic || !TARGET_MEDIUM_CALLS)
+
+#endif /* GCC_ARC_H */
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
new file mode 100644
index 00000000000..37103fe6ce0
--- /dev/null
+++ b/gcc/config/arc/arc.md
@@ -0,0 +1,5190 @@
+;; Machine description of the Synopsys DesignWare ARC cpu for GNU C compiler
+;; Copyright (C) 1994, 1997, 1999, 2006-2013
+;; Free Software Foundation, Inc.
+
+;; Sources derived from work done by Sankhya Technologies (www.sankhya.com) on
+;; behalf of Synopsys Inc.
+
+;; Position Independent Code support added, code cleaned up,
+;; comments and support for ARC700 instructions added by
+;;    Saurabh Verma (saurabh.verma@codito.com)
+;;    Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)
+;;
+;; Profiling support and performance improvements by
+;;    Joern Rennecke (joern.rennecke@embecosm.com)
+;;
+;; Support for DSP multiply instructions and mul64
+;; instructions for ARC600; and improvements in flag setting
+;; instructions by
+;;    Muhammad Khurram Riaz (Khurram.Riaz@arc.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;; <op> dest, src          Two operand instruction's syntax
+;; <op> dest, src1, src2   Three operand instruction's syntax
+
+;; ARC and ARCompact PREDICATES:
+;;
+;; comparison_operator   LT, GT, LE, GE, LTU, GTU, LEU, GEU, EQ, NE
+;; memory_operand        memory                      [m]
+;; immediate_operand     immediate constant          [IKLMNOP]
+;; register_operand      register                    [rq]
+;; general_operand       register, memory, constant  [rqmIKLMNOP]
+
+;; Note that the predicates are only used when selecting a pattern
+;; to determine if an operand is valid.
+
+;; The constraints then select which of the possible valid operands
+;; is present (and guide register selection).  The actual assembly
+;; instruction is then selected on the basis of the constraints.
+
+;; ARC and ARCompact CONSTRAINTS:
+;;
+;; b   stack pointer                       r28
+;; f   frame pointer                       r27
+;; Rgp global pointer                      r26
+;; g   general reg, memory, constant
+;; m   memory
+;; p   memory address
+;; q   registers commonly used in
+;;     16-bit insns                        r0-r3, r12-r15
+;; c   core registers                      r0-r60, ap, pcl
+;; r   general registers                   r0-r28, blink, ap, pcl
+;;
+;; H   fp 16-bit constant
+;; I   signed 12-bit immediate (for ARCompact)
+;; K   unsigned 3-bit immediate (for ARCompact)
+;; L   unsigned 6-bit immediate (for ARCompact)
+;; M   unsigned 5-bit immediate (for ARCompact)
+;; O   unsigned 7-bit immediate (for ARCompact)
+;; P   unsigned 8-bit immediate (for ARCompact)
+;; N   constant '1' (for ARCompact)
+
+
+;; TODO:
+;; -> prefetch instruction
+
+;; -----------------------------------------------------------------------------
+
+;; Include DFA schedulers
+(include ("arc600.md"))
+(include ("arc700.md"))
+
+;; Predicates
+
+(include ("predicates.md"))
+(include ("constraints.md"))
+;; -----------------------------------------------------------------------------
+
+;; UNSPEC Usage:
+;; ~~~~~~~~~~~~
+;; -----------------------------------------------------------------------------
+;; Symbolic name   Value   Desc.
+;; -----------------------------------------------------------------------------
+;; UNSPEC_PLT      3       symbol to be referenced through the PLT
+;; UNSPEC_GOT      4       symbol to be referenced through the GOT
+;; UNSPEC_GOTOFF   5       Local symbol.  To be referenced relative to the
+;;                         GOTBASE.  (Referenced as @GOTOFF)
+;; ----------------------------------------------------------------------------
+
+
+(define_constants
+  [(UNSPEC_SWAP 13) ; swap generation through builtins. candidate for scheduling
+   (UNSPEC_MUL64 14) ; mul64 generation through builtins. candidate for scheduling
+   (UNSPEC_MULU64 15) ; mulu64 generation through builtins. candidate for scheduling
+   (UNSPEC_DIVAW 16) ; divaw generation through builtins.
candidate for scheduling + (UNSPEC_DIRECT 17) + (UNSPEC_PROF 18) ; profile callgraph counter + (UNSPEC_LP 19) ; to set LP_END + (UNSPEC_CASESI 20) + (VUNSPEC_RTIE 17) ; blockage insn for rtie generation + (VUNSPEC_SYNC 18) ; blockage insn for sync generation + (VUNSPEC_BRK 19) ; blockage insn for brk generation + (VUNSPEC_FLAG 20) ; blockage insn for flag generation + (VUNSPEC_SLEEP 21) ; blockage insn for sleep generation + (VUNSPEC_SWI 22) ; blockage insn for swi generation + (VUNSPEC_CORE_READ 23) ; blockage insn for reading a core register + (VUNSPEC_CORE_WRITE 24) ; blockage insn for writing to a core register + (VUNSPEC_LR 25) ; blockage insn for reading an auxiliary register + (VUNSPEC_SR 26) ; blockage insn for writing to an auxiliary register + (VUNSPEC_TRAP_S 27) ; blockage insn for trap_s generation + (VUNSPEC_UNIMP_S 28) ; blockage insn for unimp_s generation + + (R0_REG 0) + (R1_REG 1) + (R2_REG 2) + (R3_REG 3) + (R12_REG 12) + (SP_REG 28) + (ILINK1_REGNUM 29) + (ILINK2_REGNUM 30) + (RETURN_ADDR_REGNUM 31) + (MUL64_OUT_REG 58) + + (VUNSPEC_DEXCL 32) ; blockage insn for reading an auxiliary register without LR support + (VUNSPEC_DEXCL_NORES 33) ; blockage insn for reading an auxiliary register without LR support + (VUNSPEC_LR_HIGH 34) ; blockage insn for reading an auxiliary register + + (LP_COUNT 60) + (CC_REG 61) + (LP_START 144) + (LP_END 145) + ] +) + +(define_attr "is_sfunc" "no,yes" (const_string "no")) + +;; Insn type. Used to default other attribute values. +; While the attribute is_sfunc is set for any call of a special function, +; the instruction type sfunc is used only for the special call sequence +; that loads the (pc-relative) function address into r12 and then calls +; via r12. + +(define_attr "type" + "move,load,store,cmove,unary,binary,compare,shift,uncond_branch,jump,branch, + brcc,brcc_no_delay_slot,call,sfunc,call_no_delay_slot, + multi,umulti, two_cycle_core,lr,sr,divaw,loop_setup,loop_end,return, + misc,spfp,dpfp_mult,dpfp_addsub,mulmac_600,cc_arith, + simd_vload, simd_vload128, simd_vstore, simd_vmove, simd_vmove_else_zero, + simd_vmove_with_acc, simd_varith_1cycle, simd_varith_2cycle, + simd_varith_with_acc, simd_vlogic, simd_vlogic_with_acc, + simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc, + simd_valign, simd_valign_with_acc, simd_vcontrol, + simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma" + (cond [(eq_attr "is_sfunc" "yes") + (cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call") + (match_test "flag_pic") (const_string "sfunc")] + (const_string "call_no_delay_slot"))] + (const_string "binary"))) + +;; The following three attributes are mixed case so that they can be +;; used conveniently with the CALL_ATTR macro. +(define_attr "is_CALL" "no,yes" + (cond [(eq_attr "is_sfunc" "yes") (const_string "yes") + (eq_attr "type" "call,call_no_delay_slot") (const_string "yes")] + (const_string "no"))) + +(define_attr "is_SIBCALL" "no,yes" (const_string "no")) + +(define_attr "is_NON_SIBCALL" "no,yes" + (cond [(eq_attr "is_SIBCALL" "yes") (const_string "no") + (eq_attr "is_CALL" "yes") (const_string "yes")] + (const_string "no"))) + + +;; Attribute describing the processor +(define_attr "cpu" "none,A5,ARC600,ARC700" + (const (symbol_ref "arc_cpu_attr"))) + +;; true for compact instructions (those with _s suffix) +;; "maybe" means compact unless we conditionalize the insn. 
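+;; For illustration (an editorial sketch, not an insn from this port): a
+;; pattern whose output can be the 2-byte "add_s %0,%1,%2" would say
+;;   (set_attr "iscompact" "maybe")
+;; because once the insn is conditionalized to e.g. "add.eq %0,%1,%2",
+;; only the 32-bit encoding remains available.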
+(define_attr "iscompact" "true,maybe,true_limm,maybe_limm,false" + (cond [(eq_attr "type" "sfunc") + (const_string "maybe")] + (const_string "false"))) + + +; Is there an instruction that we are actually putting into the delay slot? +(define_attr "delay_slot_filled" "no,yes" + (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_string "no") + (match_test "!TARGET_AT_DBR_CONDEXEC + && JUMP_P (insn) + && INSN_ANNULLED_BRANCH_P (insn) + && !INSN_FROM_TARGET_P (NEXT_INSN (insn))") + (const_string "no")] + (const_string "yes"))) + +; Is a delay slot present for purposes of shorten_branches? +; We have to take the length of this insn into account for forward branches +; even if we don't put the insn actually into a delay slot. +(define_attr "delay_slot_present" "no,yes" + (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_string "no")] + (const_string "yes"))) + +; We can't use get_attr_length (NEXT_INSN (insn)) because this gives the +; length of a different insn with the same uid. +(define_attr "delay_slot_length" "" + (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_int 0)] + (symbol_ref "get_attr_length (NEXT_INSN (PREV_INSN (insn))) + - get_attr_length (insn)"))) + + +(define_attr "enabled" "no,yes" (const_string "yes")) + +(define_attr "predicable" "no,yes" (const_string "no")) +;; if 'predicable' were not so brain-dead, we would specify: +;; (cond [(eq_attr "cond" "!canuse") (const_string "no") +;; (eq_attr "iscompact" "maybe") (const_string "no")] +;; (const_string "yes")) +;; and then for everything but calls, we could just set the cond attribute. + +;; Condition codes: this one is used by final_prescan_insn to speed up +;; conditionalizing instructions. It saves having to scan the rtl to see if +;; it uses or alters the condition codes. + +;; USE: This insn uses the condition codes (eg: a conditional branch). +;; CANUSE: This insn can use the condition codes (for conditional execution). +;; SET: All condition codes are set by this insn. +;; SET_ZN: the Z and N flags are set by this insn. +;; SET_ZNC: the Z, N, and C flags are set by this insn. +;; CLOB: The condition codes are set to unknown values by this insn. +;; NOCOND: This insn can't use and doesn't affect the condition codes. 
+ +(define_attr "cond" "use,canuse,canuse_limm,canuse_limm_add,set,set_zn,clob,nocond" + (cond + [(and (eq_attr "predicable" "yes") + (eq_attr "is_sfunc" "no") + (eq_attr "delay_slot_filled" "no")) + (const_string "canuse") + + (eq_attr "type" "call") + (cond [(eq_attr "delay_slot_filled" "yes") (const_string "nocond") + (match_test "!flag_pic") (const_string "canuse_limm")] + (const_string "nocond")) + + (eq_attr "iscompact" "maybe,false") + (cond [ (and (eq_attr "type" "move") + (match_operand 1 "immediate_operand" "")) + (if_then_else + (ior (match_operand 1 "u6_immediate_operand" "") + (match_operand 1 "long_immediate_operand" "")) + (const_string "canuse") + (const_string "canuse_limm")) + + (eq_attr "type" "binary") + (cond [(ne (symbol_ref "REGNO (operands[0])") + (symbol_ref "REGNO (operands[1])")) + (const_string "nocond") + (match_operand 2 "register_operand" "") + (const_string "canuse") + (match_operand 2 "u6_immediate_operand" "") + (const_string "canuse") + (match_operand 2 "long_immediate_operand" "") + (const_string "canuse") + (match_operand 2 "const_int_operand" "") + (const_string "canuse_limm")] + (const_string "nocond")) + + (eq_attr "type" "compare") + (const_string "set") + + (eq_attr "type" "cmove,branch") + (const_string "use") + + (eq_attr "is_sfunc" "yes") + (cond [(match_test "(TARGET_MEDIUM_CALLS + && !TARGET_LONG_CALLS_SET + && flag_pic)") + (const_string "canuse_limm_add") + (match_test "(TARGET_MEDIUM_CALLS + && !TARGET_LONG_CALLS_SET)") + (const_string "canuse_limm")] + (const_string "canuse")) + + ] + + (const_string "nocond"))] + + (cond [(eq_attr "type" "compare") + (const_string "set") + + (eq_attr "type" "cmove,branch") + (const_string "use") + + ] + + (const_string "nocond")))) + +/* ??? Having all these patterns gives ifcvt more freedom to generate + inefficient code. It seem to operate on the premise that + register-register copies and registers are free. I see better code + with -fno-if-convert now than without. */ +(define_cond_exec + [(match_operator 0 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)])] + "true" + "") + +;; Length (in # of bytes, long immediate constants counted too). +;; ??? There's a nasty interaction between the conditional execution fsm +;; and insn lengths: insns with shimm values cannot be conditionally executed. 
+(define_attr "length" "" + (cond + [(eq_attr "iscompact" "true,maybe") + (cond + [(eq_attr "type" "sfunc") + (cond [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") + (const_int 12)] + (const_int 10)) + (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)] + (const_int 2)) + + (eq_attr "iscompact" "true_limm,maybe_limm") + (const_int 6) + + (eq_attr "type" "load") + (if_then_else + (match_operand 1 "long_immediate_loadstore_operand" "") + (const_int 8) (const_int 4)) + + (eq_attr "type" "store") + (if_then_else + (ior (match_operand 0 "long_immediate_loadstore_operand" "") + (match_operand 1 "immediate_operand" "")) + (const_int 8) (const_int 4)) + + (eq_attr "type" "move,unary") + (cond + [(match_operand 1 "u6_immediate_operand" "") (const_int 4) + (match_operand 1 "register_operand" "") (const_int 4) + (match_operand 1 "long_immediate_operand" "") (const_int 8) + (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 8)] + (const_int 4)) + + (and (eq_attr "type" "shift") + (match_operand 1 "immediate_operand")) + (const_int 8) + (eq_attr "type" "binary,shift") + (if_then_else + (ior (match_operand 2 "long_immediate_operand" "") + (and (ne (symbol_ref "REGNO (operands[0])") + (symbol_ref "REGNO (operands[1])")) + (eq (match_operand 2 "u6_immediate_operand" "") + (const_int 0)))) + + (const_int 8) (const_int 4)) + + (eq_attr "type" "cmove") + (if_then_else (match_operand 1 "register_operand" "") + (const_int 4) (const_int 8)) + + (eq_attr "type" "call_no_delay_slot") (const_int 8) + ] + + (const_int 4)) +) + +;; The length here is the length of a single asm. Unfortunately it might be +;; 4 or 8 so we must allow for 8. That's ok though. How often will users +;; lament asm's not being put in delay slots? +;; +(define_asm_attributes + [(set_attr "length" "8") + (set_attr "type" "multi") + (set_attr "cond" "clob") ]) + +;; Delay slots. +;; The first two cond clauses and the default are necessary for correctness; +;; the remaining cond clause is mainly an optimization, as otherwise nops +;; would be inserted; however, if we didn't do this optimization, we would +;; have to be more conservative in our length calculations. + +(define_attr "in_delay_slot" "false,true" + (cond [(eq_attr "type" "uncond_branch,jump,branch, + call,sfunc,call_no_delay_slot, + brcc, brcc_no_delay_slot,loop_setup,loop_end") + (const_string "false") + (match_test "arc_write_ext_corereg (insn)") + (const_string "false") + (gt (symbol_ref "arc_hazard (prev_active_insn (insn), + next_active_insn (insn))") + (symbol_ref "(arc_hazard (prev_active_insn (insn), insn) + + arc_hazard (insn, next_active_insn (insn)))")) + (const_string "false") + (eq_attr "iscompact" "maybe") (const_string "true") + ] + + (if_then_else (eq_attr "length" "2,4") + (const_string "true") + (const_string "false")))) + +; must not put an insn inside that refers to blink. +(define_attr "in_call_delay_slot" "false,true" + (cond [(eq_attr "in_delay_slot" "false") + (const_string "false") + (match_test "arc_regno_use_in (RETURN_ADDR_REGNUM, PATTERN (insn))") + (const_string "false")] + (const_string "true"))) + +(define_attr "in_sfunc_delay_slot" "false,true" + (cond [(eq_attr "in_call_delay_slot" "false") + (const_string "false") + (match_test "arc_regno_use_in (12, PATTERN (insn))") + (const_string "false")] + (const_string "true"))) + +;; Instructions that we can put into a delay slot and conditionalize. 
+(define_attr "cond_delay_insn" "no,yes" + (cond [(eq_attr "cond" "!canuse") (const_string "no") + (eq_attr "type" "call,branch,uncond_branch,jump,brcc") + (const_string "no") + (eq_attr "length" "2,4") (const_string "yes")] + (const_string "no"))) + +(define_attr "in_ret_delay_slot" "no,yes" + (cond [(eq_attr "in_delay_slot" "false") + (const_string "no") + (match_test "regno_clobbered_p + (arc_return_address_regs + [arc_compute_function_type (cfun)], + insn, SImode, 1)") + (const_string "no")] + (const_string "yes"))) + +(define_attr "cond_ret_delay_insn" "no,yes" + (cond [(eq_attr "in_ret_delay_slot" "no") (const_string "no") + (eq_attr "cond_delay_insn" "no") (const_string "no")] + (const_string "yes"))) + +(define_attr "annul_ret_delay_insn" "no,yes" + (cond [(eq_attr "cond_ret_delay_insn" "yes") (const_string "yes") + (match_test "TARGET_AT_DBR_CONDEXEC") (const_string "no") + (eq_attr "type" "!call,branch,uncond_branch,jump,brcc,return,sfunc") + (const_string "yes")] + (const_string "no"))) + + +;; Delay slot definition for ARCompact ISA +;; ??? FIXME: +;; When outputting an annul-true insn elegible for cond-exec +;; in a cbranch delay slot, unless optimizing for size, we use cond-exec +;; for ARC600; we could also use this for ARC700 if the branch can't be +;; unaligned and is at least somewhat likely (add parameter for this). + +(define_delay (eq_attr "type" "call") + [(eq_attr "in_call_delay_slot" "true") + (eq_attr "in_call_delay_slot" "true") + (nil)]) + +(define_delay (and (match_test "!TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "brcc")) + [(eq_attr "in_delay_slot" "true") + (eq_attr "in_delay_slot" "true") + (nil)]) + +(define_delay (and (match_test "TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "brcc")) + [(eq_attr "in_delay_slot" "true") + (nil) + (nil)]) + +(define_delay + (eq_attr "type" "return") + [(eq_attr "in_ret_delay_slot" "yes") + (eq_attr "annul_ret_delay_insn" "yes") + (eq_attr "cond_ret_delay_insn" "yes")]) + +;; For ARC600, unexposing the delay sloy incurs a penalty also in the +;; non-taken case, so the only meaningful way to have an annull-true +;; filled delay slot is to conditionalize the delay slot insn. +(define_delay (and (match_test "TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "branch,uncond_branch,jump") + (match_test "!optimize_size")) + [(eq_attr "in_delay_slot" "true") + (eq_attr "cond_delay_insn" "yes") + (eq_attr "cond_delay_insn" "yes")]) + +;; For ARC700, anything goes for annulled-true insns, since there is no +;; penalty for the unexposed delay slot when the branch is not taken, +;; however, we must avoid things that have a delay slot themselvese to +;; avoid confusing gcc. +(define_delay (and (match_test "!TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "branch,uncond_branch,jump") + (match_test "!optimize_size")) + [(eq_attr "in_delay_slot" "true") + (eq_attr "type" "!call,branch,uncond_branch,jump,brcc,return,sfunc") + (eq_attr "cond_delay_insn" "yes")]) + +;; -mlongcall -fpic sfuncs use r12 to load the function address +(define_delay (eq_attr "type" "sfunc") + [(eq_attr "in_sfunc_delay_slot" "true") + (eq_attr "in_sfunc_delay_slot" "true") + (nil)]) +;; ??? need to use a working strategy for canuse_limm: +;; - either canuse_limm is not eligible for delay slots, and has no +;; delay slots, or arc_reorg has to treat them as nocond, or it has to +;; somehow modify them to become inelegible for delay slots if a decision +;; is made that makes conditional execution required. 
+ +(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac" + (const + (cond [(symbol_ref "arc_tune == TUNE_ARC600") + (const_string "arc600") + (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD") + (const_string "arc700_4_2_std") + (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC") + (const_string "arc700_4_2_xmac")] + (const_string "none")))) + +(define_attr "tune_arc700" "false,true" + (if_then_else (eq_attr "tune" "arc700_4_2_std, arc700_4_2_xmac") + (const_string "true") + (const_string "false"))) + +;; Move instructions. +(define_expand "movqi" + [(set (match_operand:QI 0 "move_dest_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + "if (prepare_move_operands (operands, QImode)) DONE;") + +; In order to allow the ccfsm machinery to do its work, the leading compact +; alternatives say 'canuse' - there is another alternative that will match +; when the condition codes are used. +; Rcq won't match if the condition is actually used; to avoid a spurious match +; via q, q is inactivated as constraint there. +; Likewise, the length of an alternative that might be shifted to conditional +; execution must reflect this, lest out-of-range branches are created. +; The iscompact attribute allows the epilogue expander to know for which +; insns it should lengthen the return insn. +(define_insn "*movqi_insn" + [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w,???w, w,Rcq,S,!*x,r,m,???m") + (match_operand:QI 1 "move_src_operand" "cL,cP,Rcq#q,cL,I,?Rac,?i,T,Rcq,Usd,m,c,?Rac"))] + "register_operand (operands[0], QImode) + || register_operand (operands[1], QImode)" + "@ + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%S1 + ldb%? %0,%1%& + stb%? %1,%0%& + ldb%? %0,%1%& + ldb%U1%V1 %0,%1 + stb%U0%V0 %1,%0 + stb%U0%V0 %1,%0" + [(set_attr "type" "move,move,move,move,move,move,move,load,store,load,load,store,store") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,true,true,true,false,false,false") + (set_attr "predicable" "yes,no,yes,yes,no,yes,yes,no,no,no,no,no,no")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "move_dest_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + "if (prepare_move_operands (operands, HImode)) DONE;") + +(define_insn "*movhi_insn" + [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w,???w,Rcq#q,w,Rcq,S,r,m,???m,VUsc") + (match_operand:HI 1 "move_src_operand" "cL,cP,Rcq#q,cL,I,?Rac, ?i,?i,T,Rcq,m,c,?Rac,i"))] + "register_operand (operands[0], HImode) + || register_operand (operands[1], HImode) + || (CONSTANT_P (operands[1]) + /* Don't use a LIMM that we could load with a single insn - we loose + delay-slot filling opportunities. */ + && !satisfies_constraint_I (operands[1]) + && satisfies_constraint_Usc (operands[0]))" + "@ + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%S1%& + mov%? %0,%S1 + ldw%? %0,%1%& + stw%? 
+   stw%? %1,%0%&
+   ldw%U1%V1 %0,%1
+   stw%U0%V0 %1,%0
+   stw%U0%V0 %1,%0
+   stw%U0%V0 %S1,%0"
+  [(set_attr "type" "move,move,move,move,move,move,move,move,load,store,load,store,store,store")
+   (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,maybe_limm,false,true,true,false,false,false,false")
+   (set_attr "predicable" "yes,no,yes,yes,no,yes,yes,yes,no,no,no,no,no,no")])
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "move_dest_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+  "if (prepare_move_operands (operands, SImode)) DONE;")
+
+; In order to allow the ccfsm machinery to do its work, the leading compact
+; alternatives say 'canuse' - there is another alternative that will match
+; when the condition codes are used.
+; Rcq won't match if the condition is actually used; to avoid a spurious match
+; via q, q is inactivated as constraint there.
+; Likewise, the length of an alternative that might be shifted to conditional
+; execution must reflect this, lest out-of-range branches are created.
+; The iscompact attribute allows the epilogue expander to know for which
+; insns it should lengthen the return insn.
+; N.B. operand 1 of alternative 7 expands into pcl,symbol@gotpc.
+(define_insn "*movsi_insn"
+  [(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w, w,???w, ?w, w,Rcq#q, w,Rcq, S,Us<,RcqRck,!*x,r,m,???m,VUsc")
+	(match_operand:SI 1 "move_src_operand" " cL,cP,Rcq#q,cL,I,Crr,?Rac,Cpc,Clb,?Cal,?Cal,T,Rcq,RcqRck,Us>,Usd,m,c,?Rac,C32"))]
+  "register_operand (operands[0], SImode)
+   || register_operand (operands[1], SImode)
+   || (CONSTANT_P (operands[1])
+       /* Don't use a LIMM that we could load with a single insn - we lose
+	  delay-slot filling opportunities.  */
+       && !satisfies_constraint_I (operands[1])
+       && satisfies_constraint_Usc (operands[0]))"
+  "@
+   mov%? %0,%1%&	;0
+   mov%? %0,%1%&	;1
+   mov%? %0,%1%&	;2
+   mov%? %0,%1		;3
+   mov%? %0,%1		;4
+   ror %0,((%1*2+1) & 0x3f) ;5
+   mov%? %0,%1		;6
+   add %0,%S1		;7
+   * return arc_get_unalign () ? \"add %0,pcl,%1-.+2\" : \"add %0,pcl,%1-.\";
+   mov%? %0,%S1%&	;9
+   mov%? %0,%S1		;10
+   ld%? %0,%1%&		;11
+   st%? %1,%0%&		;12
+   * return arc_short_long (insn, \"push%? %1%&\", \"st%U0 %1,%0%&\");
+   * return arc_short_long (insn, \"pop%? %0%&\", \"ld%U1 %0,%1%&\");
+   ld%? %0,%1%&		;15
+   ld%U1%V1 %0,%1	;16
+   st%U0%V0 %1,%0	;17
+   st%U0%V0 %1,%0	;18
+   st%U0%V0 %S1,%0	;19"
+  [(set_attr "type" "move,move,move,move,move,two_cycle_core,move,binary,binary,move,move,load,store,store,load,load,load,store,store,store")
+   (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,false,false,maybe_limm,false,true,true,true,true,true,false,false,false,false")
+   ; Use default length for iscompact to allow for COND_EXEC.  But set length
+   ; of Crr to 4.
+   (set_attr "length" "*,*,*,4,4,4,4,8,8,*,8,*,*,*,*,*,*,*,*,8")
+   (set_attr "predicable" "yes,no,yes,yes,no,no,yes,no,no,yes,yes,no,no,no,no,no,no,no,no,no")])
+
+;; Sometimes generated by the epilogue code.  We don't want to
+;; recognize these addresses in general, because the limm is costly,
+;; and we can't use them for stores.
+(define_insn "*movsi_pre_mod"
+  [(set (match_operand:SI 0 "register_operand" "=w")
+	(mem:SI (pre_modify
+		  (reg:SI SP_REG)
+		  (plus:SI (reg:SI SP_REG)
+			   (match_operand 1 "immediate_operand" "Cal")))))]
+  "reload_completed"
+  "ld.a %0,[sp,%1]"
+  [(set_attr "type" "load")
+   (set_attr "length" "8")])
+
+;; Store a value directly to memory.  The location might also be cached.
+;; Since the cached copy can cause a write-back at unpredictable times, +;; we first write cached, then we write uncached. +(define_insn "store_direct" + [(set (match_operand:SI 0 "move_dest_operand" "=m") + (unspec:SI [(match_operand:SI 1 "register_operand" "c")] + UNSPEC_DIRECT))] + "" + "st%U0 %1,%0\;st%U0.di %1,%0" + [(set_attr "type" "store")]) + +(define_insn_and_split "*movsi_set_cc_insn" + [(set (match_operand:CC_ZN 2 "cc_set_register" "") + (match_operator 3 "zn_compare_operator" + [(match_operand:SI 1 "nonmemory_operand" "cI,cL,Cal") (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (match_dup 1))] + "" + "mov%?.f %0,%S1" + ; splitting to 'tst' allows short insns and combination into brcc. + "reload_completed && operands_match_p (operands[0], operands[1])" + [(set (match_dup 2) (match_dup 3))] + "" + [(set_attr "type" "compare") + (set_attr "predicable" "no,yes,yes") + (set_attr "cond" "set_zn") + (set_attr "length" "4,4,8")]) + +(define_insn "unary_comparison" + [(set (match_operand:CC_ZN 0 "cc_set_register" "") + (match_operator 3 "zn_compare_operator" + [(match_operator:SI 2 "unary_operator" + [(match_operand:SI 1 "register_operand" "c")]) + (const_int 0)]))] + "" + "%O2.f 0,%1" + [(set_attr "type" "compare") + (set_attr "cond" "set_zn")]) + + +; this pattern is needed by combiner for cases like if (c=(~b)) { ... } +(define_insn "*unary_comparison_result_used" + [(set (match_operand 2 "cc_register" "") + (match_operator 4 "zn_compare_operator" + [(match_operator:SI 3 "unary_operator" + [(match_operand:SI 1 "register_operand" "c")]) + (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w") + (match_dup 3))] + "" + "%O3.f %0,%1" + [(set_attr "type" "compare") + (set_attr "cond" "set_zn") + (set_attr "length" "4")]) + +(define_insn "*tst" + [(set + (match_operand 0 "cc_register" "") + (match_operator 3 "zn_compare_operator" + [(and:SI + (match_operand:SI 1 "register_operand" + "%Rcq,Rcq, c, c, c, c, c, c") + (match_operand:SI 2 "nonmemory_operand" + " Rcq,C0p,cI,cL,C1p,Ccp,CnL,Cal")) + (const_int 0)]))] + "(register_operand (operands[1], SImode) + && nonmemory_operand (operands[2], SImode)) + || (memory_operand (operands[1], SImode) + && satisfies_constraint_Cux (operands[2]))" + "* + switch (which_alternative) + { + case 0: case 2: case 3: case 7: + return \"tst%? %1,%2\"; + case 1: + return \"btst%? 
%1,%z2\"; + case 4: + return \"bmsk%?.f 0,%1,%Z2%&\"; + case 5: + return \"bclr%?.f 0,%1,%M2%&\"; + case 6: + return \"bic%?.f 0,%1,%n2-1\"; + default: + gcc_unreachable (); + } + " + [(set_attr "iscompact" "maybe,maybe,false,false,false,false,false,false") + (set_attr "type" "compare") + (set_attr "length" "*,*,4,4,4,4,4,8") + (set_attr "predicable" "no,yes,no,yes,no,no,no,yes") + (set_attr "cond" "set_zn")]) + +(define_insn "*commutative_binary_comparison" + [(set (match_operand:CC_ZN 0 "cc_set_register" "") + (match_operator 5 "zn_compare_operator" + [(match_operator:SI 4 "commutative_operator" + [(match_operand:SI 1 "register_operand" "%c,c,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + (clobber (match_scratch:SI 3 "=X,1,X"))] + "" + "%O4.f 0,%1,%2" + [(set_attr "type" "compare") + (set_attr "cond" "set_zn") + (set_attr "length" "4,4,8")]) + +; for flag setting 'add' instructions like if (a+b) { ...} +; the combiner needs this pattern +(define_insn "*addsi_compare" + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN (match_operand:SI 0 "register_operand" "c") + (neg:SI (match_operand:SI 1 "register_operand" "c"))))] + "" + "add.f 0,%0,%1" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4")]) + +; for flag setting 'add' instructions like if (a+b < a) { ...} +; the combiner needs this pattern +(define_insn "addsi_compare_2" + [(set (reg:CC_C CC_REG) + (compare:CC_C (plus:SI (match_operand:SI 0 "register_operand" "c,c") + (match_operand:SI 1 "nonmemory_operand" "cL,Cal")) + (match_dup 0)))] + "" + "add.f 0,%0,%1" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,8")]) + +(define_insn "*addsi_compare_3" + [(set (reg:CC_C CC_REG) + (compare:CC_C (plus:SI (match_operand:SI 0 "register_operand" "c") + (match_operand:SI 1 "register_operand" "c")) + (match_dup 1)))] + "" + "add.f 0,%0,%1" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4")]) + +; this pattern is needed by combiner for cases like if (c=a+b) { ... } +(define_insn "*commutative_binary_comparison_result_used" + [(set (match_operand 3 "cc_register" "") + (match_operator 5 "zn_compare_operator" + ; We can accept any commutative operator except mult because + ; our 'w' class below could try to use LP_COUNT. + [(match_operator:SI 4 "commutative_operator_sans_mult" + [(match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (match_dup 4))] + "" + "%O4.f %0,%1,%2 ; non-mult commutative" + [(set_attr "type" "compare,compare,compare") + (set_attr "cond" "set_zn,set_zn,set_zn") + (set_attr "length" "4,4,8")]) + +; a MULT-specific version of this pattern to avoid touching the +; LP_COUNT register +(define_insn "*commutative_binary_mult_comparison_result_used" + [(set (match_operand 3 "cc_register" "") + (match_operator 5 "zn_compare_operator" + [(match_operator:SI 4 "mult_operator" + [(match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + ; Make sure to use the W class to not touch LP_COUNT. + (set (match_operand:SI 0 "register_operand" "=W,W,W") + (match_dup 4))] + "TARGET_ARC700" + "%O4.f %0,%1,%2 ; mult commutative" + [(set_attr "type" "compare,compare,compare") + (set_attr "cond" "set_zn,set_zn,set_zn") + (set_attr "length" "4,4,8")]) + +; this pattern is needed by combiner for cases like if (c=a<<b) { ... 
} +(define_insn "*noncommutative_binary_comparison_result_used" + [(set (match_operand 3 "cc_register" "") + (match_operator 5 "zn_compare_operator" + [(match_operator:SI 4 "noncommutative_operator" + [(match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (match_dup 4 ))] + "TARGET_BARREL_SHIFTER || GET_CODE (operands[4]) == MINUS" + "%O4.f %0,%1,%2" + [(set_attr "type" "compare,compare,compare") + (set_attr "cond" "set_zn,set_zn,set_zn") + (set_attr "length" "4,4,8")]) + +(define_insn "*noncommutative_binary_comparison" + [(set (match_operand:CC_ZN 0 "cc_set_register" "") + (match_operator 5 "zn_compare_operator" + [(match_operator:SI 4 "noncommutative_operator" + [(match_operand:SI 1 "register_operand" "c,c,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + (clobber (match_scratch:SI 3 "=X,1,X"))] + "TARGET_BARREL_SHIFTER || GET_CODE (operands[4]) == MINUS" + "%O4.f 0,%1,%2" + [(set_attr "type" "compare") + (set_attr "cond" "set_zn") + (set_attr "length" "4,4,8")]) + +(define_expand "bic_f_zn" + [(parallel + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN + (and:SI (match_operand:SI 1 "register_operand" "") + (not:SI (match_operand:SI 2 "nonmemory_operand" ""))) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "") + (and:SI (match_dup 1) (not:SI (match_dup 2))))])] + "") + +(define_insn "*bic_f" + [(set (match_operand 3 "cc_register" "=Rcc,Rcc,Rcc") + (match_operator 4 "zn_compare_operator" + [(and:SI (match_operand:SI 1 "register_operand" "c,0,c") + (not:SI + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal"))) + (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (and:SI (match_dup 1) (not:SI (match_dup 2))))] + "" + "bic.f %0,%1,%2" + [(set_attr "type" "compare,compare,compare") + (set_attr "cond" "set_zn,set_zn,set_zn") + (set_attr "length" "4,4,8")]) + +(define_expand "movdi" + [(set (match_operand:DI 0 "move_dest_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + " +{ + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (DImode, operands[1]); +}") + +(define_insn_and_split "*movdi_insn" + [(set (match_operand:DI 0 "move_dest_operand" "=w,w,r,m") + (match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))] + "register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)" + "* +{ + switch (which_alternative) + { + default: + case 0 : + /* We normally copy the low-numbered register first. However, if + the first register operand 0 is the same as the second register of + operand 1, we must copy in the opposite order. */ + if (REGNO (operands[0]) == REGNO (operands[1]) + 1) + return \"mov%? %R0,%R1\;mov%? %0,%1\"; + else + return \"mov%? %0,%1\;mov%? %R0,%R1\"; + case 1 : + return \"mov%? %L0,%L1\;mov%? %H0,%H1\"; + case 2 : + /* If the low-address word is used in the address, we must load it + last. Otherwise, load it first. Note that we cannot have + auto-increment in that case since the address register is known to be + dead. 
*/ + if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1, + operands [1], 0)) + return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\"; + else switch (GET_CODE (XEXP(operands[1], 0))) + { + case POST_MODIFY: case POST_INC: case POST_DEC: + return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\"; + case PRE_MODIFY: case PRE_INC: case PRE_DEC: + return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\"; + default: + return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\"; + } + case 3 : + switch (GET_CODE (XEXP(operands[0], 0))) + { + case POST_MODIFY: case POST_INC: case POST_DEC: + return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\"; + case PRE_MODIFY: case PRE_INC: case PRE_DEC: + return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\"; + default: + return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\"; + } + } +}" + "&& reload_completed && optimize" + [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] + "arc_split_move (operands);" + [(set_attr "type" "move,move,load,store") + ;; ??? The ld/st values could be 4 if it's [reg,bignum]. + (set_attr "length" "8,16,16,16")]) + + +;; Floating point move insns. + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "if (prepare_move_operands (operands, SFmode)) DONE;") + +(define_insn "*movsf_insn" + [(set (match_operand:SF 0 "move_dest_operand" "=w,w,r,m") + (match_operand:SF 1 "move_src_operand" "c,E,m,c"))] + "register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode)" + "@ + mov%? %0,%1 + mov%? %0,%1 ; %A1 + ld%U1%V1 %0,%1 + st%U0%V0 %1,%0" + [(set_attr "type" "move,move,load,store") + (set_attr "predicable" "yes,yes,no,no")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + "if (prepare_move_operands (operands, DFmode)) DONE;") + +(define_insn "*movdf_insn" + [(set (match_operand:DF 0 "move_dest_operand" "=D,r,c,c,r,m") + (match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))] + "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)" + "#" + [(set_attr "type" "move,move,move,move,load,store") + (set_attr "predicable" "no,no,yes,yes,no,no") + ;; ??? The ld/st values could be 16 if it's [reg,bignum]. + (set_attr "length" "4,16,8,16,16,16")]) + +(define_split + [(set (match_operand:DF 0 "move_dest_operand" "") + (match_operand:DF 1 "move_double_src_operand" ""))] + "reload_completed" + [(match_dup 2)] + "operands[2] = arc_split_move (operands);") + +(define_insn_and_split "*movdf_insn_nolrsr" + [(set (match_operand:DF 0 "register_operand" "=r") + (match_operand:DF 1 "arc_double_register_operand" "D")) + (use (match_operand:SI 2 "" "N")) ; aka const1_rtx + ] + "TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR" + "#" + "&& 1" + [ + ; mov r0, 0 + (set (match_dup 0) (match_dup 3)) + + ; daddh?? r1, r0, r0 + (parallel [ + (set (match_dup 1) (plus:DF (match_dup 1) (match_dup 0))) + (use (const_int 1)) + (use (const_int 1)) + (use (match_dup 0)) ; used to block can_combine_p + (set (match_dup 0) (plus:DF (match_dup 1) (match_dup 0))) ; r1 in op 0 + ]) + + ; We have to do this twice, once to read the value into R0 and + ; second time to put back the contents which the first DEXCLx + ; will have overwritten + ; dexcl2 r0, r1, r0 + (set (match_dup 4) ; aka r0result + ; aka DF, r1, r0 + (unspec_volatile:SI [(match_dup 1) (match_dup 5) (match_dup 4)] VUNSPEC_DEXCL )) + ; Generate the second, which makes sure operand5 and operand4 values + ; are put back in the Dx register properly. 
+ (unspec_volatile:SI [(match_dup 1) (match_dup 5) (match_dup 4)] VUNSPEC_DEXCL_NORES ) + + ; Note: we cannot use a (clobber (match_scratch)) here because + ; the combine pass will end up replacing uses of it with 0 + ] + "operands[3] = CONST0_RTX (DFmode); + operands[4] = simplify_gen_subreg (SImode, operands[0], DFmode, 0); + operands[5] = simplify_gen_subreg (SImode, operands[0], DFmode, 4);" + [(set_attr "type" "move")]) + +;; Load/Store with update instructions. +;; +;; Some of these we can get by using pre-decrement or pre-increment, but the +;; hardware can also do cases where the increment is not the size of the +;; object. +;; +;; In all these cases, we use operands 0 and 1 for the register being +;; incremented because those are the operands that local-alloc will +;; tie and these are the pair most likely to be tieable (and the ones +;; that will benefit the most). +;; +;; We use match_operator here because we need to know whether the memory +;; object is volatile or not. + + +;; Note: loadqi_update has no 16-bit variant +(define_insn "*loadqi_update" + [(set (match_operand:QI 3 "dest_reg_operand" "=r,r") + (match_operator:QI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,Cal")])) + (set (match_operand:SI 0 "dest_reg_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldb.a%V4 %3,[%0,%S2]" + [(set_attr "type" "load,load") + (set_attr "length" "4,8")]) + +(define_insn "*load_zeroextendqisi_update" + [(set (match_operand:SI 3 "dest_reg_operand" "=r,r") + (zero_extend:SI (match_operator:QI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,Cal")]))) + (set (match_operand:SI 0 "dest_reg_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldb.a%V4 %3,[%0,%S2]" + [(set_attr "type" "load,load") + (set_attr "length" "4,8")]) + +(define_insn "*load_signextendqisi_update" + [(set (match_operand:SI 3 "dest_reg_operand" "=r,r") + (sign_extend:SI (match_operator:QI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,Cal")]))) + (set (match_operand:SI 0 "dest_reg_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldb.x.a%V4 %3,[%0,%S2]" + [(set_attr "type" "load,load") + (set_attr "length" "4,8")]) + +(define_insn "*storeqi_update" + [(set (match_operator:QI 4 "store_update_operand" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "short_immediate_operand" "I")]) + (match_operand:QI 3 "register_operand" "c")) + (set (match_operand:SI 0 "dest_reg_operand" "=w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "stb.a%V4 %3,[%0,%2]" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +;; ??? 
pattern may have to be re-written +;; Note: no 16-bit variant for this pattern +(define_insn "*loadhi_update" + [(set (match_operand:HI 3 "dest_reg_operand" "=r,r") + (match_operator:HI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,Cal")])) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldw.a%V4 %3,[%0,%S2]" + [(set_attr "type" "load,load") + (set_attr "length" "4,8")]) + +(define_insn "*load_zeroextendhisi_update" + [(set (match_operand:SI 3 "dest_reg_operand" "=r,r") + (zero_extend:SI (match_operator:HI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,Cal")]))) + (set (match_operand:SI 0 "dest_reg_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldw.a%V4 %3,[%0,%S2]" + [(set_attr "type" "load,load") + (set_attr "length" "4,8")]) + +;; Note: no 16-bit variant for this instruction +(define_insn "*load_signextendhisi_update" + [(set (match_operand:SI 3 "dest_reg_operand" "=r,r") + (sign_extend:SI (match_operator:HI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,Cal")]))) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldw.x.a%V4 %3,[%0,%S2]" + [(set_attr "type" "load,load") + (set_attr "length" "4,8")]) + +(define_insn "*storehi_update" + [(set (match_operator:HI 4 "store_update_operand" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "short_immediate_operand" "I")]) + (match_operand:HI 3 "register_operand" "c")) + (set (match_operand:SI 0 "dest_reg_operand" "=w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "stw.a%V4 %3,[%0,%2]" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +;; No 16-bit variant for this instruction pattern +(define_insn "*loadsi_update" + [(set (match_operand:SI 3 "dest_reg_operand" "=r,r") + (match_operator:SI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,Cal")])) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ld.a%V4 %3,[%0,%S2]" + [(set_attr "type" "load,load") + (set_attr "length" "4,8")]) + +(define_insn "*storesi_update" + [(set (match_operator:SI 4 "store_update_operand" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "short_immediate_operand" "I")]) + (match_operand:SI 3 "register_operand" "c")) + (set (match_operand:SI 0 "dest_reg_operand" "=w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "st.a%V4 %3,[%0,%2]" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "*loadsf_update" + [(set (match_operand:SF 3 "dest_reg_operand" "=r,r") + (match_operator:SF 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,Cal")])) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ld.a%V4 %3,[%0,%S2]" + [(set_attr "type" "load,load") + (set_attr "length" "4,8")]) + +(define_insn "*storesf_update" + [(set (match_operator:SF 4 "store_update_operand" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "short_immediate_operand" "I")]) + (match_operand:SF 3 "register_operand" "c")) + (set (match_operand:SI 0 "dest_reg_operand" "=w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "st.a%V4 %3,[%0,%2]" + 
[(set_attr "type" "store") + (set_attr "length" "4")]) + +;; Conditional move instructions. + +(define_expand "movsicc" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "nonmemory_operand" "") + (match_operand:SI 3 "register_operand" "")))] + "" + "operands[1] = gen_compare_reg (operands[1], VOIDmode);") + + +(define_expand "movdicc" + [(set (match_operand:DI 0 "dest_reg_operand" "") + (if_then_else:DI(match_operand 1 "comparison_operator" "") + (match_operand:DI 2 "nonmemory_operand" "") + (match_operand:DI 3 "register_operand" "")))] + "" + "operands[1] = gen_compare_reg (operands[1], VOIDmode);") + + +(define_expand "movsfcc" + [(set (match_operand:SF 0 "dest_reg_operand" "") + (if_then_else:SF (match_operand 1 "comparison_operator" "") + (match_operand:SF 2 "nonmemory_operand" "") + (match_operand:SF 3 "register_operand" "")))] + "" + "operands[1] = gen_compare_reg (operands[1], VOIDmode);") + +(define_expand "movdfcc" + [(set (match_operand:DF 0 "dest_reg_operand" "") + (if_then_else:DF (match_operand 1 "comparison_operator" "") + (match_operand:DF 2 "nonmemory_operand" "") + (match_operand:DF 3 "register_operand" "")))] + "" + "operands[1] = gen_compare_reg (operands[1], VOIDmode);") + +(define_insn "*movsicc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (if_then_else:SI (match_operator 3 "proper_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "nonmemory_operand" "cL,Cal") + (match_operand:SI 2 "register_operand" "0,0")))] + "" +{ + if (rtx_equal_p (operands[1], const0_rtx) && GET_CODE (operands[3]) == NE + && satisfies_constraint_Rcq (operands[0])) + return "sub%?.ne %0,%0,%0"; + /* ??? might be good for speed on ARC600 too, *if* properly scheduled. */ + if ((TARGET_ARC700 || optimize_size) + && rtx_equal_p (operands[1], constm1_rtx) + && GET_CODE (operands[3]) == LTU) + return "sbc.cs %0,%0,%0"; + return "mov.%d3 %0,%S1"; +} + [(set_attr "type" "cmove,cmove") + (set_attr "length" "4,8")]) + +; Try to generate more short moves, and/or less limms, by substituting a +; conditional move with a conditional sub. +(define_peephole2 + [(set (match_operand:SI 0 "compact_register_operand") + (match_operand:SI 1 "const_int_operand")) + (set (match_dup 0) + (if_then_else:SI (match_operator 3 "proper_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SI 2 "const_int_operand" "") + (match_dup 0)))] + "!satisfies_constraint_P (operands[1]) + && satisfies_constraint_P (operands[2]) + && UNSIGNED_INT6 (INTVAL (operands[2]) - INTVAL (operands[1]))" + [(set (match_dup 0) (match_dup 2)) + (cond_exec + (match_dup 3) + (set (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1))))] + "operands[3] = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (operands[3]), + GET_MODE (operands[4])), + VOIDmode, operands[4], const0_rtx); + operands[1] = GEN_INT (INTVAL (operands[1]) - INTVAL (operands[2]));") + +(define_insn "*movdicc_insn" + [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w") + (if_then_else:DI (match_operator 3 "proper_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DI 1 "nonmemory_operand" "c,i") + (match_operand:DI 2 "register_operand" "0,0")))] + "" + "* +{ + switch (which_alternative) + { + default: + case 0 : + /* We normally copy the low-numbered register first. 
However, if + the first register operand 0 is the same as the second register of + operand 1, we must copy in the opposite order. */ + if (REGNO (operands[0]) == REGNO (operands[1]) + 1) + return \"mov.%d3 %R0,%R1\;mov.%d3 %0,%1\"; + else + return \"mov.%d3 %0,%1\;mov.%d3 %R0,%R1\"; + case 1 : + return \"mov.%d3 %L0,%L1\;mov.%d3 %H0,%H1\"; + + + } +}" + [(set_attr "type" "cmove,cmove") + (set_attr "length" "8,16")]) + + +(define_insn "*movsfcc_insn" + [(set (match_operand:SF 0 "dest_reg_operand" "=w,w") + (if_then_else:SF (match_operator 3 "proper_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "nonmemory_operand" "c,E") + (match_operand:SF 2 "register_operand" "0,0")))] + "" + "@ + mov.%d3 %0,%1 + mov.%d3 %0,%1 ; %A1" + [(set_attr "type" "cmove,cmove")]) + +(define_insn "*movdfcc_insn" + [(set (match_operand:DF 0 "dest_reg_operand" "=w,w") + (if_then_else:DF (match_operator 1 "proper_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 2 "nonmemory_operand" "c,E") + (match_operand:DF 3 "register_operand" "0,0")))] + "" + "* +{ + switch (which_alternative) + { + default: + case 0 : + /* We normally copy the low-numbered register first. However, if + the first register operand 0 is the same as the second register of + operand 1, we must copy in the opposite order. */ + if (REGNO (operands[0]) == REGNO (operands[2]) + 1) + return \"mov.%d1 %R0,%R2\;mov.%d1 %0,%2\"; + else + return \"mov.%d1 %0,%2\;mov.%d1 %R0,%R2\"; + case 1 : + return \"mov.%d1 %L0,%L2\;mov.%d1 %H0,%H2; %A2 \"; + + } +}" + [(set_attr "type" "cmove,cmove") + (set_attr "length" "8,16")]) + + +(define_insn "*zero_extendqihi2_i" + [(set (match_operand:HI 0 "dest_reg_operand" "=Rcq,Rcq#q,Rcw,w,r") + (zero_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "0,Rcq#q,0,c,m")))] + "" + "@ + extb%? %0,%1%& + extb%? %0,%1%& + bmsk%? %0,%1,7 + extb %0,%1 + ldb%U1 %0,%1" + [(set_attr "type" "unary,unary,unary,unary,load") + (set_attr "iscompact" "maybe,true,false,false,false") + (set_attr "predicable" "no,no,yes,no,no")]) + +(define_expand "zero_extendqihi2" + [(set (match_operand:HI 0 "dest_reg_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "")))] + "" + "if (prepare_extend_operands (operands, ZERO_EXTEND, HImode)) DONE;" +) + +(define_insn "*zero_extendqisi2_ac" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcq#q,Rcw,w,qRcq,!*x,r") + (zero_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "0,Rcq#q,0,c,T,Usd,m")))] + "" + "@ + extb%? %0,%1%& + extb%? %0,%1%& + bmsk%? %0,%1,7 + extb %0,%1 + ldb%? %0,%1%& + ldb%? %0,%1%& + ldb%U1 %0,%1" + [(set_attr "type" "unary,unary,unary,unary,load,load,load") + (set_attr "iscompact" "maybe,true,false,false,true,true,false") + (set_attr "predicable" "no,no,yes,no,no,no,no")]) + +(define_expand "zero_extendqisi2" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "")))] + "" + "if (prepare_extend_operands (operands, ZERO_EXTEND, SImode)) DONE;" +) + +(define_insn "*zero_extendhisi2_i" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,q,Rcw,w,!x,Rcqq,r") + (zero_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "0,q,0,c,Usd,Usd,m")))] + "" + "@ + extw%? %0,%1%& + extw%? %0,%1%& + bmsk%? %0,%1,15 + extw %0,%1 + ldw%? 
%0,%1%& + ldw%U1 %0,%1 + ldw%U1%V1 %0,%1" + [(set_attr "type" "unary,unary,unary,unary,load,load,load") + (set_attr "iscompact" "maybe,true,false,false,true,false,false") + (set_attr "predicable" "no,no,yes,no,no,no,no")]) + + +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (zero_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "")))] + "" + "if (prepare_extend_operands (operands, ZERO_EXTEND, SImode)) DONE;" +) + +;; Sign extension instructions. + +(define_insn "*extendqihi2_i" + [(set (match_operand:HI 0 "dest_reg_operand" "=Rcqq,r,r") + (sign_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "Rcqq,r,m")))] + "" + "@ + sexb%? %0,%1%& + sexb %0,%1 + ldb.x%U1 %0,%1" + [(set_attr "type" "unary,unary,load") + (set_attr "iscompact" "true,false,false")]) + + +(define_expand "extendqihi2" + [(set (match_operand:HI 0 "dest_reg_operand" "") + (sign_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "")))] + "" + "if (prepare_extend_operands (operands, SIGN_EXTEND, HImode)) DONE;" +) + +(define_insn "*extendqisi2_ac" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w,r") + (sign_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "Rcqq,c,m")))] + "" + "@ + sexb%? %0,%1%& + sexb %0,%1 + ldb.x%U1 %0,%1" + [(set_attr "type" "unary,unary,load") + (set_attr "iscompact" "true,false,false")]) + +(define_expand "extendqisi2" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (sign_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "")))] + "" + "if (prepare_extend_operands (operands, SIGN_EXTEND, SImode)) DONE;" +) + +(define_insn "*extendhisi2_i" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w,r") + (sign_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "Rcqq,c,m")))] + "" + "@ + sexw%? %0,%1%& + sexw %0,%1 + ldw.x%U1%V1 %0,%1" + [(set_attr "type" "unary,unary,load") + (set_attr "iscompact" "true,false,false")]) + +(define_expand "extendhisi2" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (sign_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "")))] + "" + "if (prepare_extend_operands (operands, SIGN_EXTEND, SImode)) DONE;" +) + +;; Unary arithmetic insns + +;; We allow constant operands to enable late constant propagation, but it is +;; not worth while to have more than one dedicated alternative to output them - +;; if we are really worried about getting these the maximum benefit of all +;; the available alternatives, we should add an extra pass to fold such +;; operations to movsi. + +;; Absolute instructions + +(define_insn "*abssi2_mixed" + [(set (match_operand:SI 0 "compact_register_operand" "=q") + (abs:SI (match_operand:SI 1 "compact_register_operand" "q")))] + "TARGET_MIXED_CODE" + "abs%? %0,%1%&" + [(set_attr "type" "two_cycle_core") + (set_attr "iscompact" "true")]) + +(define_insn "abssi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,w,w") + (abs:SI (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,Cal")))] + "" + "abs%? %0,%1%&" + [(set_attr "type" "two_cycle_core") + (set_attr "length" "*,4,8") + (set_attr "iscompact" "true,false,false")]) + +;; Maximum and minimum insns + +(define_insn "smaxsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w") + (smax:SI (match_operand:SI 1 "register_operand" "%0, c, c") + (match_operand:SI 2 "nonmemory_operand" "cL,cL,Cal")))] + "" + "max%? 
%0,%1,%2" + [(set_attr "type" "two_cycle_core") + (set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no")] +) + +(define_insn "sminsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w") + (smin:SI (match_operand:SI 1 "register_operand" "%0, c, c") + (match_operand:SI 2 "nonmemory_operand" "cL,cL,Cal")))] + "" + "min%? %0,%1,%2" + [(set_attr "type" "two_cycle_core") + (set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no")] +) + +;; Arithmetic instructions. + +; We say an insn can be conditionalized if this doesn't introduce a long +; immediate. We set the type such that we still have good scheduling if the +; insn is conditionalized. +; ??? It would make sense to allow introduction of long immediates, but +; we'd need to communicate to the ccfsm machinery the extra cost. +; The alternatives in the constraints still serve three purposes: +; - estimate insn size assuming conditional execution +; - guide reload to re-order the second and third operand to get a better fit. +; - give tentative insn type to guide scheduling +; N.B. "%" for commutativity doesn't help when there is another matching +; (but longer) alternative. +; We avoid letting this pattern use LP_COUNT as a register by specifying +; register class 'W' instead of 'w'. +(define_insn_and_split "*addsi3_mixed" + ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,Rcq,Rcw,Rcw,Rcq,Rcb,Rcq, Rcw, Rcqq,Rcqq, W, W,W, W,Rcqq,Rcw, W") + (plus:SI (match_operand:SI 1 "register_operand" "%0, c, 0, c, 0, 0,Rcb, 0, Rcqq, 0, c, c,0, 0, 0, 0, c") + (match_operand:SI 2 "nonmemory_operand" "cL, 0, cL, 0,CL2,Csp,CM4,cCca,RcqqK, cO,cLCmL,Cca,I,C2a, Cal,Cal,Cal")))] + "" +{ + arc_output_addsi (operands, arc_ccfsm_cond_exec_p (), true); + return ""; +} + "&& reload_completed && get_attr_length (insn) == 8 + && satisfies_constraint_I (operands[2]) + && GET_CODE (PATTERN (insn)) != COND_EXEC" + [(set (match_dup 0) (match_dup 3)) (set (match_dup 0) (match_dup 4))] + "split_addsi (operands);" + [(set_attr "type" "*,*,*,*,two_cycle_core,two_cycle_core,*,two_cycle_core,*,*,*,two_cycle_core,*,two_cycle_core,*,*,*") + (set (attr "iscompact") + (cond [(match_test "~arc_output_addsi (operands, false, false) & 2") + (const_string "false") + (match_operand 2 "long_immediate_operand" "") + (const_string "maybe_limm")] + (const_string "maybe"))) + (set_attr "length" "*,*,4,4,*,*,*,4,*,*,4,4,4,4,*,8,8") + (set_attr "predicable" "no,no,yes,yes,no,no,no,yes,no,no,no,no,no,no,no,yes,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,nocond,canuse,nocond,nocond,nocond,nocond,canuse_limm,canuse_limm,canuse,canuse,nocond") +]) + +;; ARC700/ARC600 multiply +;; SI <- SI * SI + +(define_expand "mulsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" +{ + if (TARGET_ARC700 && !TARGET_NOMPY_SET) + { + if (!register_operand (operands[0], SImode)) + { + rtx result = gen_reg_rtx (SImode); + + emit_insn (gen_mulsi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], result); + DONE; + } + } + else if (TARGET_MUL64_SET) + { + emit_insn (gen_mulsi_600 (operands[1], operands[2], + gen_mlo (), gen_mhi ())); + emit_move_insn (operands[0], gen_mlo ()); + DONE; + } + else if (TARGET_MULMAC_32BY16_SET) + { + if (immediate_operand (operands[2], SImode) + && INTVAL (operands[2]) >= 0 + && INTVAL (operands[2]) <= 65535) + { + emit_insn (gen_umul_600 (operands[1], operands[2], + 
gen_acc2 (), gen_acc1 ()));
+	  emit_move_insn (operands[0], gen_acc2 ());
+	  DONE;
+	}
+      operands[2] = force_reg (SImode, operands[2]);
+      emit_insn (gen_umul_600 (operands[1], operands[2],
+			       gen_acc2 (), gen_acc1 ()));
+      emit_insn (gen_mac_600 (operands[1], operands[2],
+			      gen_acc2 (), gen_acc1 ()));
+      emit_move_insn (operands[0], gen_acc2 ());
+      DONE;
+    }
+  else
+    {
+      emit_move_insn (gen_rtx_REG (SImode, R0_REG), operands[1]);
+      emit_move_insn (gen_rtx_REG (SImode, R1_REG), operands[2]);
+      emit_insn (gen_mulsi3_600_lib ());
+      emit_move_insn (operands[0], gen_rtx_REG (SImode, R0_REG));
+      DONE;
+    }
+})
+
+; mululw conditional execution without a LIMM clobbers an input register;
+; we'd need a different pattern to describe this.
+; To make the conditional execution valid for the LIMM alternative, we
+; have to emit the LIMM before the register operand.
+(define_insn "umul_600"
+  [(set (match_operand:SI 2 "acc2_operand" "")
+	(mult:SI (match_operand:SI 0 "register_operand" "c,c,c")
+		 (zero_extract:SI (match_operand:SI 1 "nonmemory_operand"
+				   "c,L,Cal")
+				  (const_int 16)
+				  (const_int 0))))
+   (clobber (match_operand:SI 3 "acc1_operand" ""))]
+  "TARGET_MULMAC_32BY16_SET"
+  "@mululw 0, %0, %1
+   mululw 0, %0, %1
+   mululw%? 0, %1, %0"
+  [(set_attr "length" "4,4,8")
+   (set_attr "type" "mulmac_600, mulmac_600, mulmac_600")
+   (set_attr "predicable" "no, no, yes")
+   (set_attr "cond" "nocond, canuse_limm, canuse")])
+
+(define_insn "mac_600"
+  [(set (match_operand:SI 2 "acc2_operand" "")
+	(plus:SI
+	  (mult:SI (match_operand:SI 0 "register_operand" "c,c,c")
+		   (ashift:SI
+		     (zero_extract:SI (match_operand:SI 1 "nonmemory_operand" "c,L,Cal")
+				      (const_int 16)
+				      (const_int 16))
+		     (const_int 16)))
+	  (match_dup 2)))
+   (clobber (match_operand:SI 3 "acc1_operand" ""))]
+  "TARGET_MULMAC_32BY16_SET"
+  "machlw%? 0, %0, %1"
+  [(set_attr "length" "4,4,8")
+   (set_attr "type" "mulmac_600, mulmac_600, mulmac_600")
+   (set_attr "predicable" "no, no, yes")
+   (set_attr "cond" "nocond, canuse_limm, canuse")])
+
+(define_insn "mulsi_600"
+  [(set (match_operand:SI 2 "mlo_operand" "")
+	(mult:SI (match_operand:SI 0 "register_operand" "Rcq#q,c,c,%c")
+		 (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,I,Cal")))
+   (clobber (match_operand:SI 3 "mhi_operand" ""))]
+  "TARGET_MUL64_SET"
+; The assembler mis-assembles mul64 / mulu64 with "I" constraint constants,
+; using a machine code pattern that only allows "L" constraint constants.
+; "mul64%? \t0, %0, %1%&"
+{
+  if (satisfies_constraint_I (operands[1])
+      && !satisfies_constraint_L (operands[1]))
+    {
+      /* MUL64 <0,>b,s12 00101bbb10000100 0BBBssssssSSSSSS */
+      int n = true_regnum (operands[0]);
+      int i = INTVAL (operands[1]);
+      asm_fprintf (asm_out_file, "\t.short %d`", 0x2884 + ((n & 7) << 8));
+      asm_fprintf (asm_out_file, "\t.short %d`",
+		   ((i & 0x3f) << 6) + ((i >> 6) & 0x3f) + ((n & 070) << 9));
+      return "; mul64%? \t0, %0, %1%&";
+    }
+  return "mul64%? \t0, %0, %1%&";
+}
+  [(set_attr "length" "*,4,4,8")
+   (set_attr "iscompact" "maybe,false,false,false")
+   (set_attr "type" "multi,multi,multi,multi")
+   (set_attr "predicable" "yes,yes,no,yes")
+   (set_attr "cond" "canuse,canuse,canuse_limm,canuse")])
+
+; If we compile without a mul option enabled, but link with libraries
+; for a mul option, we'll see clobbers of multiplier output registers.
+; There is also an implementation using norm that clobbers the loop registers.
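Before the library fallback patterns that the comment above introduces, a brief recap of the TARGET_MULMAC_32BY16_SET expansion earlier in mulsi3: umul_600 (mululw) multiplies by the low 16 bits of the second operand, and mac_600 (machlw) accumulates the same register times the high 16 bits, shifted left by 16. A minimal C model of that arithmetic (a sketch with illustrative names, not code from the port):

  #include <stdint.h>

  /* Low 32 bits of a 32x32 multiply, composed from two 32x16 steps
     the way the mulsi3 expander above does it.  */
  static uint32_t
  mul32_via_32x16 (uint32_t a, uint32_t b)
  {
    uint64_t acc = (uint64_t) a * (b & 0xffff);   /* mululw 0,a,b   */
    acc += ((uint64_t) a * (b >> 16)) << 16;      /* machlw 0,a,b   */
    return (uint32_t) acc;                        /* read back acc2 */
  }

Modulo 2^32 the accumulator equals a * b, which is why reading the low accumulator word after the two instructions yields the full SImode product.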
+(define_insn "mulsi3_600_lib" + [(set (reg:SI R0_REG) + (mult:SI (reg:SI R0_REG) (reg:SI R1_REG))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R3_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:SI LP_COUNT)) + (clobber (reg:SI LP_START)) + (clobber (reg:SI LP_END)) + (clobber (reg:CC CC_REG))] + "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__mulsi3\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_insn "mulsidi_600" + [(set (reg:DI MUL64_OUT_REG) + (mult:DI (sign_extend:DI + (match_operand:SI 0 "register_operand" "Rcq#q,c,c,%c")) + (sign_extend:DI +; assembler issue for "I", see mulsi_600 +; (match_operand:SI 1 "register_operand" "Rcq#q,cL,I,Cal"))))] + (match_operand:SI 1 "register_operand" "Rcq#q,cL,L,C32"))))] + "TARGET_MUL64_SET" + "mul64%? \t0, %0, %1%&" + [(set_attr "length" "*,4,4,8") + (set_attr "iscompact" "maybe,false,false,false") + (set_attr "type" "multi,multi,multi,multi") + (set_attr "predicable" "yes,yes,no,yes") + (set_attr "cond" "canuse,canuse,canuse_limm,canuse")]) + +(define_insn "umulsidi_600" + [(set (reg:DI MUL64_OUT_REG) + (mult:DI (zero_extend:DI + (match_operand:SI 0 "register_operand" "c,c,%c")) + (sign_extend:DI +; assembler issue for "I", see mulsi_600 +; (match_operand:SI 1 "register_operand" "cL,I,Cal"))))] + (match_operand:SI 1 "register_operand" "cL,L,C32"))))] + "TARGET_MUL64_SET" + "mulu64%? \t0, %0, %1%&" + [(set_attr "length" "4,4,8") + (set_attr "iscompact" "false") + (set_attr "type" "umulti") + (set_attr "predicable" "yes,no,yes") + (set_attr "cond" "canuse,canuse_limm,canuse")]) + +; ARC700 mpy* instructions: This is a multi-cycle extension, and thus 'w' +; may not be used as destination constraint. + +; The result of mpy and mpyu is the same except for flag setting (if enabled), +; but mpyu is faster for the standard multiplier. +; Note: we must make sure LP_COUNT is not one of the destination +; registers, since it cannot be the destination of a multi-cycle insn +; like MPY or MPYU. +(define_insn "mulsi3_700" + [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=Rcr,r,r,Rcr,r") + (mult:SI (match_operand:SI 1 "register_operand" " 0,c,0,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,cL,I,Cal,Cal")))] +"TARGET_ARC700 && !TARGET_NOMPY_SET" + "mpyu%? 
%0,%1,%2" + [(set_attr "length" "4,4,4,8,8") + (set_attr "type" "umulti") + (set_attr "predicable" "yes,no,no,yes,no") + (set_attr "cond" "canuse,nocond,canuse_limm,canuse,nocond")]) + +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (mult:DI (sign_extend:DI(match_operand:SI 1 "register_operand" "")) + (sign_extend:DI(match_operand:SI 2 "nonmemory_operand" ""))))] + "(TARGET_ARC700 && !TARGET_NOMPY_SET) + || TARGET_MUL64_SET + || TARGET_MULMAC_32BY16_SET" +" +{ + if (TARGET_ARC700 && !TARGET_NOMPY_SET) + { + operands[2] = force_reg (SImode, operands[2]); + if (!register_operand (operands[0], DImode)) + { + rtx result = gen_reg_rtx (DImode); + + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_mulsidi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], result); + DONE; + } + } + else if (TARGET_MUL64_SET) + { + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_mulsidi_600 (operands[1], operands[2])); + emit_move_insn (operands[0], gen_rtx_REG (DImode, MUL64_OUT_REG)); + DONE; + } + else if (TARGET_MULMAC_32BY16_SET) + { + rtx result_hi = gen_highpart(SImode, operands[0]); + rtx result_low = gen_lowpart(SImode, operands[0]); + + emit_insn (gen_mul64_600 (operands[1], operands[2])); + emit_insn (gen_mac64_600 (result_hi, operands[1], operands[2])); + emit_move_insn (result_low, gen_acc2 ()); + DONE; + } +}") + +(define_insn "mul64_600" + [(set (reg:DI 56) + (mult:DI (sign_extend:DI (match_operand:SI 0 "register_operand" + "c,c,c")) + (zero_extract:DI (match_operand:SI 1 "nonmemory_operand" + "c,L,Cal") + (const_int 16) + (const_int 0)))) + ] + "TARGET_MULMAC_32BY16_SET" + "mullw%? 0, %0, %1" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + +;; ??? check if this is canonical rtl +(define_insn "mac64_600" + [(set (reg:DI 56) + (plus:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "c,c,c")) + (ashift:DI + (sign_extract:DI (match_operand:SI 2 "nonmemory_operand" "c,L,Cal") + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56))) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (zero_extract:SI + (plus:DI + (mult:DI (sign_extend:DI (match_dup 1)) + (ashift:DI + (sign_extract:DI (match_dup 2) + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56)) + (const_int 32) (const_int 32)))] + "TARGET_MULMAC_32BY16_SET" + "machlw%? %0, %1, %2" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + +;; DI <- DI(signed SI) * DI(signed SI) +(define_insn_and_split "mulsidi3_700" + [(set (match_operand:DI 0 "register_operand" "=&r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%c")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "cL"))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "#" + "&& reload_completed" + [(const_int 0)] +{ + int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD; + int lo = TARGET_BIG_ENDIAN ? 
UNITS_PER_WORD : 0;
+  rtx l0 = simplify_gen_subreg (word_mode, operands[0], DImode, lo);
+  rtx h0 = simplify_gen_subreg (word_mode, operands[0], DImode, hi);
+  emit_insn (gen_mulsi3_highpart (h0, operands[1], operands[2]));
+  emit_insn (gen_mulsi3_700 (l0, operands[1], operands[2]));
+  DONE;
+}
+  [(set_attr "type" "multi")
+   (set_attr "length" "8")])
+
+(define_insn "mulsi3_highpart"
+  [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r")
+	(truncate:SI
+	 (lshiftrt:DI
+	  (mult:DI
+	   (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c"))
+	   (sign_extend:DI (match_operand:SI 2 "extend_operand" "c,c, s,s")))
+	  (const_int 32))))]
+  "TARGET_ARC700 && !TARGET_NOMPY_SET"
+  "mpyh%? %0,%1,%2"
+  [(set_attr "length" "4,4,8,8")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no,yes,no")
+   (set_attr "cond" "canuse,nocond,canuse,nocond")])
+
+; Note that mpyhu has the same latency as mpy / mpyh,
+; thus we use the type multi.
+(define_insn "*umulsi3_highpart_i"
+  [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r")
+	(truncate:SI
+	 (lshiftrt:DI
+	  (mult:DI
+	   (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c"))
+	   (zero_extend:DI (match_operand:SI 2 "extend_operand" "c,c, s,s")))
+	  (const_int 32))))]
+  "TARGET_ARC700 && !TARGET_NOMPY_SET"
+  "mpyhu%? %0,%1,%2"
+  [(set_attr "length" "4,4,8,8")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no,yes,no")
+   (set_attr "cond" "canuse,nocond,canuse,nocond")])
+
+; Implementations include additional labels for umulsidi3, so we get all
+; the same clobbers - plus one for the result low part.
+(define_insn "umulsi3_highpart_600_lib_le"
+  [(set (reg:SI R1_REG)
+	(truncate:SI
+	 (lshiftrt:DI
+	  (mult:DI (zero_extend:DI (reg:SI R0_REG))
+		   (zero_extend:DI (reg:SI R1_REG)))
+	  (const_int 32))))
+   (clobber (reg:SI RETURN_ADDR_REGNUM))
+   (clobber (reg:SI R0_REG))
+   (clobber (reg:DI R2_REG))
+   (clobber (reg:SI R12_REG))
+   (clobber (reg:DI MUL64_OUT_REG))
+   (clobber (reg:CC CC_REG))]
+  "!TARGET_BIG_ENDIAN
+   && !TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET
+   && (!TARGET_ARC700 || TARGET_NOMPY_SET)
+   && SFUNC_CHECK_PREDICABLE"
+  "*return arc_output_libcall (\"__umulsi3_highpart\");"
+  [(set_attr "is_sfunc" "yes")
+   (set_attr "predicable" "yes")])
+
+(define_insn "umulsi3_highpart_600_lib_be"
+  [(set (reg:SI R0_REG)
+	(truncate:SI
+	 (lshiftrt:DI
+	  (mult:DI (zero_extend:DI (reg:SI R0_REG))
+		   (zero_extend:DI (reg:SI R1_REG)))
+	  (const_int 32))))
+   (clobber (reg:SI RETURN_ADDR_REGNUM))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:DI R2_REG))
+   (clobber (reg:SI R12_REG))
+   (clobber (reg:DI MUL64_OUT_REG))
+   (clobber (reg:CC CC_REG))]
+  "TARGET_BIG_ENDIAN
+   && !TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET
+   && (!TARGET_ARC700 || TARGET_NOMPY_SET)
+   && SFUNC_CHECK_PREDICABLE"
+  "*return arc_output_libcall (\"__umulsi3_highpart\");"
+  [(set_attr "is_sfunc" "yes")
+   (set_attr "predicable" "yes")])
+
+;; (zero_extend:DI (const_int)) leads to internal errors in combine, so we
+;; need a separate pattern for immediates.
+;; ??? This is fine for combine, but not for reload.
+(define_insn "umulsi3_highpart_int"
+  [(set (match_operand:SI 0 "register_operand" "=Rcr, r, r,Rcr, r")
+	(truncate:SI
+	 (lshiftrt:DI
+	  (mult:DI
+	   (zero_extend:DI (match_operand:SI 1 "register_operand" " 0, c, 0, 0, c"))
+	   (match_operand:DI 2 "immediate_usidi_operand" "L, L, I, Cal, Cal"))
+	  (const_int 32))))]
+  "TARGET_ARC700 && !TARGET_NOMPY_SET"
+  "mpyhu%?
%0,%1,%2" + [(set_attr "length" "4,4,4,8,8") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no,no,yes,no") + (set_attr "cond" "canuse,nocond,canuse_limm,canuse,nocond")]) + +(define_expand "umulsi3_highpart" + [(set (match_operand:SI 0 "general_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "nonmemory_operand" ""))) + (const_int 32))))] + "TARGET_ARC700 || (!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET)" + " +{ + rtx target = operands[0]; + + if (!TARGET_ARC700 || TARGET_NOMPY_SET) + { + emit_move_insn (gen_rtx_REG (SImode, 0), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 1), operands[2]); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_umulsi3_highpart_600_lib_be ()); + else + emit_insn (gen_umulsi3_highpart_600_lib_le ()); + emit_move_insn (target, gen_rtx_REG (SImode, 0)); + DONE; + } + + if (!register_operand (target, SImode)) + target = gen_reg_rtx (SImode); + + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + operands[2] = simplify_const_unary_operation (ZERO_EXTEND, DImode, + operands[2], SImode); + else if (!immediate_operand (operands[2], SImode)) + operands[2] = gen_rtx_ZERO_EXTEND (DImode, operands[2]); + emit_insn (gen_umulsi3_highpart_int (target, operands[1], operands[2])); + if (target != operands[0]) + emit_move_insn (operands[0], target); + DONE; +}") + +(define_expand "umulsidi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (mult:DI (zero_extend:DI(match_operand:SI 1 "register_operand" "")) + (zero_extend:DI(match_operand:SI 2 "nonmemory_operand" ""))))] + "" +{ + if (TARGET_ARC700 && !TARGET_NOMPY_SET) + { + operands[2] = force_reg (SImode, operands[2]); + if (!register_operand (operands[0], DImode)) + { + rtx result = gen_reg_rtx (DImode); + + emit_insn (gen_umulsidi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], result); + DONE; + } + } + else if (TARGET_MUL64_SET) + { + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_umulsidi_600 (operands[1], operands[2])); + emit_move_insn (operands[0], gen_rtx_REG (DImode, MUL64_OUT_REG)); + DONE; + } + else if (TARGET_MULMAC_32BY16_SET) + { + rtx result_hi = gen_reg_rtx (SImode); + rtx result_low = gen_reg_rtx (SImode); + + result_hi = gen_highpart(SImode , operands[0]); + result_low = gen_lowpart(SImode , operands[0]); + + emit_insn (gen_umul64_600 (operands[1], operands[2])); + emit_insn (gen_umac64_600 (result_hi, operands[1], operands[2])); + emit_move_insn (result_low, gen_acc2 ()); + DONE; + } + else + { + emit_move_insn (gen_rtx_REG (SImode, R0_REG), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, R1_REG), operands[2]); + emit_insn (gen_umulsidi3_600_lib ()); + emit_move_insn (operands[0], gen_rtx_REG (DImode, R0_REG)); + DONE; + } +}) + +(define_insn "umul64_600" + [(set (reg:DI 56) + (mult:DI (zero_extend:DI (match_operand:SI 0 "register_operand" + "c,c,c")) + (zero_extract:DI (match_operand:SI 1 "nonmemory_operand" + "c,L,Cal") + (const_int 16) + (const_int 0)))) + ] + "TARGET_MULMAC_32BY16_SET" + "@mululw 0, %0, %1 + mululw 0, %0, %1 + mululw%? 
0, %1, %0" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + +(define_insn "umac64_600" + [(set (reg:DI 56) + (plus:DI + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "c,c,c")) + (ashift:DI + (zero_extract:DI (match_operand:SI 2 "nonmemory_operand" "c,L,Cal") + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56))) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (zero_extract:SI + (plus:DI + (mult:DI (zero_extend:DI (match_dup 1)) + (ashift:DI + (zero_extract:DI (match_dup 2) + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56)) + (const_int 32) (const_int 32)))] + "TARGET_MULMAC_32BY16_SET" + "machulw%? %0, %1, %2" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + + +;; DI <- DI(unsigned SI) * DI(unsigned SI) +(define_insn_and_split "umulsidi3_700" + [(set (match_operand:DI 0 "dest_reg_operand" "=&r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%c")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "c"))))] +;; (zero_extend:DI (match_operand:SI 2 "register_operand" "rL"))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "#" + "reload_completed" + [(const_int 0)] +{ + int hi = !TARGET_BIG_ENDIAN; + int lo = !hi; + rtx l0 = operand_subword (operands[0], lo, 0, DImode); + rtx h0 = operand_subword (operands[0], hi, 0, DImode); + emit_insn (gen_umulsi3_highpart (h0, operands[1], operands[2])); + emit_insn (gen_mulsi3_700 (l0, operands[1], operands[2])); + DONE; +} + [(set_attr "type" "umulti") + (set_attr "length" "8")]) + +(define_insn "umulsidi3_600_lib" + [(set (reg:DI R0_REG) + (mult:DI (zero_extend:DI (reg:SI R0_REG)) + (zero_extend:DI (reg:SI R1_REG)))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:DI R2_REG)) + (clobber (reg:SI R12_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:CC CC_REG))] + "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__umulsidi3\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_peephole2 + [(parallel + [(set (reg:DI R0_REG) + (mult:DI (zero_extend:DI (reg:SI R0_REG)) + (zero_extend:DI (reg:SI R1_REG)))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:DI R2_REG)) + (clobber (reg:SI R12_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:CC CC_REG))])] + "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && peep2_regno_dead_p (1, TARGET_BIG_ENDIAN ? 
R1_REG : R0_REG)"
+  [(pc)]
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_umulsi3_highpart_600_lib_be ());
+  else
+    emit_insn (gen_umulsi3_highpart_600_lib_le ());
+  DONE;
+})
+
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "dest_reg_operand" "")
+	(plus:SI (match_operand:SI 1 "register_operand" "")
+		 (match_operand:SI 2 "nonmemory_operand" "")))]
+  ""
+  "if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[2], false))
+     {
+       operands[2] = force_reg (SImode, operands[2]);
+     }
+   else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[2], Pmode))
+     {
+       operands[2] = force_reg (SImode, arc_rewrite_small_data (operands[2]));
+     }
+
+  ")
+
+(define_expand "adddi3"
+  [(parallel [(set (match_operand:DI 0 "dest_reg_operand" "")
+		   (plus:DI (match_operand:DI 1 "register_operand" "")
+			    (match_operand:DI 2 "nonmemory_operand" "")))
+	      (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (TARGET_EXPAND_ADDDI)
+    {
+      rtx l0 = gen_lowpart (SImode, operands[0]);
+      rtx h0 = disi_highpart (operands[0]);
+      rtx l1 = gen_lowpart (SImode, operands[1]);
+      rtx h1 = disi_highpart (operands[1]);
+      rtx l2 = gen_lowpart (SImode, operands[2]);
+      rtx h2 = disi_highpart (operands[2]);
+      rtx cc_c = gen_rtx_REG (CC_Cmode, CC_REG);
+
+      if (CONST_INT_P (h2) && INTVAL (h2) < 0 && SIGNED_INT12 (INTVAL (h2)))
+	{
+	  emit_insn (gen_sub_f (l0, l1, gen_int_mode (-INTVAL (l2), SImode)));
+	  emit_insn (gen_sbc (h0, h1,
+			      gen_int_mode (-INTVAL (h2) - (l2 != const0_rtx),
+					    SImode),
+			      cc_c));
+	  DONE;
+	}
+      emit_insn (gen_add_f (l0, l1, l2));
+      emit_insn (gen_adc (h0, h1, h2));
+      DONE;
+    }
+})
+
+; This assumes that there can be no strictly partial overlap between
+; operands[1] and operands[2].
+(define_insn_and_split "*adddi3_i"
+  [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,w")
+	(plus:DI (match_operand:DI 1 "register_operand" "%c,0,c")
+		 (match_operand:DI 2 "nonmemory_operand" "ci,ci,!i")))
+   (clobber (reg:CC CC_REG))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  int hi = !TARGET_BIG_ENDIAN;
+  int lo = !hi;
+  rtx l0 = operand_subword (operands[0], lo, 0, DImode);
+  rtx h0 = operand_subword (operands[0], hi, 0, DImode);
+  rtx l1 = operand_subword (operands[1], lo, 0, DImode);
+  rtx h1 = operand_subword (operands[1], hi, 0, DImode);
+  rtx l2 = operand_subword (operands[2], lo, 0, DImode);
+  rtx h2 = operand_subword (operands[2], hi, 0, DImode);
+
+  if (l2 == const0_rtx)
+    {
+      if (!rtx_equal_p (l0, l1) && !rtx_equal_p (l0, h1))
+	emit_move_insn (l0, l1);
+      emit_insn (gen_addsi3 (h0, h1, h2));
+      if (!rtx_equal_p (l0, l1) && rtx_equal_p (l0, h1))
+	emit_move_insn (l0, l1);
+      DONE;
+    }
+  if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0
+      && INTVAL (operands[2]) >= -0x7fffffff)
+    {
+      emit_insn (gen_subdi3_i (operands[0], operands[1],
+			       GEN_INT (-INTVAL (operands[2]))));
+      DONE;
+    }
+  if (rtx_equal_p (l0, h1))
+    {
+      if (h2 != const0_rtx)
+	emit_insn (gen_addsi3 (h0, h1, h2));
+      else if (!rtx_equal_p (h0, h1))
+	emit_move_insn (h0, h1);
+      emit_insn (gen_add_f (l0, l1, l2));
+      emit_insn
+	(gen_rtx_COND_EXEC
+	  (VOIDmode,
+	   gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)),
+	   gen_rtx_SET (VOIDmode, h0, plus_constant (SImode, h0, 1))));
+      DONE;
+    }
+  emit_insn (gen_add_f (l0, l1, l2));
+  emit_insn (gen_adc (h0, h1, h2));
+  DONE;
+}
+  [(set_attr "cond" "clob")
+   (set_attr "type" "binary")
+   (set_attr "length" "16,16,20")])
+
+(define_insn "add_f"
+  [(set (reg:CC_C CC_REG)
+	(compare:CC_C
+	  (plus:SI (match_operand:SI 1 "register_operand" "c,0,c")
+		   (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal"))
+
(match_dup 1))) + (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "add.f %0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,8")]) + +(define_insn "*add_f_2" + [(set (reg:CC_C CC_REG) + (compare:CC_C + (plus:SI (match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal")) + (match_dup 2))) + (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "add.f %0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,8")]) + +; w/c/c comes first (rather than w/0/C_0) to prevent the middle-end +; needlessly prioritizing the matching constraint. +; Rcw/0/C_0 comes before w/c/L so that the lower latency conditional +; execution is used where possible. +(define_insn_and_split "adc" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w,Rcw,w") + (plus:SI (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (match_operand:SI 1 "nonmemory_operand" + "%c,0,c,0,cCal")) + (match_operand:SI 2 "nonmemory_operand" "c,C_0,L,I,cCal")))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + adc %0,%1,%2 + add.cs %0,%1,1 + adc %0,%1,%2 + adc %0,%1,%2 + adc %0,%1,%2" + ; if we have a bad schedule after sched2, split. + "reload_completed + && !optimize_size && TARGET_ARC700 + && arc_scheduling_not_expected () + && arc_sets_cc_p (prev_nonnote_insn (insn)) + /* If next comes a return or other insn that needs a delay slot, + expect the adc to get into the delay slot. */ + && next_nonnote_insn (insn) + && !arc_need_delay (next_nonnote_insn (insn)) + /* Restore operands before emitting. */ + && (extract_insn_cached (insn), 1)" + [(set (match_dup 0) (match_dup 3)) + (cond_exec + (ltu (reg:CC_C CC_REG) (const_int 0)) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))))] + "operands[3] = simplify_gen_binary (PLUS, SImode, operands[1], operands[2]);" + [(set_attr "cond" "use") + (set_attr "type" "cc_arith") + (set_attr "length" "4,4,4,4,8")]) + +; combiner-splitter cmp / scc -> cmp / adc +(define_split + [(set (match_operand:SI 0 "dest_reg_operand" "") + (gtu:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (clobber (reg CC_REG))] + "" + [(set (reg:CC_C CC_REG) (compare:CC_C (match_dup 2) (match_dup 1))) + (set (match_dup 0) (ltu:SI (reg:CC_C CC_REG) (const_int 0)))]) + +; combine won't work when an intermediate result is used later... 
+; add %0,%1,%2 ` cmp %0,%[12] -> add.f %0,%1,%2 +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (reg:CC_C CC_REG) + (compare:CC_C (match_dup 0) + (match_operand:SI 3 "nonmemory_operand" "")))] + "rtx_equal_p (operands[1], operands[3]) + || rtx_equal_p (operands[2], operands[3])" + [(parallel + [(set (reg:CC_C CC_REG) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1))) + (set (match_dup 0) + (plus:SI (match_dup 1) (match_dup 2)))])]) + +;(define_insn "*adc_0" +; [(set (match_operand:SI 0 "dest_reg_operand" "=w") +; (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) +; (match_operand:SI 1 "register_operand" "c")))] +; "" +; "adc %0,%1,0" +; [(set_attr "cond" "use") +; (set_attr "type" "cc_arith") +; (set_attr "length" "4")]) +; +;(define_split +; [(set (match_operand:SI 0 "dest_reg_operand" "=w") +; (plus:SI (gtu:SI (match_operand:SI 1 "register_operand" "c") +; (match_operand:SI 2 "register_operand" "c")) +; (match_operand:SI 3 "register_operand" "c"))) +; (clobber (reg CC_REG))] +; "" +; [(set (reg:CC_C CC_REG) (compare:CC_C (match_dup 2) (match_dup 1))) +; (set (match_dup 0) +; (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) +; (match_dup 3)))]) + +(define_expand "subsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + int c = 1; + + if (!register_operand (operands[2], SImode)) + { + operands[1] = force_reg (SImode, operands[1]); + c = 2; + } + if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[c], false)) + operands[c] = force_reg (SImode, operands[c]); + else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[c], Pmode)) + operands[c] = force_reg (SImode, arc_rewrite_small_data (operands[c])); +}") + +; the casesi expander might generate a sub of zero, so we have to recognize it. +; combine should make such an insn go away. +(define_insn_and_split "subsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,w,w,w, w, w, w") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0, 0, cL,c,L,I,Cal,Cal, c") + (match_operand:SI 2 "nonmemory_operand" "Rcqq, c, 0,c,c,0, 0, c,Cal")))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + sub%? %0,%1,%2%& + sub%? %0,%1,%2 + rsub%? %0,%2,%1 + sub %0,%1,%2 + rsub %0,%2,%1 + rsub %0,%2,%1 + rsub%? 
%0,%2,%1 + rsub %0,%2,%1 + sub %0,%1,%2" + "reload_completed && get_attr_length (insn) == 8 + && satisfies_constraint_I (operands[1]) + && GET_CODE (PATTERN (insn)) != COND_EXEC" + [(set (match_dup 0) (match_dup 3)) (set (match_dup 0) (match_dup 4))] + "split_subsi (operands);" + [(set_attr "iscompact" "maybe,false,false,false,false,false,false,false, false") + (set_attr "length" "*,4,4,4,4,4,8,8,8") + (set_attr "predicable" "yes,yes,yes,no,no,no,yes,no,no") + (set_attr "cond" "canuse,canuse,canuse,nocond,nocond,canuse_limm,canuse,nocond,nocond")]) + +(define_expand "subdi3" + [(parallel [(set (match_operand:DI 0 "dest_reg_operand" "") + (minus:DI (match_operand:DI 1 "nonmemory_operand" "") + (match_operand:DI 2 "nonmemory_operand" ""))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (!register_operand (operands[2], DImode)) + operands[1] = force_reg (DImode, operands[1]); + if (TARGET_EXPAND_ADDDI) + { + rtx l0 = gen_lowpart (SImode, operands[0]); + rtx h0 = disi_highpart (operands[0]); + rtx l1 = gen_lowpart (SImode, operands[1]); + rtx h1 = disi_highpart (operands[1]); + rtx l2 = gen_lowpart (SImode, operands[2]); + rtx h2 = disi_highpart (operands[2]); + rtx cc_c = gen_rtx_REG (CC_Cmode, CC_REG); + + emit_insn (gen_sub_f (l0, l1, l2)); + emit_insn (gen_sbc (h0, h1, h2, cc_c)); + DONE; + } +}) + +(define_insn_and_split "subdi3_i" + [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,w,w,w") + (minus:DI (match_operand:DI 1 "nonmemory_operand" "ci,0,ci,c,!i") + (match_operand:DI 2 "nonmemory_operand" "ci,ci,0,!i,c"))) + (clobber (reg:CC CC_REG))] + "register_operand (operands[1], DImode) + || register_operand (operands[2], DImode)" + "#" + "reload_completed" + [(const_int 0)] +{ + int hi = !TARGET_BIG_ENDIAN; + int lo = !hi; + rtx l0 = operand_subword (operands[0], lo, 0, DImode); + rtx h0 = operand_subword (operands[0], hi, 0, DImode); + rtx l1 = operand_subword (operands[1], lo, 0, DImode); + rtx h1 = operand_subword (operands[1], hi, 0, DImode); + rtx l2 = operand_subword (operands[2], lo, 0, DImode); + rtx h2 = operand_subword (operands[2], hi, 0, DImode); + + if (rtx_equal_p (l0, h1) || rtx_equal_p (l0, h2)) + { + h1 = simplify_gen_binary (MINUS, SImode, h1, h2); + if (!rtx_equal_p (h0, h1)) + emit_insn (gen_rtx_SET (VOIDmode, h0, h1)); + emit_insn (gen_sub_f (l0, l1, l2)); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)), + gen_rtx_SET (VOIDmode, h0, plus_constant (SImode, h0, -1)))); + DONE; + } + emit_insn (gen_sub_f (l0, l1, l2)); + emit_insn (gen_sbc (h0, h1, h2, gen_rtx_REG (CCmode, CC_REG))); + DONE; +} + [(set_attr "cond" "clob") + (set_attr "length" "16,16,16,20,20")]) + +(define_insn "*sbc_0" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (minus:SI (match_operand:SI 1 "register_operand" "c") + (ltu:SI (match_operand:CC_C 2 "cc_use_register") + (const_int 0))))] + "" + "sbc %0,%1,0" + [(set_attr "cond" "use") + (set_attr "type" "cc_arith") + (set_attr "length" "4")]) + +; w/c/c comes first (rather than Rcw/0/C_0) to prevent the middle-end +; needlessly prioritizing the matching constraint. +; Rcw/0/C_0 comes before w/c/L so that the lower latency conditional execution +; is used where possible. 
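Before the sbc pattern that the comment above orders constraints for, a minimal C model of the borrow propagation that subdi3 and the subdi3_i split implement with a sub.f/sbc pair (a sketch under the assumption, encoded by the *sbc_0 pattern above, that the carry flag holds the borrow of a flag-setting subtract; names are illustrative):

  #include <stdint.h>

  /* Double-word subtract: sub.f produces the low word and sets
     carry on borrow; sbc then subtracts high words and borrow.  */
  static void
  sub64 (uint32_t l1, uint32_t h1, uint32_t l2, uint32_t h2,
         uint32_t *l0, uint32_t *h0)
  {
    uint32_t borrow = l1 < l2;   /* carry after sub.f l0,l1,l2 */
    *l0 = l1 - l2;               /* sub.f l0,l1,l2             */
    *h0 = h1 - h2 - borrow;      /* sbc   h0,h1,h2             */
  }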
+(define_insn_and_split "sbc" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w,Rcw,w") + (minus:SI (minus:SI (match_operand:SI 1 "nonmemory_operand" + "c,0,c,0,cCal") + (ltu:SI (match_operand:CC_C 3 "cc_use_register") + (const_int 0))) + (match_operand:SI 2 "nonmemory_operand" "c,C_0,L,I,cCal")))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + sbc %0,%1,%2 + sub.cs %0,%1,1 + sbc %0,%1,%2 + sbc %0,%1,%2 + sbc %0,%1,%2" + ; if we have a bad schedule after sched2, split. + "reload_completed + && !optimize_size && TARGET_ARC700 + && arc_scheduling_not_expected () + && arc_sets_cc_p (prev_nonnote_insn (insn)) + /* If next comes a return or other insn that needs a delay slot, + expect the adc to get into the delay slot. */ + && next_nonnote_insn (insn) + && !arc_need_delay (next_nonnote_insn (insn)) + /* Restore operands before emitting. */ + && (extract_insn_cached (insn), 1)" + [(set (match_dup 0) (match_dup 4)) + (cond_exec + (ltu (reg:CC_C CC_REG) (const_int 0)) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))))] + "operands[4] = simplify_gen_binary (MINUS, SImode, operands[1], operands[2]);" + [(set_attr "cond" "use") + (set_attr "type" "cc_arith") + (set_attr "length" "4,4,4,4,8")]) + +(define_insn "sub_f" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "nonmemory_operand" " c,L,0,I,c,Cal") + (match_operand:SI 2 "nonmemory_operand" "cL,c,I,0,Cal,c"))) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w,Rcw,Rcw,w,w") + (minus:SI (match_dup 1) (match_dup 2)))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + sub.f %0,%1,%2 + rsub.f %0,%2,%1 + sub.f %0,%1,%2 + rsub.f %0,%2,%1 + sub.f %0,%1,%2 + sub.f %0,%1,%2" + [(set_attr "type" "compare") + (set_attr "length" "4,4,4,4,8,8")]) + +; combine won't work when an intermediate result is used later... +; add %0,%1,%2 ` cmp %0,%[12] -> add.f %0,%1,%2 +(define_peephole2 + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "dest_reg_operand" "") + (minus:SI (match_dup 1) (match_dup 2)))] + "" + [(parallel + [(set (reg:CC CC_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])]) + +(define_peephole2 + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (match_operand 3 "" "") (match_operand 4 "" "")) + (set (match_operand:SI 0 "dest_reg_operand" "") + (minus:SI (match_dup 1) (match_dup 2)))] + "!reg_overlap_mentioned_p (operands[3], operands[1]) + && !reg_overlap_mentioned_p (operands[3], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[4]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + [(parallel + [(set (reg:CC CC_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (match_dup 4))]) + +(define_insn "*add_n" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,W,W,w,w") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "Rcqq,c,c,c,c,c") + (match_operand:SI 2 "_2_4_8_operand" "")) + (match_operand:SI 3 "nonmemory_operand" "0,0,c,?Cal,?c,??Cal")))] + "" + "add%z2%? 
%0,%3,%1%&" + [(set_attr "type" "shift") + (set_attr "length" "*,4,4,8,4,8") + (set_attr "predicable" "yes,yes,no,no,no,no") + (set_attr "cond" "canuse,canuse,nocond,nocond,nocond,nocond") + (set_attr "iscompact" "maybe,false,false,false,false,false")]) + +;; N.B. sub[123] has the operands of the MINUS in the opposite order from +;; what synth_mult likes. +(define_insn "*sub_n" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,c,?Cal") + (mult:SI (match_operand:SI 2 "register_operand" "c,c,c") + (match_operand:SI 3 "_2_4_8_operand" ""))))] + "" + "sub%z3%? %0,%1,%2" + [(set_attr "type" "shift") + (set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond") + (set_attr "iscompact" "false")]) + +; ??? check if combine matches this. +(define_insn "*bset" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (ior:SI (ashift:SI (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")) + (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + "" + "bset%? %0,%2,%1" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; ??? check if combine matches this. +(define_insn "*bxor" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (xor:SI (ashift:SI (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")) + (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + "" + "bxor%? %0,%2,%1" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; ??? check if combine matches this. +(define_insn "*bclr" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (and:SI (not:SI (ashift:SI (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "cL,cL,c"))) + (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + "" + "bclr%? %0,%2,%1" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; ??? FIXME: find combine patterns for bmsk. + +;;Following are the define_insns added for the purpose of peephole2's + +; see also iorsi3 for use with constant bit number. +(define_insn "*bset_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (ior:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + (ashift:SI (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ] + "" + "@ + bset%? %0,%1,%2 ;;peep2, constr 1 + bset %0,%1,%2 ;;peep2, constr 2 + bset %0,%S1,%2 ;;peep2, constr 3" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; see also xorsi3 for use with constant bit number. +(define_insn "*bxor_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (xor:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + (ashift:SI (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ] + "" + "@ + bxor%? %0,%1,%2 + bxor %0,%1,%2 + bxor %0,%S1,%2" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; see also andsi3 for use with constant bit number. +(define_insn "*bclr_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (and:SI (not:SI (ashift:SI (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "cL,rL,r"))) + (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")))] + "" + "@ + bclr%? 
%0,%1,%2 + bclr %0,%1,%2 + bclr %0,%S1,%2" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; see also andsi3 for use with constant bit number. +(define_insn "*bmsk_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (and:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + (plus:SI (ashift:SI (const_int 1) + (plus:SI (match_operand:SI 2 "nonmemory_operand" "rL,rL,r") + (const_int 1))) + (const_int -1))))] + "" + "@ + bmsk%? %0,%S1,%2 + bmsk %0,%1,%2 + bmsk %0,%S1,%2" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +;;Instructions added for peephole2s end + +;; Boolean instructions. + +(define_expand "andsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + "if (!satisfies_constraint_Cux (operands[2])) + operands[1] = force_reg (SImode, operands[1]); + else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode)) + operands[1] = arc_rewrite_small_data (operands[1]);") + +(define_insn "andsi3_i" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcqq,Rcqq,Rcqq,Rcw,Rcw,Rcw,Rcw,Rcw,Rcw, w, w, w, w,w,Rcw, w, W") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,Rcq, 0, 0,Rcqq, 0, c, 0, 0, 0, 0, c, c, c, c,0, 0, c, o") + (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, C1p, Ccp, Cux, cL, 0,C1p,Ccp,CnL, I, Lc,C1p,Ccp,CnL,I,Cal,Cal,Cux")))] + "(register_operand (operands[1], SImode) + && nonmemory_operand (operands[2], SImode)) + || (memory_operand (operands[1], SImode) + && satisfies_constraint_Cux (operands[2]))" + "* +{ + switch (which_alternative) + { + case 0: case 5: case 10: case 11: case 15: case 16: case 17: + return \"and%? %0,%1,%2%&\"; + case 1: case 6: + return \"and%? %0,%2,%1%&\"; + case 2: case 7: case 12: + return \"bmsk%? %0,%1,%Z2%&\"; + case 3: case 8: case 13: + return \"bclr%? %0,%1,%M2%&\"; + case 4: + return (INTVAL (operands[2]) == 0xff + ? \"extb%? %0,%1%&\" : \"extw%? %0,%1%&\"); + case 9: case 14: return \"bic%? %0,%1,%n2-1\"; + case 18: + if (TARGET_BIG_ENDIAN) + { + rtx xop[2]; + + xop[0] = operands[0]; + xop[1] = adjust_address (operands[1], QImode, + INTVAL (operands[2]) == 0xff ? 3 : 2); + output_asm_insn (INTVAL (operands[2]) == 0xff + ? \"ldb %0,%1\" : \"ldw %0,%1\", + xop); + return \"\"; + } + return INTVAL (operands[2]) == 0xff ? \"ldb %0,%1\" : \"ldw %0,%1\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "iscompact" "maybe,maybe,maybe,maybe,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false") + (set_attr "type" "binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,load") + (set_attr "length" "*,*,*,*,*,4,4,4,4,4,4,4,4,4,4,4,8,8,*") + (set_attr "predicable" "no,no,no,no,no,yes,yes,yes,yes,yes,no,no,no,no,no,no,yes,no,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,nocond,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,nocond,nocond,canuse_limm,canuse,nocond,nocond")]) + +; combiner splitter, pattern found in ldtoa.c . 
+; and op3,op0,op1 / cmp op3,op2 -> add op3,op0,op4 / bmsk.f 0,op3,op1 +(define_split + [(set (reg:CC_Z CC_REG) + (compare:CC_Z (and:SI (match_operand:SI 0 "register_operand" "") + (match_operand 1 "const_int_operand" "")) + (match_operand 2 "const_int_operand" ""))) + (clobber (match_operand:SI 3 "register_operand" ""))] + "((INTVAL (operands[1]) + 1) & INTVAL (operands[1])) == 0" + [(set (match_dup 3) + (plus:SI (match_dup 0) (match_dup 4))) + (set (reg:CC_Z CC_REG) + (compare:CC_Z (and:SI (match_dup 3) (match_dup 1)) + (const_int 0)))] + "operands[4] = GEN_INT ( -(~INTVAL (operands[1]) | INTVAL (operands[2])));") + +;;bic define_insn that allows limm to be the first operand +(define_insn "*bicsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,Rcw,w,w,w") + (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "Rcqq,Lc,I,Cal,Lc,Cal,c")) + (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,c,c,Cal")))] + "" + "@ + bic%? %0, %2, %1%& ;;constraint 0 + bic%? %0,%2,%1 ;;constraint 1 + bic %0,%2,%1 ;;constraint 2, FIXME: will it ever get generated ??? + bic%? %0,%2,%S1 ;;constraint 3, FIXME: will it ever get generated ??? + bic %0,%2,%1 ;;constraint 4 + bic %0,%2,%S1 ;;constraint 5, FIXME: will it ever get generated ??? + bic %0,%S2,%1 ;;constraint 6" + [(set_attr "length" "*,4,4,8,4,8,8") + (set_attr "iscompact" "maybe, false, false, false, false, false, false") + (set_attr "predicable" "no,yes,no,yes,no,no,no") + (set_attr "cond" "canuse,canuse,canuse_limm,canuse,nocond,nocond,nocond")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcqq,Rcw,Rcw,Rcw,Rcw,w, w,w,Rcw, w") + (ior:SI (match_operand:SI 1 "nonmemory_operand" "% 0,Rcq, 0, 0, c, 0, 0, c, c,0, 0, c") + (match_operand:SI 2 "nonmemory_operand" "Rcqq, 0, C0p, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))] + "" + "* + switch (which_alternative) + { + case 0: case 3: case 6: case 7: case 9: case 10: case 11: + return \"or%? %0,%1,%2%&\"; + case 1: case 4: + return \"or%? %0,%2,%1%&\"; + case 2: case 5: case 8: + return \"bset%? %0,%1,%z2%&\"; + default: + gcc_unreachable (); + }" + [(set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,false,false,false,false,false") + (set_attr "length" "*,*,*,4,4,4,4,4,4,4,8,8") + (set_attr "predicable" "no,no,no,yes,yes,yes,no,no,no,no,yes,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcw,Rcw,Rcw,Rcw, w, w,w, w, w") + (xor:SI (match_operand:SI 1 "register_operand" "%0, Rcq, 0, c, 0, 0, c, c,0, 0, c") + (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))] + "" + "* + switch (which_alternative) + { + case 0: case 2: case 5: case 6: case 8: case 9: case 10: + return \"xor%? %0,%1,%2%&\"; + case 1: case 3: + return \"xor%? %0,%2,%1%&\"; + case 4: case 7: + return \"bxor%? %0,%1,%z2\"; + default: + gcc_unreachable (); + } + " + [(set_attr "iscompact" "maybe,maybe,false,false,false,false,false,false,false,false,false") + (set_attr "type" "binary") + (set_attr "length" "*,*,4,4,4,4,4,4,4,8,8") + (set_attr "predicable" "no,no,yes,yes,yes,no,no,no,no,yes,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,Rcw,w") + (neg:SI (match_operand:SI 1 "register_operand" "0,Rcqq,0,c")))] + "" + "neg%? 
%0,%1%&"
+  [(set_attr "type" "unary")
+   (set_attr "iscompact" "maybe,true,false,false")
+   (set_attr "predicable" "no,no,yes,no")])
+
+(define_insn "one_cmplsi2"
+  [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w")
+	(not:SI (match_operand:SI 1 "register_operand" "Rcqq,c")))]
+  ""
+  "not%? %0,%1%&"
+  [(set_attr "type" "unary,unary")
+   (set_attr "iscompact" "true,false")])
+
+(define_insn_and_split "one_cmpldi2"
+  [(set (match_operand:DI 0 "dest_reg_operand" "=q,w")
+	(not:DI (match_operand:DI 1 "register_operand" "q,c")))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2) (not:SI (match_dup 3)))
+   (set (match_dup 4) (not:SI (match_dup 5)))]
+{
+  int swap = (true_regnum (operands[0]) == true_regnum (operands[1]) + 1);
+
+  operands[2] = operand_subword (operands[0], 0+swap, 0, DImode);
+  operands[3] = operand_subword (operands[1], 0+swap, 0, DImode);
+  operands[4] = operand_subword (operands[0], 1-swap, 0, DImode);
+  operands[5] = operand_subword (operands[1], 1-swap, 0, DImode);
+}
+  [(set_attr "type" "unary,unary")
+   (set_attr "cond" "nocond,nocond")
+   (set_attr "length" "4,8")])
+
+;; Shift instructions.
+
+(define_expand "ashlsi3"
+  [(set (match_operand:SI 0 "dest_reg_operand" "")
+	(ashift:SI (match_operand:SI 1 "register_operand" "")
+		   (match_operand:SI 2 "nonmemory_operand" "")))]
+  ""
+  "
+{
+  if (!TARGET_BARREL_SHIFTER)
+    {
+      emit_shift (ASHIFT, operands[0], operands[1], operands[2]);
+      DONE;
+    }
+}")
+
+(define_expand "ashrsi3"
+  [(set (match_operand:SI 0 "dest_reg_operand" "")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand" "")
+		     (match_operand:SI 2 "nonmemory_operand" "")))]
+  ""
+  "
+{
+  if (!TARGET_BARREL_SHIFTER)
+    {
+      emit_shift (ASHIFTRT, operands[0], operands[1], operands[2]);
+      DONE;
+    }
+}")
+
+(define_expand "lshrsi3"
+  [(set (match_operand:SI 0 "dest_reg_operand" "")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand" "")
+		     (match_operand:SI 2 "nonmemory_operand" "")))]
+  ""
+  "
+{
+  if (!TARGET_BARREL_SHIFTER)
+    {
+      emit_shift (LSHIFTRT, operands[0], operands[1], operands[2]);
+      DONE;
+    }
+}")
+
+(define_insn "shift_si3"
+  [(set (match_operand:SI 0 "dest_reg_operand" "=r")
+	(match_operator:SI 3 "shift4_operator"
+	  [(match_operand:SI 1 "register_operand" "0")
+	   (match_operand:SI 2 "const_int_operand" "n")]))
+   (clobber (match_scratch:SI 4 "=&r"))
+   (clobber (reg:CC CC_REG))
+  ]
+  "!TARGET_BARREL_SHIFTER"
+  "* return output_shift (operands);"
+  [(set_attr "type" "shift")
+   (set_attr "length" "16")])
+
+(define_insn "shift_si3_loop"
+  [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
+	(match_operator:SI 3 "shift_operator"
+	  [(match_operand:SI 1 "register_operand" "0,0")
+	   (match_operand:SI 2 "nonmemory_operand" "rn,Cal")]))
+   (clobber (match_scratch:SI 4 "=X,X"))
+   (clobber (reg:SI LP_COUNT))
+   (clobber (reg:SI LP_START))
+   (clobber (reg:SI LP_END))
+   (clobber (reg:CC CC_REG))
+  ]
+  "!TARGET_BARREL_SHIFTER"
+  "* return output_shift (operands);"
+  [(set_attr "type" "shift")
+   (set_attr "length" "16,20")])
+
+; asl, asr, lsr patterns:
+; There is no point in including an 'I' alternative since only the lowest 5
+; bits are used for the shift.  OTOH Cal can be useful if the shift amount
+; is defined in an external symbol, as we don't have special relocations
+; to truncate a symbol in a u6 immediate; but that's rather exotic, so only
+; provide one alternative for this, without condexec support.
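To make the note above concrete, a small C model of why a u12 'I' alternative would buy nothing here: with only the low five bits of the amount significant, everything an 'I' value could express is already covered by a u6 'L' value (an illustrative sketch, not code from the port):

  #include <stdint.h>

  /* Model of a barrel-shifter asl: bits 5 and up of the shift
     amount are ignored, per the comment above.  */
  static uint32_t
  asl_model (uint32_t x, uint32_t amount)
  {
    return x << (amount & 31);
  }

The asl/asr/lsr patterns themselves follow.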
+(define_insn "*ashlsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") + (ashift:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") + (match_operand:SI 2 "nonmemory_operand" "K, K,RcqqM, cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER + && (register_operand (operands[1], SImode) + || register_operand (operands[2], SImode))" + "asl%? %0,%1,%2%&" + [(set_attr "type" "shift") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") + (set_attr "predicable" "no,no,no,yes,no,no") + (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) + +(define_insn "*ashrsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") + (ashiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") + (match_operand:SI 2 "nonmemory_operand" "K, K,RcqqM, cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER + && (register_operand (operands[1], SImode) + || register_operand (operands[2], SImode))" + "asr%? %0,%1,%2%&" + [(set_attr "type" "shift") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") + (set_attr "predicable" "no,no,no,yes,no,no") + (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) + +(define_insn "*lshrsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") + (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") + (match_operand:SI 2 "nonmemory_operand" "N, N,RcqqM, cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER + && (register_operand (operands[1], SImode) + || register_operand (operands[2], SImode))" + "*return (which_alternative <= 1 && !arc_ccfsm_cond_exec_p () + ? \"lsr%? %0,%1%&\" : \"lsr%? %0,%1,%2%&\");" + [(set_attr "type" "shift") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") + (set_attr "predicable" "no,no,no,yes,no,no") + (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) + +(define_insn "rotrsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w") + (rotatert:SI (match_operand:SI 1 "register_operand" " 0,cL,cCal") + (match_operand:SI 2 "nonmemory_operand" "cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER" + "ror%? %0,%1,%2" + [(set_attr "type" "shift,shift,shift") + (set_attr "predicable" "yes,no,no") + (set_attr "length" "4,4,8")]) + +;; Compare / branch instructions. + +(define_expand "cbranchsi4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "nonmemory_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(reg CC_REG) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + gcc_assert (XEXP (operands[0], 0) == operands[1]); + gcc_assert (XEXP (operands[0], 1) == operands[2]); + operands[0] = gen_compare_reg (operands[0], VOIDmode); + emit_jump_insn (gen_branch_insn (operands[3], operands[0])); + DONE; +}) + +;; ??? Could add a peephole to generate compare with swapped operands and +;; modifed cc user if second, but not first operand is a compact register. +(define_insn "cmpsi_cc_insn_mixed" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 0 "register_operand" "Rcq#q,c,c, qRcq, c") + (match_operand:SI 1 "nonmemory_operand" "cO,cI,cL, Cal, Cal")))] + "" + "cmp%? 
%0,%B1%&" + [(set_attr "type" "compare") + (set_attr "iscompact" "true,false,false,true_limm,false") + (set_attr "predicable" "no,no,yes,no,yes") + (set_attr "cond" "set") + (set_attr "length" "*,4,4,*,8")]) + +(define_insn "*cmpsi_cc_zn_insn" + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN (match_operand:SI 0 "register_operand" "qRcq,c") + (const_int 0)))] + "" + "tst%? %0,%0%&" + [(set_attr "type" "compare,compare") + (set_attr "iscompact" "true,false") + (set_attr "predicable" "no,yes") + (set_attr "cond" "set_zn") + (set_attr "length" "*,4")]) + +; combiner pattern observed for unwind-dw2-fde.c:linear_search_fdes. +(define_insn "*btst" + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN + (zero_extract:SI (match_operand:SI 0 "register_operand" "Rcqq,c") + (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "L,Lc")) + (const_int 0)))] + "" + "btst%? %0,%1" + [(set_attr "iscompact" "true,false") + (set_attr "predicable" "no,yes") + (set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "*,4")]) + +; combine suffers from 'simplifications' that replace a one-bit zero +; extract with a shift if it can prove that the upper bits are zero. +; arc_reorg sees the code after sched2, which can have caused our +; inputs to be clobbered even if they were not clobbered before. +; Therefore, add a third way to convert btst / b{eq,ne} to bbit{0,1} +; OTOH, this is somewhat marginal, and can leat to out-of-range +; bbit (i.e. bad scheduling) and missed conditional execution, +; so make this an option. +(define_peephole2 + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN + (zero_extract:SI (match_operand:SI 0 "register_operand" "") + (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "")) + (const_int 0))) + (set (pc) + (if_then_else (match_operator 3 "equality_comparison_operator" + [(reg:CC_ZN CC_REG) (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_BBIT_PEEPHOLE && peep2_regno_dead_p (2, CC_REG)" + [(parallel [(set (pc) + (if_then_else + (match_op_dup 3 + [(zero_extract:SI (match_dup 0) + (const_int 1) (match_dup 1)) + (const_int 0)]) + (label_ref (match_dup 2)) + (pc))) + (clobber (reg:CC_ZN CC_REG))])]) + +(define_insn "*cmpsi_cc_z_insn" + [(set (reg:CC_Z CC_REG) + (compare:CC_Z (match_operand:SI 0 "register_operand" "qRcq,c") + (match_operand:SI 1 "p2_immediate_operand" "O,n")))] + "" + "@ + cmp%? %0,%1%& + bxor.f 0,%0,%z1" + [(set_attr "type" "compare,compare") + (set_attr "iscompact" "true,false") + (set_attr "cond" "set,set_zn") + (set_attr "length" "*,4")]) + +(define_insn "*cmpsi_cc_c_insn" + [(set (reg:CC_C CC_REG) + (compare:CC_C (match_operand:SI 0 "register_operand" "Rcqq, c,Rcqq, c") + (match_operand:SI 1 "nonmemory_operand" "cO, cI, Cal,Cal")))] + "" + "cmp%? %0,%S1%&" + [(set_attr "type" "compare") + (set_attr "iscompact" "true,false,true_limm,false") + (set_attr "cond" "set") + (set_attr "length" "*,4,*,8")]) + +;; Next come the scc insns. 
+ +(define_expand "cstoresi4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 2 "nonmemory_operand" "") + (match_operand:SI 3 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "dest_reg_operand" "") + (match_operator:SI 1 "ordered_comparison_operator" [(reg CC_REG) + (const_int 0)]))] + "" +{ + gcc_assert (XEXP (operands[1], 0) == operands[2]); + gcc_assert (XEXP (operands[1], 1) == operands[3]); + operands[1] = gen_compare_reg (operands[1], SImode); + emit_insn (gen_scc_insn (operands[0], operands[1])); + DONE; +}) + +(define_mode_iterator SDF [SF DF]) + +(define_expand "cstore<mode>4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SDF 2 "register_operand" "") + (match_operand:SDF 3 "register_operand" ""))) + (set (match_operand:SI 0 "dest_reg_operand" "") + (match_operator:SI 1 "comparison_operator" [(reg CC_REG) + (const_int 0)]))] + + "TARGET_OPTFPE" +{ + gcc_assert (XEXP (operands[1], 0) == operands[2]); + gcc_assert (XEXP (operands[1], 1) == operands[3]); + operands[1] = gen_compare_reg (operands[1], SImode); + emit_insn (gen_scc_insn (operands[0], operands[1])); + DONE; +}) + +; We need a separate expander for this lest we loose the mode of CC_REG +; when match_operator substitutes the literal operand into the comparison. +(define_expand "scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") (match_operand:SI 1 ""))]) + +(define_insn_and_split "*scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (match_operator:SI 1 "proper_comparison_operator" [(reg CC_REG) (const_int 0)]))] + "" + "#" + "reload_completed" + [(set (match_dup 0) (const_int 1)) + (cond_exec + (match_dup 1) + (set (match_dup 0) (const_int 0)))] +{ + operands[1] + = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (operands[1]), + GET_MODE (XEXP (operands[1], 0))), + VOIDmode, + XEXP (operands[1], 0), XEXP (operands[1], 1)); +} + [(set_attr "type" "unary")]) + +;; ??? At least for ARC600, we should use sbc b,b,s12 if we want a value +;; that is one lower if the carry flag is set. + +;; ??? Look up negscc insn. See pa.md for example. 
+(define_insn "*neg_scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (neg:SI (match_operator:SI 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)])))] + "" + "mov %0,-1\;sub.%D1 %0,%0,%0" + [(set_attr "type" "unary") + (set_attr "length" "8")]) + +(define_insn "*not_scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (not:SI (match_operator:SI 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)])))] + "" + "mov %0,1\;sub.%d1 %0,%0,%0" + [(set_attr "type" "unary") + (set_attr "length" "8")]) + +; cond_exec patterns +(define_insn "*movsi_ne" + [(cond_exec + (ne (match_operand:CC_Z 2 "cc_use_register" "Rcc,Rcc,Rcc") (const_int 0)) + (set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,w,w") + (match_operand:SI 1 "nonmemory_operand" "C_0,Lc,?Cal")))] + "" + "@ + * current_insn_predicate = 0; return \"sub%?.ne %0,%0,%0%&\"; + mov.ne %0,%1 + mov.ne %0,%S1" + [(set_attr "type" "cmove,cmove,cmove") + (set_attr "iscompact" "true,false,false") + (set_attr "length" "2,4,8")]) + +(define_insn "*movsi_cond_exec" + [(cond_exec + (match_operator 3 "proper_comparison_operator" + [(match_operand 2 "cc_register" "Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (match_operand:SI 1 "nonmemory_operand" "Lc,?Cal")))] + "" + "mov.%d3 %0,%S1" + [(set_attr "type" "cmove") + (set_attr "length" "4,8")]) + +(define_insn "*commutative_cond_exec" + [(cond_exec + (match_operator 5 "proper_comparison_operator" + [(match_operand 4 "cc_register" "Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (match_operator:SI 3 "commutative_operator" + [(match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "nonmemory_operand" "cL,?Cal")])))] + "" +{ + arc_output_commutative_cond_exec (operands, true); + return ""; +} + [(set_attr "cond" "use") + (set_attr "type" "cmove") + (set_attr_alternative "length" + [(const_int 4) + (cond + [(eq (symbol_ref "arc_output_commutative_cond_exec (operands, false)") + (const_int 4)) + (const_int 4)] + (const_int 8))])]) + +(define_insn "*sub_cond_exec" + [(cond_exec + (match_operator 4 "proper_comparison_operator" + [(match_operand 3 "cc_register" "Rcc,Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w,w") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,cL,Cal") + (match_operand:SI 2 "nonmemory_operand" "cL,0,0"))))] + "" + "@ + sub.%d4 %0,%1,%2 + rsub.%d4 %0,%2,%1 + rsub.%d4 %0,%2,%1" + [(set_attr "cond" "use") + (set_attr "type" "cmove") + (set_attr "length" "4,4,8")]) + +(define_insn "*noncommutative_cond_exec" + [(cond_exec + (match_operator 5 "proper_comparison_operator" + [(match_operand 4 "cc_register" "Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (match_operator:SI 3 "noncommutative_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "cL,Cal")])))] + "" + "%O3.%d5 %0,%1,%2" + [(set_attr "cond" "use") + (set_attr "type" "cmove") + (set_attr "length" "4,8")]) + +;; These control RTL generation for conditional jump insns +;; Match both normal and inverted jump. + +; We need a separate expander for this lest we loose the mode of CC_REG +; when match_operator substitutes the literal operand into the comparison. 
+(define_expand "branch_insn" + [(set (pc) + (if_then_else (match_operand 1 "" "") + (label_ref (match_operand 0 "" "")) + (pc)))]) + +; When estimating sizes during arc_reorg, when optimizing for speed, there +; are three reasons why we need to consider branches to be length 6: +; - annull-false delay slot insns are implemented using conditional execution, +; thus preventing short insn formation where used. +; - for ARC600: annull-true delay slot isnns are implemented where possile +; using conditional execution, preventing short insn formation where used. +; - for ARC700: likely or somewhat likely taken branches are made long and +; unaligned if possible to avoid branch penalty. +(define_insn "*branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* +{ + if (arc_ccfsm_branch_deleted_p ()) + { + arc_ccfsm_record_branch_deleted (); + return \"; branch deleted, next insns conditionalized\"; + } + else + { + arc_ccfsm_record_condition (operands[1], false, insn, 0); + if (get_attr_length (insn) == 2) + return \"b%d1%? %^%l0%&\"; + else + return \"b%d1%# %^%l0\"; + } +}" + [(set_attr "type" "branch") + (set + (attr "length") + (cond [ + (eq_attr "delay_slot_filled" "yes") + (const_int 4) + + (ne + (if_then_else + (match_operand 1 "equality_comparison_operator" "") + (ior (lt (minus (match_dup 0) (pc)) (const_int -512)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 506) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (ior (match_test "!arc_short_comparison_p (operands[1], -64)") + (lt (minus (match_dup 0) (pc)) (const_int -64)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 58) + (symbol_ref "get_attr_delay_slot_length (insn)"))))) + (const_int 0)) + (const_int 4)] + (const_int 2))) + + (set (attr "iscompact") + (cond [(match_test "get_attr_length (insn) == 2") (const_string "true")] + (const_string "false")))]) + +(define_insn "*rev_branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "REVERSIBLE_CC_MODE (GET_MODE (XEXP (operands[1], 0)))" + "* +{ + if (arc_ccfsm_branch_deleted_p ()) + { + arc_ccfsm_record_branch_deleted (); + return \"; branch deleted, next insns conditionalized\"; + } + else + { + arc_ccfsm_record_condition (operands[1], true, insn, 0); + if (get_attr_length (insn) == 2) + return \"b%D1%? %^%l0\"; + else + return \"b%D1%# %^%l0\"; + } +}" + [(set_attr "type" "branch") + (set + (attr "length") + (cond [ + (eq_attr "delay_slot_filled" "yes") + (const_int 4) + + (ne + (if_then_else + (match_operand 1 "equality_comparison_operator" "") + (ior (lt (minus (match_dup 0) (pc)) (const_int -512)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 506) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (ior (match_test "!arc_short_comparison_p (operands[1], -64)") + (lt (minus (match_dup 0) (pc)) (const_int -64)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 58) + (symbol_ref "get_attr_delay_slot_length (insn)"))))) + (const_int 0)) + (const_int 4)] + (const_int 2))) + + (set (attr "iscompact") + (cond [(match_test "get_attr_length (insn) == 2") (const_string "true")] + (const_string "false")))]) + +;; Unconditional and other jump instructions. 
+ +(define_expand "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "") + +(define_insn "jump_i" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "!TARGET_LONG_CALLS_SET || !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" + "b%!%* %^%l0%&" + [(set_attr "type" "uncond_branch") + (set (attr "iscompact") + (if_then_else (match_test "get_attr_length (insn) == 2") + (const_string "true") (const_string "false"))) + (set_attr "cond" "canuse") + (set (attr "length") + (cond [ + ; In arc_reorg we just guesstimate; might be more or less than 4. + (match_test "arc_branch_size_unknown_p ()") + (const_int 4) + + (eq_attr "delay_slot_filled" "yes") + (const_int 4) + + (match_test "find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (const_int 4) + + (ior (lt (minus (match_dup 0) (pc)) (const_int -512)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 506) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4)] + (const_int 2)))]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:SI 0 "nonmemory_operand" "L,I,Cal,Rcqq,r"))] + "" + "j%!%* [%0]%&" + [(set_attr "type" "jump") + (set_attr "iscompact" "false,false,false,maybe,false") + (set_attr "cond" "canuse,canuse_limm,canuse,canuse,canuse")]) + +;; Implement a switch statement. + +(define_expand "casesi" + [(set (match_dup 5) + (minus:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "nonmemory_operand" ""))) + (set (reg:CC CC_REG) + (compare:CC (match_dup 5) + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else (gtu (reg:CC CC_REG) + (const_int 0)) + (label_ref (match_operand 4 "" "")) + (pc))) + (set (match_dup 6) + (unspec:SI [(match_operand 3 "" "") + (match_dup 5) (match_dup 7)] UNSPEC_CASESI)) + (parallel [(set (pc) (match_dup 6)) (use (match_dup 7))])] + "" + " +{ + rtx x; + + operands[5] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = operands[3]; + emit_insn (gen_subsi3 (operands[5], operands[0], operands[1])); + emit_insn (gen_cmpsi_cc_insn_mixed (operands[5], operands[2])); + x = gen_rtx_GTU (VOIDmode, gen_rtx_REG (CCmode, CC_REG), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[4]), pc_rtx); + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x)); + if (TARGET_COMPACT_CASESI) + { + emit_jump_insn (gen_casesi_compact_jump (operands[5], operands[7])); + } + else + { + operands[3] = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + if (flag_pic || !cse_not_expected) + operands[3] = force_reg (Pmode, operands[3]); + emit_insn (gen_casesi_load (operands[6], + operands[3], operands[5], operands[7])); + if (CASE_VECTOR_PC_RELATIVE || flag_pic) + emit_insn (gen_addsi3 (operands[6], operands[6], operands[3])); + emit_jump_insn (gen_casesi_jump (operands[6], operands[7])); + } + DONE; +}") + +(define_insn "casesi_load" + [(set (match_operand:SI 0 "register_operand" "=Rcq,r,r") + (unspec:SI [(match_operand:SI 1 "nonmemory_operand" "Rcq,c,Cal") + (match_operand:SI 2 "register_operand" "Rcq,c,c") + (label_ref (match_operand 3 "" ""))] UNSPEC_CASESI))] + "" + "* +{ + rtx diff_vec = PATTERN (next_nonnote_insn (operands[3])); + + if (GET_CODE (diff_vec) != ADDR_DIFF_VEC) + { + gcc_assert (GET_CODE (diff_vec) == ADDR_VEC); + gcc_assert (GET_MODE (diff_vec) == SImode); + gcc_assert (!CASE_VECTOR_PC_RELATIVE && !flag_pic); + } + + switch (GET_MODE (diff_vec)) + { + case SImode: + return \"ld.as %0,[%1,%2]%&\"; + case HImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return \"ldw.as 
%0,[%1,%2]\"; + return \"ldw.x.as %0,[%1,%2]\"; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return \"ldb%? %0,[%1,%2]%&\"; + return \"ldb.x %0,[%1,%2]\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "type" "load") + (set_attr_alternative "iscompact" + [(cond + [(ne (symbol_ref "GET_MODE (PATTERN (next_nonnote_insn (operands[3])))") + (symbol_ref "QImode")) + (const_string "false") + (match_test "!ADDR_DIFF_VEC_FLAGS (PATTERN (next_nonnote_insn (operands[3]))).offset_unsigned") + (const_string "false")] + (const_string "true")) + (const_string "false") + (const_string "false")]) + (set_attr_alternative "length" + [(cond + [(eq_attr "iscompact" "false") (const_int 4)] + (const_int 2)) + (const_int 4) + (const_int 8)])]) + +; Unlike the canonical tablejump, this pattern always uses a jump address, +; even for CASE_VECTOR_PC_RELATIVE. +(define_insn "casesi_jump" + [(set (pc) (match_operand:SI 0 "register_operand" "Cal,Rcqq,c")) + (use (label_ref (match_operand 1 "" "")))] + "" + "j%!%* [%0]%&" + [(set_attr "type" "jump") + (set_attr "iscompact" "false,maybe,false") + (set_attr "cond" "canuse")]) + +(define_insn "casesi_compact_jump" + [(set (pc) + (unspec:SI [(match_operand:SI 0 "register_operand" "c,q")] + UNSPEC_CASESI)) + (use (label_ref (match_operand 1 "" ""))) + (clobber (match_scratch:SI 2 "=q,0"))] + "TARGET_COMPACT_CASESI" + "* +{ + rtx diff_vec = PATTERN (next_nonnote_insn (operands[1])); + int unalign = arc_get_unalign (); + rtx xop[3]; + const char *s; + + xop[0] = operands[0]; + xop[2] = operands[2]; + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) + { + case SImode: + /* Max length can be 12 in this case, but this is OK because + 2 of these are for alignment, and are anticipated in the length + of the ADDR_DIFF_VEC. */ + if (unalign && !satisfies_constraint_Rcq (xop[0])) + s = \"add2 %2,pcl,%0\n\tld_s%2,[%2,12]\"; + else if (unalign) + s = \"add_s %2,%0,2\n\tld.as %2,[pcl,%2]\"; + else + s = \"add %2,%0,2\n\tld.as %2,[pcl,%2]\"; + arc_clear_unalign (); + break; + case HImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + { + if (satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %2,%0,%1\n\tldw.as %2,[pcl,%2]\"; + xop[1] = GEN_INT ((10 - unalign) / 2U); + } + else + { + s = \"add1 %2,pcl,%0\n\tldw_s %2,[%2,%1]\"; + xop[1] = GEN_INT (10 + unalign); + } + } + else + { + if (satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %2,%0,%1\n\tldw.x.as %2,[pcl,%2]\"; + xop[1] = GEN_INT ((10 - unalign) / 2U); + } + else + { + s = \"add1 %2,pcl,%0\n\tldw_s.x %2,[%2,%1]\"; + xop[1] = GEN_INT (10 + unalign); + } + } + arc_toggle_unalign (); + break; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + { + if ((rtx_equal_p (xop[2], xop[0]) + || find_reg_note (insn, REG_DEAD, xop[0])) + && satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %0,%0,pcl\n\tldb_s %2,[%0,%1]\"; + xop[1] = GEN_INT (8 + unalign); + } + else + { + s = \"add %2,%0,pcl\n\tldb_s %2,[%2,%1]\"; + xop[1] = GEN_INT (10 + unalign); + arc_toggle_unalign (); + } + } + else if ((rtx_equal_p (xop[0], xop[2]) + || find_reg_note (insn, REG_DEAD, xop[0])) + && satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %0,%0,%1\n\tldb.x %2,[pcl,%0]\"; + xop[1] = GEN_INT (10 - unalign); + arc_toggle_unalign (); + } + else + { + /* ??? Length is 12. 
*/ + s = \"add %2,%0,%1\n\tldb.x %2,[pcl,%2]\"; + xop[1] = GEN_INT (8 + unalign); + } + break; + default: + gcc_unreachable (); + } + output_asm_insn (s, xop); + return \"add_s %2,%2,pcl\n\tj_s%* [%2]\"; +}" + [(set_attr "length" "10") + (set_attr "type" "jump") + (set_attr "iscompact" "true") + (set_attr "cond" "nocond")]) + +(define_expand "call" + ;; operands[1] is stack_size_rtx + ;; operands[2] is next_arg_register + [(parallel [(call (match_operand:SI 0 "call_operand" "") + (match_operand 1 "" "")) + (clobber (reg:SI 31))])] + "" + "{ + rtx callee; + + gcc_assert (MEM_P (operands[0])); + callee = XEXP (operands[0], 0); + if (crtl->profile && arc_profile_call (callee)) + { + emit_call_insn (gen_call_prof (gen_rtx_SYMBOL_REF (Pmode, + \"_mcount_call\"), + operands[1])); + DONE; + } + /* This is to decide if we should generate indirect calls by loading the + 32 bit address of the callee into a register before performing the + branch and link - this exposes cse opportunities. + Also, in weird cases like compile/20010107-1.c, we may get a PLUS. */ + if (GET_CODE (callee) != REG + && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee))) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + } +") + + +; Rcq, which is used in alternative 0, checks for conditional execution. +; At instruction output time, if it doesn't match and we end up with +; alternative 1 ("q"), that means that we can't use the short form. +(define_insn "*call_i" + [(call (mem:SI (match_operand:SI 0 + "call_address_operand" "Rcq,q,c,Cbp,Cbr,L,I,Cal")) + (match_operand 1 "" "")) + (clobber (reg:SI 31))] + "" + "@ + jl%!%* [%0]%& + jl%!%* [%0]%& + jl%!%* [%0] + bl%!%* %P0 + bl%!%* %P0 + jl%!%* %S0 + jl%* %S0 + jl%! %S0" + [(set_attr "type" "call,call,call,call,call,call,call,call_no_delay_slot") + (set_attr "iscompact" "maybe,false,*,*,*,*,*,*") + (set_attr "predicable" "no,no,yes,yes,no,yes,no,yes") + (set_attr "length" "*,*,4,4,4,4,4,8")]) + +(define_insn "call_prof" + [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "Cbr,Cal")) + (match_operand 1 "" "")) + (clobber (reg:SI 31)) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + bl%!%* %P0;2 + jl%! %^%S0" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes,yes") + (set_attr "length" "4,8")]) + +(define_expand "call_value" + ;; operand 2 is stack_size_rtx + ;; operand 3 is next_arg_register + [(parallel [(set (match_operand 0 "dest_reg_operand" "=r") + (call (match_operand:SI 1 "call_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI 31))])] + "" + " + { + rtx callee; + + gcc_assert (MEM_P (operands[1])); + callee = XEXP (operands[1], 0); + if (crtl->profile && arc_profile_call (callee)) + { + emit_call_insn (gen_call_value_prof (operands[0], + gen_rtx_SYMBOL_REF (Pmode, + \"_mcount_call\"), + operands[2])); + DONE; + } + /* See the comment in define_expand \"call\". */ + if (GET_CODE (callee) != REG + && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee))) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + }") + + +; Rcq, which is used in alternative 0, checks for conditional execution. +; At instruction output time, if it doesn't match and we end up with +; alternative 1 ("q"), that means that we can't use the short form. 
+(define_insn "*call_value_i" + [(set (match_operand 0 "dest_reg_operand" "=Rcq,q,w, w, w,w,w, w") + (call (mem:SI (match_operand:SI 1 + "call_address_operand" "Rcq,q,c,Cbp,Cbr,L,I,Cal")) + (match_operand 2 "" ""))) + (clobber (reg:SI 31))] + "" + "@ + jl%!%* [%1]%& + jl%!%* [%1]%& + jl%!%* [%1] + bl%!%* %P1;1 + bl%!%* %P1;1 + jl%!%* %S1 + jl%* %S1 + jl%! %S1" + [(set_attr "type" "call,call,call,call,call,call,call,call_no_delay_slot") + (set_attr "iscompact" "maybe,false,*,*,*,*,*,*") + (set_attr "predicable" "no,no,yes,yes,no,yes,no,yes") + (set_attr "length" "*,*,4,4,4,4,4,8")]) + + +;; TODO - supporting 16-bit short "branch and link" insns if required. +;(define_insn "*call_value_via_label_mixed" +; [(set (match_operand 0 "register_operand" "=r") +; (call (mem:SI (match_operand:SI 1 "call_address_operand" "")) +; (match_operand 2 "" ""))) +; (clobber (reg:SI 31))] +; "TARGET_MIXED_CODE" +; "bl_s %1" +; [(set_attr "type" "call")]) + +(define_insn "call_value_prof" + [(set (match_operand 0 "dest_reg_operand" "=r,r") + (call (mem:SI (match_operand:SI 1 "symbolic_operand" "Cbr,Cal")) + (match_operand 2 "" ""))) + (clobber (reg:SI 31)) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + bl%!%* %P1;1 + jl%! %^%S1" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes,yes") + (set_attr "length" "4,8")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop%?" + [(set_attr "type" "misc") + (set_attr "iscompact" "true") + (set_attr "cond" "canuse") + (set_attr "length" "2")]) + +;; Special pattern to flush the icache. +;; ??? Not sure what to do here. Some ARC's are known to support this. + +(define_insn "flush_icache" + [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 0)] + "" + "* return \"\";" + [(set_attr "type" "misc")]) + +;; Split up troublesome insns for better scheduling. + +;; Peepholes go at the end. +;;asl followed by add can be replaced by an add{1,2,3} +;; Three define_peepholes have been added for this optimization +;; ??? This used to target non-canonical rtl. Now we use add_n, which +;; can be generated by combine. Check if these peepholes still provide +;; any benefit. + +;; ------------------------------------------------------------- +;; Pattern 1 : r0 = r1 << {i} +;; r3 = r4/INT + r0 ;;and commutative +;; || +;; \/ +;; add{i} r3,r4/INT,r1 +;; ------------------------------------------------------------- +;; ??? This should be covered by combine, alas, at times combine gets +;; too clever for it's own good: when the shifted input is known to be +;; either 0 or 1, the operation will be made into an if-then-else, and +;; thus fail to match the add_n pattern. Example: _mktm_r, line 85 in +;; newlib/libc/time/mktm_r.c . + +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand:SI 3 "dest_reg_operand" "") + (plus:SI (match_operand:SI 4 "nonmemory_operand" "") + (match_operand:SI 5 "nonmemory_operand" "")))] + "(INTVAL (operands[2]) == 1 + || INTVAL (operands[2]) == 2 + || INTVAL (operands[2]) == 3) + && (true_regnum (operands[4]) == true_regnum (operands[0]) + || true_regnum (operands[5]) == true_regnum (operands[0])) + && (peep2_reg_dead_p (2, operands[0]) || (true_regnum (operands[3]) == true_regnum (operands[0])))" + ;; the preparation statements take care to put proper operand in operands[4] + ;; operands[4] will always contain the correct operand. 
This is added to satisfy commutativity
+ [(set (match_dup 3)
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 4)))]
+ "if (true_regnum (operands[4]) == true_regnum (operands[0]))
+ operands[4] = operands[5];
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
+)
+
+;; -------------------------------------------------------------
+;; Pattern 2 : r0 = r1 << {i}
+;; r3 = r4 - r0
+;; ||
+;; \/
+;; sub{i} r3,r4,r1
+;; -------------------------------------------------------------
+;; ??? This should be covered by combine, alas, at times combine gets
+;; too clever for its own good: when the shifted input is known to be
+;; either 0 or 1, the operation will be made into an if-then-else, and
+;; thus fail to match the sub_n pattern. Example: __ieee754_yn, line 239 in
+;; newlib/libm/math/e_jn.c .
+
+(define_peephole2
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand:SI 3 "dest_reg_operand" "")
+ (minus:SI (match_operand:SI 4 "nonmemory_operand" "")
+ (match_dup 0)))]
+ "(INTVAL (operands[2]) == 1
+ || INTVAL (operands[2]) == 2
+ || INTVAL (operands[2]) == 3)
+ && (peep2_reg_dead_p (2, operands[0])
+ || (true_regnum (operands[3]) == true_regnum (operands[0])))"
+ [(set (match_dup 3)
+ (minus:SI (match_dup 4)
+ (mult:SI (match_dup 1)
+ (match_dup 2))))]
+ "operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
+)
+
+
+
+; When using the high single bit, the result of a multiply is either
+; the original number or zero. But MPY costs 4 cycles, which we
+; can replace with the 2 cycles for the pair of TST_S and ADD.NE.
+(define_peephole2
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 31)))
+ (set (match_operand:SI 4 "register_operand" "")
+ (mult:SI (match_operand:SI 2 "register_operand")
+ (match_operand:SI 3 "nonmemory_operand" "")))]
+ "TARGET_ARC700 && !TARGET_NOMPY_SET
+ && (rtx_equal_p (operands[0], operands[2])
+ || rtx_equal_p (operands[0], operands[3]))
+ && peep2_regno_dead_p (0, CC_REG)
+ && (rtx_equal_p (operands[0], operands[4])
+ || (peep2_reg_dead_p (2, operands[0])
+ && peep2_reg_dead_p (1, operands[4])))"
+ [(parallel [(set (reg:CC_Z CC_REG)
+ (compare:CC_Z (lshiftrt:SI (match_dup 1) (const_int 31))
+ (const_int 0)))
+ (set (match_dup 4) (lshiftrt:SI (match_dup 1) (const_int 31)))])
+ (cond_exec
+ (ne (reg:CC_Z CC_REG) (const_int 0))
+ (set (match_dup 4) (match_dup 5)))]
+{
+ if (!rtx_equal_p (operands[0], operands[2]))
+ operands[5] = operands[2];
+ else if (!rtx_equal_p (operands[0], operands[3]))
+ operands[5] = operands[3];
+ else
+ operands[5] = operands[4]; /* Actually a no-op... presumably rare. 
*/ +}) + +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "") + (const_int 31))) + (set (match_operand:SI 4 "register_operand" "") + (mult:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonmemory_operand" "")))] + "TARGET_ARC700 && !TARGET_NOMPY_SET + && (rtx_equal_p (operands[0], operands[2]) + || rtx_equal_p (operands[0], operands[3])) + && peep2_regno_dead_p (2, CC_REG)" + [(parallel [(set (reg:CC_Z CC_REG) + (compare:CC_Z (lshiftrt:SI (match_dup 1) (const_int 31)) + (const_int 0))) + (set (match_dup 0) (lshiftrt:SI (match_dup 1) (const_int 31)))]) + (set (match_dup 4) (match_dup 5)) + (cond_exec + (eq (reg:CC_Z CC_REG) (const_int 0)) + (set (match_dup 4) (const_int 0)))] + "operands[5] = operands[rtx_equal_p (operands[0], operands[2]) ? 3 : 2];") + +;; Instructions generated through builtins + +(define_insn "clrsbsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (clrsb:SI (match_operand:SI 1 "general_operand" "cL,Cal")))] + "TARGET_NORM" + "@ + norm \t%0, %1 + norm \t%0, %S1" + [(set_attr "length" "4,8") + (set_attr "type" "two_cycle_core,two_cycle_core")]) + +(define_insn "norm_f" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (clrsb:SI (match_operand:SI 1 "general_operand" "cL,Cal"))) + (set (reg:CC_ZN CC_REG) + (compare:CC_ZN (match_dup 1) (const_int 0)))] + "TARGET_NORM" + "@ + norm.f\t%0, %1 + norm.f\t%0, %S1" + [(set_attr "length" "4,8") + (set_attr "type" "two_cycle_core,two_cycle_core")]) + +(define_insn_and_split "clrsbhi2" + [(set (match_operand:HI 0 "dest_reg_operand" "=w,w") + (clrsb:HI (match_operand:HI 1 "general_operand" "cL,Cal")))] + "TARGET_NORM" + "#" + "reload_completed" + [(set (match_dup 0) (zero_extend:SI (clrsb:HI (match_dup 1))))] + "operands[0] = simplify_gen_subreg (SImode, operands[0], HImode, 0);") + +(define_insn "normw" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (zero_extend:SI + (clrsb:HI (match_operand:HI 1 "general_operand" "cL,Cal"))))] + "TARGET_NORM" + "@ + normw \t%0, %1 + normw \t%0, %S1" + [(set_attr "length" "4,8") + (set_attr "type" "two_cycle_core,two_cycle_core")]) + +(define_expand "clzsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (clz:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_NORM" +{ + emit_insn (gen_norm_f (operands[0], operands[1])); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LT (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_GE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], + plus_constant (SImode, operands[0], 1)))); + DONE; +}) + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "") + (ctz:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_NORM" +{ + rtx temp = operands[0]; + + if (reg_overlap_mentioned_p (temp, operands[1]) + || (REGNO (temp) < FIRST_PSEUDO_REGISTER + && !TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], + REGNO (temp)))) + temp = gen_reg_rtx (SImode); + emit_insn (gen_addsi3 (temp, operands[1], constm1_rtx)); + emit_insn (gen_bic_f_zn (temp, temp, operands[1])); + emit_insn (gen_clrsbsi2 (temp, temp)); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LT (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], GEN_INT (32)))); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_GE (VOIDmode, gen_rtx_REG 
(CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_MINUS (SImode, GEN_INT (31), temp)))); + DONE; +}) + + +(define_insn "swap" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w,w") + (unspec:SI [(match_operand:SI 1 "general_operand" "L,Cal,c")] + UNSPEC_SWAP))] + "TARGET_SWAP" + "@ + swap \t%0, %1 + swap \t%0, %S1 + swap \t%0, %1" + [(set_attr "length" "4,8,4") + (set_attr "type" "two_cycle_core,two_cycle_core,two_cycle_core")]) + +;; FIXME: an intrinsic for multiply is daft. Can we remove this? +(define_insn "mul64" + [(unspec [(match_operand:SI 0 "general_operand" "q,r,r,%r") + (match_operand:SI 1 "general_operand" "q,rL,I,Cal")] + UNSPEC_MUL64)] + "TARGET_MUL64_SET" + "@ + mul64%? \t0, %0, %1%& + mul64%? \t0, %0, %1 + mul64 \t0, %0, %1 + mul64%? \t0, %0, %S1" + [(set_attr "length" "2,4,4,8") + (set_attr "iscompact" "true,false,false,false") + (set_attr "type" "binary,binary,binary,binary") + (set_attr "cond" "canuse,canuse, nocond, canuse")]) + +(define_insn "mulu64" + [(unspec [(match_operand:SI 0 "general_operand" "%r,r,r,r") + (match_operand:SI 1 "general_operand" "rL,I,r,Cal")] + UNSPEC_MULU64)] + "TARGET_MUL64_SET" + "@ + mulu64%? \t0, %0, %1 + mulu64 \t0, %0, %1 + mulu64 \t0, %0, %1 + mulu64%? \t0, %0, %S1" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "binary,binary,binary,binary") + (set_attr "cond" "canuse,nocond,nocond,canuse")]) + +(define_insn "divaw" + [(set (match_operand:SI 0 "dest_reg_operand" "=&w,&w,&w") + (unspec:SI [(div:SI (match_operand:SI 1 "general_operand" "r,Cal,r") + (match_operand:SI 2 "general_operand" "r,r,Cal"))] + UNSPEC_DIVAW))] + "TARGET_ARC700 || TARGET_EA_SET" + "@ + divaw \t%0, %1, %2 + divaw \t%0, %S1, %2 + divaw \t%0, %1, %S2" + [(set_attr "length" "4,8,8") + (set_attr "type" "divaw,divaw,divaw")]) + +(define_insn "flag" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "rL,I,Cal")] + VUNSPEC_FLAG)] + "" + "@ + flag%? %0 + flag %0 + flag%? 
%S0" + [(set_attr "length" "4,4,8") + (set_attr "type" "misc,misc,misc") + (set_attr "predicable" "yes,no,yes") + (set_attr "cond" "clob,clob,clob")]) + +(define_insn "brk" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_BRK)] + "" + "brk" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +(define_insn "rtie" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_RTIE)] + "" + "rtie" + [(set_attr "length" "4") + (set_attr "type" "misc") + (set_attr "cond" "clob")]) + +(define_insn "sync" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_SYNC)] + "" + "sync" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +(define_insn "swi" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_SWI)] + "" + "* +{ + if(TARGET_ARC700) + return \"trap0\"; + else + return \"swi\"; +}" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + + +(define_insn "sleep" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "L")] + VUNSPEC_SLEEP)] + "check_if_valid_sleep_operand(operands,0)" + "sleep %0" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +(define_insn "core_read" + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r") + (unspec_volatile:SI [(match_operand:SI 1 "general_operand" "Hn,!r")] + VUNSPEC_CORE_READ))] + "" + "* + if (check_if_valid_regno_const (operands, 1)) + return \"mov \t%0, r%1\"; + return \"mov \t%0, r%1\"; + " + [(set_attr "length" "4") + (set_attr "type" "unary")]) + +(define_insn "core_write" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "r,r") + (match_operand:SI 1 "general_operand" "Hn,!r")] + VUNSPEC_CORE_WRITE)] + "" + "* + if (check_if_valid_regno_const (operands, 1)) + return \"mov \tr%1, %0\"; + return \"mov \tr%1, %0\"; + " + [(set_attr "length" "4") + (set_attr "type" "unary")]) + +(define_insn "lr" + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r,r") + (unspec_volatile:SI [(match_operand:SI 1 "general_operand" "I,HCal,r,D")] + VUNSPEC_LR))] + "" + "lr\t%0, [%1]" + [(set_attr "length" "4,8,4,8") + (set_attr "type" "lr,lr,lr,lr")]) + +(define_insn "sr" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "Cal,r,r,r") + (match_operand:SI 1 "general_operand" "Ir,I,HCal,r")] + VUNSPEC_SR)] + "" + "sr\t%S0, [%1]" + [(set_attr "length" "8,4,8,4") + (set_attr "type" "sr,sr,sr,sr")]) + +(define_insn "trap_s" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "L,Cal")] + VUNSPEC_TRAP_S)] + "TARGET_ARC700" +{ + if (which_alternative == 0) + { + arc_toggle_unalign (); + return \"trap_s %0\"; + } + + /* Keep this message in sync with the one in arc.c:arc_expand_builtin, + because *.md files do not get scanned by exgettext. */ + fatal_error (\"operand to trap_s should be an unsigned 6-bit value\"); +} + [(set_attr "length" "2") + (set_attr "type" "misc")]) + +(define_insn "unimp_s" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_UNIMP_S)] + "TARGET_ARC700" + "unimp_s" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +;; End of instructions generated through builtins + +; Since the demise of REG_N_SETS as reliable data readily available to the +; target, it is no longer possible to find out +; in the prologue / epilogue expanders how many times blink is set. 
+; Using df_regs_ever_live_p to decide if blink needs saving means that +; any explicit use of blink will cause it to be saved; hence we cannot +; represent the blink use in return / sibcall instructions themselves, and +; instead have to show it in EPILOGUE_USES and must explicitly +; forbid instructions that change blink in the return / sibcall delay slot. +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (simple_return) + (use (match_operand 2 "" ""))])] + "" + " + { + rtx callee = XEXP (operands[0], 0); + + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + if (crtl->profile && arc_profile_call (callee)) + { + emit_insn (gen_sibcall_prof + (gen_rtx_SYMBOL_REF (Pmode, \"_mcount_call\"), + operands[1], operands[2])); + DONE; + } + if (GET_CODE (callee) != REG + && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee))) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + }" +) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "dest_reg_operand" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (simple_return) + (use (match_operand 3 "" ""))])] + "" + " + { + rtx callee = XEXP (operands[1], 0); + + if (operands[3] == NULL_RTX) + operands[3] = const0_rtx; + if (crtl->profile && arc_profile_call (XEXP (operands[1], 0))) + { + emit_insn (gen_sibcall_value_prof + (operands[0], gen_rtx_SYMBOL_REF (Pmode, \"_mcount_call\"), + operands[2], operands[3])); + DONE; + } + if (GET_CODE (callee) != REG && arc_is_longcall_p (callee)) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + }" +) + +(define_insn "*sibcall_insn" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" + "Cbp,Cbr,Rs5,Rsc,Cal")) + (match_operand 1 "" "")) + (simple_return) + (use (match_operand 2 "" ""))] + "" + "@ + b%!%* %P0 + b%!%* %P0 + j%!%* [%0]%& + j%!%* [%0] + j%! %P0" + [(set_attr "type" "call,call,call,call,call_no_delay_slot") + (set_attr "predicable" "yes,no,no,yes,yes") + (set_attr "iscompact" "false,false,maybe,false,false") + (set_attr "is_SIBCALL" "yes")] +) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "dest_reg_operand" "") + (call (mem:SI (match_operand:SI 1 "call_address_operand" + "Cbp,Cbr,Rs5,Rsc,Cal")) + (match_operand 2 "" ""))) + (simple_return) + (use (match_operand 3 "" ""))] + "" + "@ + b%!%* %P1 + b%!%* %P1 + j%!%* [%1]%& + j%!%* [%1] + j%! %P1" + [(set_attr "type" "call,call,call,call,call_no_delay_slot") + (set_attr "predicable" "yes,no,no,yes,yes") + (set_attr "iscompact" "false,false,maybe,false,false") + (set_attr "is_SIBCALL" "yes")] +) + +(define_insn "sibcall_prof" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" "Cbr,Cal")) + (match_operand 1 "" "")) + (simple_return) + (use (match_operand 2 "" "")) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + b%!%* %P0;2 + j%! %^%S0;2" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes") + (set_attr "is_SIBCALL" "yes")] +) + +(define_insn "sibcall_value_prof" + [(set (match_operand 0 "dest_reg_operand" "") + (call (mem:SI (match_operand:SI 1 "call_address_operand" "Cbr,Cal")) + (match_operand 2 "" ""))) + (simple_return) + (use (match_operand 3 "" "")) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + b%!%* %P1;1 + j%! 
%^%S1;1" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes") + (set_attr "is_SIBCALL" "yes")] +) + +(define_expand "prologue" + [(pc)] + "" +{ + arc_expand_prologue (); + DONE; +}) + +(define_expand "epilogue" + [(pc)] + "" +{ + arc_expand_epilogue (0); + DONE; +}) + +(define_expand "sibcall_epilogue" + [(pc)] + "" +{ + arc_expand_epilogue (1); + DONE; +}) + +; Since the demise of REG_N_SETS, it is no longer possible to find out +; in the prologue / epilogue expanders how many times blink is set. +; Using df_regs_ever_live_p to decide if blink needs saving means that +; any explicit use of blink will cause it to be saved; hence we cannot +; represent the blink use in return / sibcall instructions themselves, and +; instead have to show it in EPILOGUE_USES and must explicitly +; forbid instructions that change blink in the return / sibcall delay slot. +(define_insn "simple_return" + [(simple_return)] + "reload_completed" +{ + rtx reg + = gen_rtx_REG (Pmode, + arc_return_address_regs[arc_compute_function_type (cfun)]); + + if (TARGET_PAD_RETURN) + arc_pad_return (); + output_asm_insn (\"j%!%* [%0]%&\", ®); + return \"\"; +} + [(set_attr "type" "return") + ; predicable won't help here since the canonical rtl looks different + ; for branches. + (set_attr "cond" "canuse") + (set (attr "iscompact") + (cond [(eq (symbol_ref "arc_compute_function_type (cfun)") + (symbol_ref "ARC_FUNCTION_NORMAL")) + (const_string "maybe")] + (const_string "false"))) + (set (attr "length") + (cond [(ne (symbol_ref "arc_compute_function_type (cfun)") + (symbol_ref "ARC_FUNCTION_NORMAL")) + (const_int 4)] + (const_int 2)))]) + +(define_insn "p_return_i" + [(set (pc) + (if_then_else (match_operator 0 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)]) + (simple_return) (pc)))] + "reload_completed" +{ + rtx xop[2]; + xop[0] = operands[0]; + xop[1] + = gen_rtx_REG (Pmode, + arc_return_address_regs[arc_compute_function_type (cfun)]); + + if (TARGET_PAD_RETURN) + arc_pad_return (); + output_asm_insn (\"j%d0%!%# [%1]%&\", xop); + /* record the condition in case there is a delay insn. 
*/
+ arc_ccfsm_record_condition (xop[0], false, insn, 0);
+ return \"\";
+}
+ [(set_attr "type" "return")
+ (set_attr "cond" "use")
+ (set (attr "iscompact")
+ (cond [(eq (symbol_ref "arc_compute_function_type (cfun)")
+ (symbol_ref "ARC_FUNCTION_NORMAL"))
+ (const_string "maybe")]
+ (const_string "false")))
+ (set (attr "length")
+ (cond [(ne (symbol_ref "arc_compute_function_type (cfun)")
+ (symbol_ref "ARC_FUNCTION_NORMAL"))
+ (const_int 4)
+ (not (match_operand 0 "equality_comparison_operator" ""))
+ (const_int 4)
+ (eq_attr "delay_slot_filled" "yes")
+ (const_int 4)]
+ (const_int 2)))])
+
+(define_insn_and_split "eh_return"
+ [(eh_return)
+ (use (match_operand:SI 0 "move_src_operand" "rC32,mCalCpc"))
+ (clobber (match_scratch:SI 1 "=X,r"))
+ (clobber (match_scratch:SI 2 "=&r,r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 0))]
+{
+ int offs = arc_return_slot_offset ();
+
+ if (offs < 0)
+ operands[2] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ else
+ {
+ if (!register_operand (operands[0], Pmode)
+ && !satisfies_constraint_C32 (operands[0]))
+ {
+ emit_move_insn (operands[1], operands[0]);
+ operands[0] = operands[1];
+ }
+ rtx addr = plus_constant (Pmode, stack_pointer_rtx, offs);
+ if (!strict_memory_address_p (Pmode, addr))
+ {
+ emit_move_insn (operands[2], addr);
+ addr = operands[2];
+ }
+ operands[2] = gen_frame_mem (Pmode, addr);
+ }
+}
+ [(set_attr "length" "12")])
+
+;; ??? #ifdefs in function.c require the presence of this pattern, with a
+;; non-constant predicate.
+(define_expand "return"
+ [(return)]
+ "optimize < 0")
+
+ ;; Comment in final.c (insn_current_reference_address) says
+ ;; forward branch addresses are calculated from the next insn after branch
+ ;; and for backward branches, it is calculated from the branch insn start.
+ ;; The shortening logic here is tuned to accommodate this behaviour.
+;; ??? This should be grokked by the ccfsm machinery.
+(define_insn "cbranchsi4_scratch"
+ [(set (pc)
+ (if_then_else (match_operator 0 "proper_comparison_operator"
+ [(match_operand:SI 1 "register_operand" "c,c, c")
+ (match_operand:SI 2 "nonmemory_operand" "L,c,?Cal")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_operand 4 "cc_register" ""))]
+ "(reload_completed
+ || (TARGET_EARLY_CBRANCHSI
+ && brcc_nolimm_operator (operands[0], VOIDmode)))
+ && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)"
+ "*
+ switch (get_attr_length (insn))
+ {
+ case 2: return \"br%d0%? %1, %2, %^%l3%&\";
+ case 4: return \"br%d0%* %1, %B2, %^%l3\";
+ case 8: if (!brcc_nolimm_operator (operands[0], VOIDmode))
+ return \"br%d0%* %1, %B2, %^%l3\";
+ case 6: case 10:
+ case 12:return \"cmp%? %1, %B2\\n\\tb%d0%* %^%l3%&;br%d0 out of range\";
+ default: fprintf (stderr, \"unexpected length %d\\n\", get_attr_length (insn)); fflush (stderr); gcc_unreachable ();
+ }
+ "
+ [(set_attr "cond" "clob, clob, clob")
+ (set (attr "type")
+ (if_then_else
+ (match_test "valid_brcc_with_delay_p (operands)")
+ (const_string "brcc")
+ (const_string "brcc_no_delay_slot")))
+ ; For forward branches, we need to account not only for the distance to
+ ; the target, but also the difference between pcl and pc, the instruction
+ ; length, and any delay insn, if present.
+ (set
+ (attr "length")
+ (cond ; the outer cond does a test independent of branch shortening. 
+ [(match_operand 0 "brcc_nolimm_operator" "") + (cond + [(and (match_operand:CC_Z 4 "cc_register") + (eq_attr "delay_slot_filled" "no") + (ge (minus (match_dup 3) (pc)) (const_int -128)) + (le (minus (match_dup 3) (pc)) + (minus (const_int 122) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 2) + (and (ge (minus (match_dup 3) (pc)) (const_int -256)) + (le (minus (match_dup 3) (pc)) + (minus (const_int 244) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4) + (match_operand:SI 1 "compact_register_operand" "") + (const_int 6)] + (const_int 8))] + (cond [(and (ge (minus (match_dup 3) (pc)) (const_int -256)) + (le (minus (match_dup 3) (pc)) (const_int 244))) + (const_int 8) + (match_operand:SI 1 "compact_register_operand" "") + (const_int 10)] + (const_int 12)))) + (set (attr "iscompact") + (if_then_else (match_test "get_attr_length (insn) & 2") + (const_string "true") (const_string "false")))]) + +; combiner pattern observed for unwind-dw2-fde.c:linear_search_fdes. +(define_insn "*bbit" + [(set (pc) + (if_then_else + (match_operator 3 "equality_comparison_operator" + [(zero_extract:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "L,Lc")) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (reg:CC_ZN CC_REG))] + "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" +{ + switch (get_attr_length (insn)) + { + case 4: return (GET_CODE (operands[3]) == EQ + ? \"bbit0%* %1,%2,%0\" : \"bbit1%* %1,%2,%0\"); + case 6: + case 8: return \"btst%? %1,%2\n\tb%d3%* %0; bbit out of range\"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "brcc") + (set_attr "cond" "clob") + (set (attr "length") + (cond [(and (ge (minus (match_dup 0) (pc)) (const_int -254)) + (le (minus (match_dup 0) (pc)) + (minus (const_int 248) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4) + (eq (symbol_ref "which_alternative") (const_int 0)) + (const_int 6)] + (const_int 8))) + (set (attr "iscompact") + (if_then_else (match_test "get_attr_length (insn) == 6") + (const_string "true") (const_string "false")))]) + +; ??? When testing a bit from a DImode register, combine creates a +; zero_extract in DImode. This goes via an AND with a DImode constant, +; so can only be observed on 64 bit hosts. 
+(define_insn_and_split "*bbit_di" + [(set (pc) + (if_then_else + (match_operator 3 "equality_comparison_operator" + [(zero_extract:DI (match_operand:SI 1 "register_operand" "Rcqq,c") + (const_int 1) + (match_operand 2 "immediate_operand" "L,L")) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (reg:CC_ZN CC_REG))] + "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" + "#" + "" + [(parallel + [(set (pc) (if_then_else (match_dup 3) (label_ref (match_dup 0)) (pc))) + (clobber (reg:CC_ZN CC_REG))])] +{ + rtx xtr; + + xtr = gen_rtx_ZERO_EXTRACT (SImode, operands[1], const1_rtx, operands[2]); + operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]), + xtr, const0_rtx); +}) + +; operand 0 is the loop count pseudo register +; operand 1 is the number of loop iterations or 0 if it is unknown +; operand 2 is the maximum number of loop iterations +; operand 3 is the number of levels of enclosed loops +; operand 4 is the loop end pattern +(define_expand "doloop_begin" + [(use (match_operand 0 "register_operand" "")) + (use (match_operand:QI 1 "const_int_operand" "")) + (use (match_operand:QI 2 "const_int_operand" "")) + (use (match_operand:QI 3 "const_int_operand" "")) + (use (match_operand 4 "" ""))] + "" +{ + /* Using the INSN_UID of the loop end pattern to identify it causes + trouble with -fcompare-debug, so allocate a debug-independent + id instead. We use negative numbers so that we can use the same + slot in doloop_end_i where we later store a CODE_LABEL_NUMBER, and + still be able to tell what kind of number this is. */ + static HOST_WIDE_INT loop_end_id = 0; + + if (INTVAL (operands[3]) > 1) + FAIL; + rtx id = GEN_INT (--loop_end_id); + XEXP (XVECEXP (PATTERN (operands[4]), 0, 4), 0) = id; + emit_insn (gen_doloop_begin_i (operands[0], const0_rtx, id, + const0_rtx, const0_rtx)); + DONE; +}) + +; ??? can't describe the insn properly as then the optimizers try to +; hoist the SETs. +;(define_insn "doloop_begin_i" +; [(set (reg:SI LP_START) (pc)) +; (set (reg:SI LP_END) (unspec:SI [(pc)] UNSPEC_LP)) +; (use (match_operand 0 "const_int_operand" "n"))] +; "" +; "lp .L__GCC__LP%0" +;) + +; The operands of doloop_end_i are also read / written by arc_reorg with +; XVECEXP (PATTERN (lp, 0, N), so if you want to change the pattern, you +; might have to adjust arc_reorg. +; operands 0 / 2 are supplied by the expander, 1, 3 and 4 are filled in +; by arc_reorg. arc_reorg might also alter operand 0. +; +; N in XVECEXP PATTERN (lp, 0 N) +; V rtl purpose +; 0 unspec UNSPEC_LP identify pattern +; 1 clobber LP_START show LP_START is set +; 2 clobber LP_END show LP_END is set +; 3 use operand0 loop count pseudo register +; 4 use operand1 before arc_reorg: -id +; after : CODE_LABEL_NUMBER of loop top label +; 5 use operand2 INSN_UID of loop end insn +; 6 use operand3 loop setup not at start (1 above, 2 below) +; 7 use operand4 LABEL_REF of top label, if not +; immediately following +; If operand1 is still zero after arc_reorg, this is an orphaned loop +; instruction that was not at the start of the loop. +; There is no point is reloading this insn - then lp_count would still not +; be available for the loop end. 
+(define_insn "doloop_begin_i" + [(unspec:SI [(pc)] UNSPEC_LP) + (clobber (reg:SI LP_START)) + (clobber (reg:SI LP_END)) + (use (match_operand:SI 0 "register_operand" "l,l,????*X")) + (use (match_operand 1 "const_int_operand" "n,n,C_0")) + (use (match_operand 2 "const_int_operand" "n,n,X")) + (use (match_operand 3 "const_int_operand" "C_0,n,X")) + (use (match_operand 4 "const_int_operand" "C_0,X,X"))] + "" +{ + rtx scan; + int len, size = 0; + int n_insns = 0; + rtx loop_start = operands[4]; + + if (CONST_INT_P (loop_start)) + loop_start = NULL_RTX; + /* Size implications of the alignment will be taken care of by the + alignment inserted at the loop start. */ + if (LOOP_ALIGN (0) && INTVAL (operands[1])) + { + asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0)); + arc_clear_unalign (); + } + if (!INTVAL (operands[1])) + return "; LITTLE LOST LOOP"; + if (loop_start && flag_pic) + { + /* ??? Can do better for when a scratch register + is known. But that would require extra testing. */ + arc_clear_unalign (); + return ".p2align 2\;push_s r0\;add r0,pcl,%4-.+2\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1-.+2\;sr r0,[3]; LP_END\;pop_s r0"; + } + /* Check if the loop end is in range to be set by the lp instruction. */ + size = INTVAL (operands[3]) < 2 ? 0 : 2048; + for (scan = insn; scan && size < 2048; scan = NEXT_INSN (scan)) + { + if (!INSN_P (scan)) + continue; + if (recog_memoized (scan) == CODE_FOR_doloop_end_i + && (XEXP (XVECEXP (PATTERN (scan), 0, 4), 0) + == XEXP (XVECEXP (PATTERN (insn), 0, 4), 0))) + break; + len = get_attr_length (scan); + size += len; + } + /* Try to verify that there are at least three instruction fetches + between the loop setup and the first encounter of the loop end. */ + for (scan = NEXT_INSN (insn); scan && n_insns < 3; scan = NEXT_INSN (scan)) + { + if (!INSN_P (scan)) + continue; + if (GET_CODE (PATTERN (scan)) == SEQUENCE) + scan = XVECEXP (PATTERN (scan), 0, 0); + if (JUMP_P (scan)) + { + if (recog_memoized (scan) != CODE_FOR_doloop_end_i) + { + n_insns += 2; + if (simplejump_p (scan)) + { + scan = XEXP (SET_SRC (PATTERN (scan)), 0); + continue; + } + if (JUMP_LABEL (scan) + /* JUMP_LABEL might be simple_return instead if an insn. */ + && (!INSN_P (JUMP_LABEL (scan)) + || (!next_active_insn (JUMP_LABEL (scan)) + || (recog_memoized (next_active_insn (JUMP_LABEL (scan))) + != CODE_FOR_doloop_begin_i))) + && (!next_active_insn (NEXT_INSN (PREV_INSN (scan))) + || (recog_memoized + (next_active_insn (NEXT_INSN (PREV_INSN (scan)))) + != CODE_FOR_doloop_begin_i))) + n_insns++; + } + break; + } + len = get_attr_length (scan); + /* Size estimation of asms assumes that each line which is nonempty + codes an insn, and that each has a long immediate. For minimum insn + count, assume merely that a nonempty asm has at least one insn. */ + if (GET_CODE (PATTERN (scan)) == ASM_INPUT + || asm_noperands (PATTERN (scan)) >= 0) + n_insns += (len != 0); + else + n_insns += (len > 4 ? 2 : (len ? 1 : 0)); + } + if (LOOP_ALIGN (0)) + { + asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0)); + arc_clear_unalign (); + } + gcc_assert (n_insns || GET_CODE (next_nonnote_insn (insn)) == CODE_LABEL); + if (size >= 2048 || (TARGET_ARC600 && n_insns == 1) || loop_start) + { + if (flag_pic) + { + /* ??? Can do better for when a scratch register + is known. But that would require extra testing. 
*/
+ arc_clear_unalign ();
+ return ".p2align 2\;push_s r0\;add r0,pcl,24\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1-.+2\;sr r0,[3]; LP_END\;pop_s r0";
+ }
+ output_asm_insn ((size < 2048
+ ? "lp .L__GCC__LP%1" : "sr .L__GCC__LP%1,[3]; LP_END"),
+ operands);
+ output_asm_insn (loop_start
+ ? "sr %4,[2]; LP_START" : "sr 0f,[2]; LP_START",
+ operands);
+ if (TARGET_ARC600 && n_insns < 1)
+ output_asm_insn ("nop", operands);
+ return (TARGET_ARC600 && n_insns < 3) ? "nop_s\;nop_s\;0:" : "0:";
+ }
+ else if (TARGET_ARC600 && n_insns < 3)
+ {
+ /* At least four instructions are needed between the setting of LP_COUNT
+ and the loop end - but the lp instruction qualifies as one. */
+ rtx prev = prev_nonnote_insn (insn);
+
+ if (!INSN_P (prev) || dead_or_set_regno_p (prev, LP_COUNT))
+ output_asm_insn ("nop", operands);
+ }
+ return "lp .L__GCC__LP%1";
+}
+ [(set_attr "type" "loop_setup")
+ (set_attr_alternative "length"
+; FIXME: length is usually 4, but we need branch shortening
+; to get this right.
+; [(if_then_else (match_test "TARGET_ARC600") (const_int 16) (const_int 4))
+ [(if_then_else (match_test "flag_pic") (const_int 24) (const_int 16))
+ (if_then_else (match_test "flag_pic") (const_int 28) (const_int 16))
+ (const_int 0)])]
+ ;; ??? we should really branch shorten this insn, but then we'd
+ ;; need a proper label first. N.B. the end label can not only go out
+ ;; of range when it is far away, but also when it precedes the loop -
+ ;; which, unfortunately, it sometimes does, when the loop "optimizer"
+ ;; messes things up.
+)
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the number of loop iterations or 0 if it is unknown
+; operand 2 is the maximum number of loop iterations
+; operand 3 is the number of levels of enclosed loops
+; operand 4 is the label to jump to at the top of the loop
+; operand 5 is nonzero if the loop is entered at its top.
+; Use this for the ARC600 and ARC700. For ARCtangent-A5, this is unsafe
+; without further checking for nearby branches etc., and without proper
+; annotation of shift patterns that clobber lp_count
+; ??? ARC600 might want to check if the loop has few iterations and only a
+; single insn - loop setup is expensive then.
+(define_expand "doloop_end"
+ [(use (match_operand 0 "register_operand" ""))
+ (use (match_operand:QI 1 "const_int_operand" ""))
+ (use (match_operand:QI 2 "const_int_operand" ""))
+ (use (match_operand:QI 3 "const_int_operand" ""))
+ (use (label_ref (match_operand 4 "" "")))
+ (use (match_operand:QI 5 "const_int_operand" ""))]
+ "TARGET_ARC600 || TARGET_ARC700"
+{
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ /* Setting up the loop with two sr instructions costs 6 cycles. */
+ if (TARGET_ARC700 && !INTVAL (operands[5])
+ && INTVAL (operands[1]) && INTVAL (operands[1]) <= (flag_pic ? 6 : 3))
+ FAIL;
+ /* We could do smaller bivs with biv widening, and wider bivs by having
+ a high-word counter in an outer loop - but punt on this for now.
*/ + if (GET_MODE (operands[0]) != SImode) + FAIL; + emit_jump_insn (gen_doloop_end_i (operands[0], operands[4], const0_rtx)); + DONE; +}) + +(define_insn_and_split "doloop_end_i" + [(set (pc) + (if_then_else (ne (match_operand:SI 0 "shouldbe_register_operand" "+l,*c,*m") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) + (use (reg:SI LP_START)) + (use (reg:SI LP_END)) + (use (match_operand 2 "const_int_operand" "n,???Cn0,???X")) + (clobber (match_scratch:SI 3 "=X,X,&????r"))] + "" + "* +{ + rtx prev = prev_nonnote_insn (insn); + + /* If there is an immediately preceding label, we must output a nop, + lest a branch to that label will fall out of the loop. + ??? We could try to avoid this by claiming to have a delay slot if there + is a preceding label, and outputting the delay slot insn instead, if + present. + Or we could have some optimization that changes the source edge to update + the loop count and jump to the loop start instead. */ + /* For ARC600, we must also prevent jumps inside the loop and jumps where + the loop counter value is live at the target from being directly at the + loop end. Being sure that the loop counter is dead at the target is + too much hair - we can't rely on data flow information at this point - + so insert a nop for all branches. + The ARC600 also can't read the loop counter in the last insn of a loop. */ + if (LABEL_P (prev)) + output_asm_insn (\"nop%?\", operands); + return \"\\n.L__GCC__LP%2: ; loop end, start is %1\"; +}" + "&& memory_operand (operands[0], SImode)" + [(pc)] +{ + emit_move_insn (operands[3], operands[0]); + emit_jump_insn (gen_doloop_fallback_m (operands[3], operands[1], operands[0])); + DONE; +} + [(set_attr "type" "loop_end") + (set (attr "length") + (if_then_else (match_test "LABEL_P (prev_nonnote_insn (insn))") + (const_int 4) (const_int 0)))] +) + +; This pattern is generated by arc_reorg when there is no recognizable +; loop start. +(define_insn "*doloop_fallback" + [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+r,!w") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))] + ; avoid fooling the loop optimizer into assuming this is a special insn. + "reload_completed" + "*return get_attr_length (insn) == 8 + ? \"brne.d %0,1,%1\;sub %0,%0,1\" + : \"breq %0,1,0f\;b.d %1\;sub %0,%0,1\\n0:\";" + [(set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -256)) + (le (minus (match_dup 1) (pc)) (const_int 244))) + (const_int 8) (const_int 12))) + (set_attr "type" "brcc_no_delay_slot") + (set_attr "cond" "nocond")] +) + +; reload can't make output reloads for jump insns, so we have to do this by hand. +(define_insn "doloop_fallback_m" + [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+&r") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) + (set (match_operand:SI 2 "memory_operand" "=m") + (plus:SI (match_dup 0) (const_int -1)))] + ; avoid fooling the loop optimizer into assuming this is a special insn. + "reload_completed" + "*return get_attr_length (insn) == 12 + ? 
\"sub %0,%0,1\;brne.d %0,0,%1\;st%U2%V2 %0,%2\" + : \"sub %0,%0,1\;breq %0,0,0f\;b.d %1\\n0:\tst%U2%V2 %0,%2\";" + [(set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -252)) + (le (minus (match_dup 1) (pc)) (const_int 244))) + (const_int 12) (const_int 16))) + (set_attr "type" "brcc_no_delay_slot") + (set_attr "cond" "nocond")] +) + +(define_expand "movmemsi" + [(match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "") + (match_operand:SI 2 "nonmemory_operand" "") + (match_operand 3 "immediate_operand" "")] + "" + "if (arc_expand_movmem (operands)) DONE; else FAIL;") + +;; Close http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35803 if this works +;; to the point that we can generate cmove instructions. +(define_expand "cbranch<mode>4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "comparison_operator" [(reg CC_REG) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + + "TARGET_OPTFPE" +{ + gcc_assert (XEXP (operands[0], 0) == operands[1]); + gcc_assert (XEXP (operands[0], 1) == operands[2]); + operands[0] = gen_compare_reg (operands[0], VOIDmode); + emit_jump_insn (gen_branch_insn (operands[3], operands[0])); + DONE; +}) + +(define_expand "cmp_float" + [(parallel [(set (match_operand 0 "") (match_operand 1 "")) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R12_REG))])] + "" + "") + +(define_mode_iterator OPTFPE_CMP [CC_Z CC_FP_GT CC_FP_GE CC_FP_UNEQ CC_FP_ORD]) +(define_mode_attr cmp [(CC_Z "eq") (CC_FP_GT "gt") (CC_FP_GE "ge") + (CC_FP_UNEQ "uneq") (CC_FP_ORD "ord")]) + +(define_insn "*cmpsf_<cmp>" + [(set (reg:OPTFPE_CMP CC_REG) (compare:OPTFPE_CMP (reg:SF 0) (reg:SF 1))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R12_REG))] + "TARGET_OPTFPE && (!TARGET_ARGONAUT_SET || !TARGET_SPFP) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__<cmp>sf2\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +;; N.B. for "*cmpdf_ord": +;; double precision fpx sets bit 31 for NaNs. We need bit 51 set +;; for the floating point emulation to recognize the NaN. +(define_insn "*cmpdf_<cmp>" + [(set (reg:OPTFPE_CMP CC_REG) (compare:OPTFPE_CMP (reg:DF 0) (reg:DF 2))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R12_REG))] + "TARGET_OPTFPE && (!TARGET_ARGONAUT_SET || !TARGET_DPFP) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__<cmp>df2\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_insn "abssf2" + [(set (match_operand:SF 0 "dest_reg_operand" "=Rcq#q,Rcw,w") + (abs:SF (match_operand:SF 1 "register_operand" "0, 0,c")))] + "" + "bclr%? %0,%1,31%&" + [(set_attr "type" "unary") + (set_attr "iscompact" "maybe,false,false") + (set_attr "length" "2,4,4") + (set_attr "predicable" "no,yes,no")]) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "dest_reg_operand" "=Rcw,w") + (neg:SF (match_operand:SF 1 "register_operand" "0,c")))] + "" + "bxor%? %0,%1,31" + [(set_attr "type" "unary") + (set_attr "predicable" "yes,no")]) + +;; ??? Should this use arc_output_libcall and set is_sfunc? 
+(define_insn "*millicode_thunk_st"
+ [(match_parallel 0 "millicode_store_operation"
+ [(set (mem:SI (reg:SI SP_REG)) (reg:SI 13))])]
+ ""
+{
+ output_asm_insn ("bl%* __st_r13_to_%0",
+ &SET_SRC (XVECEXP (operands[0], 0,
+ XVECLEN (operands[0], 0) - 2)));
+ return "";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*millicode_thunk_ld"
+ [(match_parallel 0 "millicode_load_clob_operation"
+ [(set (reg:SI 13) (mem:SI (reg:SI SP_REG)))])]
+ ""
+{
+ output_asm_insn ("bl%* __ld_r13_to_%0",
+ &SET_DEST (XVECEXP (operands[0], 0,
+ XVECLEN (operands[0], 0) - 2)));
+ return "";
}
+ [(set_attr "type" "call")])
+
+; the sibthunk restores blink, so we use the return rtx.
+(define_insn "*millicode_sibthunk_ld"
+ [(match_parallel 0 "millicode_load_operation"
+ [(return)
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (reg:SI 12)))
+ (set (reg:SI 13) (mem:SI (reg:SI SP_REG)))])]
+ ""
+{
+ output_asm_insn ("b%* __ld_r13_to_%0_ret",
+ &SET_DEST (XVECEXP (operands[0], 0,
+ XVECLEN (operands[0], 0) - 1)));
+ return "";
+}
+ [(set_attr "type" "call")
+ (set_attr "is_SIBCALL" "yes")])
+
+;; If hardware floating point is available, don't define a negdf pattern;
+;; it would be something like:
+;;(define_insn "negdf2"
+;; [(set (match_operand:DF 0 "register_operand" "=w,w,D,?r")
+;; (neg:DF (match_operand:DF 1 "register_operand" "0,c,D,D")))
+;; (clobber (match_scratch:DF 2 "=X,X,X,X,D1"))]
+;; ""
+;; "@
+;; bxor%? %H0,%H1,31
+;; bxor %H0,%H1,31 ` mov %L0,%L1
+;; drsubh%F0%F1 0,0,0
+;; drsubh%F2%F1 %H0,0,0 ` dexcl%F2 %L0,%H0,%L0"
+;; [(set_attr "type" "unary,unary,dpfp_addsub,dpfp_addsub")
+;; (set_attr "iscompact" "false,false,false,false")
+;; (set_attr "length" "4,4,8,12")
+;; (set_attr "cond" "canuse,nocond,nocond,nocond")])
+;; and this suffers from always requiring a long immediate when using
+;; the floating point hardware.
+;; We then want the sub[sd]f patterns to be used, so that we can load the
+;; constant zero efficiently into a register when we want to do the
+;; computation using the floating point hardware. There should be a special
+;; subdf alternative that matches a zero operand 1, which then allows using
+;; bxor to flip the high bit of an integer register.
+;; ??? we actually can't use the floating point hardware for neg, because
+;; this would not work right for -0. OTOH optabs.c already has code
+;; to synthesize negate by flipping the sign bit.
+
+
+;; include the arc-FPX instructions
+(include "fpx.md")
+
+(include "simdext.md")
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
new file mode 100644
index 00000000000..26e0de43fda
--- /dev/null
+++ b/gcc/config/arc/arc.opt
@@ -0,0 +1,390 @@
+; Options for the Synopsys DesignWare ARC port of the compiler
+;
+; Copyright (C) 2005, 2007-2013 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
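The records that follow use GCC's .opt file format: each blank-line-separated group names an option, lists its properties, and gives its help text. A property such as Mask(NAME) makes the generated options code allocate a bit in target_flags. As a hedged C sketch only (the real macros are generated into options.h, and the bit position is chosen by the generator, not fixed at 0), a Mask(BIG_ENDIAN) record yields roughly:

    /* Approximate shape of what the .opt machinery derives from
       "Mask(BIG_ENDIAN)"; the bit index 0 is a placeholder.  */
    extern int target_flags;

    #define MASK_BIG_ENDIAN    (1 << 0)
    #define TARGET_BIG_ENDIAN  ((target_flags & MASK_BIG_ENDIAN) != 0)

Backend code then tests TARGET_BIG_ENDIAN rather than parsing options itself.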
+
+HeaderInclude
+config/arc/arc-opts.h
+
+mbig-endian
+Target Report RejectNegative Mask(BIG_ENDIAN)
+Compile code for big endian mode
+
+mlittle-endian
+Target Report RejectNegative InverseMask(BIG_ENDIAN)
+Compile code for little endian mode. This is the default
+
+mno-cond-exec
+Target Report RejectNegative Mask(NO_COND_EXEC)
+Disable ARCompact specific pass to generate conditional execution instructions
+
+mA5
+Target Report
+Generate ARCompact 32-bit code for ARCtangent-A5 processor
+
+mA6
+Target Report
+Generate ARCompact 32-bit code for ARC600 processor
+
+mARC600
+Target Report
+Same as -mA6
+
+mARC601
+Target Report
+Generate ARCompact 32-bit code for ARC601 processor
+
+mA7
+Target Report
+Generate ARCompact 32-bit code for ARC700 processor
+
+mARC700
+Target Report
+Same as -mA7
+
+mmixed-code
+Target Report Mask(MIXED_CODE_SET)
+Tweak register allocation to help 16-bit instruction generation
+; originally this was:
+;Generate ARCompact 16-bit instructions intermixed with 32-bit instructions for ARCtangent-A5 and higher processors
+; but we do that without -mmixed-code, too, it's just a different instruction
+; count / size tradeoff.
+
+; We use an explicit definition for the negative form because that is the
+; actually interesting option, and we want that to have its own comment.
+mvolatile-cache
+Target Report RejectNegative Mask(VOLATILE_CACHE_SET)
+Use ordinarily cached memory accesses for volatile references
+
+mno-volatile-cache
+Target Report RejectNegative InverseMask(VOLATILE_CACHE_SET)
+Enable cache bypass for volatile references
+
+mbarrel-shifter
+Target Report Mask(BARREL_SHIFTER)
+Generate instructions supported by barrel shifter
+
+mnorm
+Target Report Mask(NORM_SET)
+Generate norm instruction
+
+mswap
+Target Report Mask(SWAP_SET)
+Generate swap instruction
+
+mmul64
+Target Report Mask(MUL64_SET)
+Generate mul64 and mulu64 instructions
+
+mno-mpy
+Target Report Mask(NOMPY_SET)
+Do not generate mpy instructions for ARC700
+
+mea
+Target Report Mask(EA_SET)
+Generate Extended arithmetic instructions. Currently only divaw, adds, subs and sat16 are supported
+
+msoft-float
+Target Report Mask(0)
+Dummy flag. This is the default unless FPX switches are provided explicitly
+
+mlong-calls
+Target Report Mask(LONG_CALLS_SET)
+Generate call insns as register indirect calls
+
+mno-brcc
+Target Report Mask(NO_BRCC_SET)
+Do not generate BRcc instructions in arc_reorg.
+
+msdata
+Target Report InverseMask(NO_SDATA_SET)
+Generate sdata references. This is the default, unless you compile for PIC.
+
+mno-millicode
+Target Report Mask(NO_MILLICODE_THUNK_SET)
+Do not generate millicode thunks (needed only with -Os)
+
+mspfp
+Target Report Mask(SPFP_COMPACT_SET)
+FPX: Generate Single Precision FPX (compact) instructions.
+
+mspfp-compact
+Target Report Mask(SPFP_COMPACT_SET) MaskExists
+FPX: Generate Single Precision FPX (compact) instructions.
+
+mspfp-fast
+Target Report Mask(SPFP_FAST_SET)
+FPX: Generate Single Precision FPX (fast) instructions.
+
+margonaut
+Target Report Mask(ARGONAUT_SET)
+FPX: Enable Argonaut ARC CPU Double Precision Floating Point extensions.
+
+mdpfp
+Target Report Mask(DPFP_COMPACT_SET)
+FPX: Generate Double Precision FPX (compact) instructions.
+
+mdpfp-compact
+Target Report Mask(DPFP_COMPACT_SET) MaskExists
+FPX: Generate Double Precision FPX (compact) instructions.
+
+mdpfp-fast
+Target Report Mask(DPFP_FAST_SET)
+FPX: Generate Double Precision FPX (fast) instructions.
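Note how -mspfp and -mdpfp above reuse the masks of their -compact variants via MaskExists, so the short and long spellings are synonyms, while the -fast variants get distinct masks. The FPX machine-description patterns later test combined TARGET_SPFP / TARGET_DPFP conditions, presumably defined in arc.h along these lines (a hedged sketch, not a quote of arc.h):

    /* Sketch only: either the compact or the fast variant of the FPX
       extension would satisfy the corresponding combined target test.  */
    #define TARGET_SPFP (TARGET_SPFP_COMPACT_SET || TARGET_SPFP_FAST_SET)
    #define TARGET_DPFP (TARGET_DPFP_COMPACT_SET || TARGET_DPFP_FAST_SET)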
+ +mno-dpfp-lrsr +Target Report Mask(DPFP_DISABLE_LRSR) +Disable LR and SR instructions from using FPX extension aux registers. + +msimd +Target Report Mask(SIMD_SET) +Enable generation of ARC SIMD instructions via target-specific builtins. + +mcpu= +Target RejectNegative Joined Var(arc_cpu) Enum(processor_type) Init(PROCESSOR_NONE) +-mcpu=CPU Compile code for ARC variant CPU + +Enum +Name(processor_type) Type(enum processor_type) + +EnumValue +Enum(processor_type) String(A5) Value(PROCESSOR_A5) + +EnumValue +Enum(processor_type) String(ARC600) Value(PROCESSOR_ARC600) + +EnumValue +Enum(processor_type) String(ARC601) Value(PROCESSOR_ARC601) + +EnumValue +Enum(processor_type) String(ARC700) Value(PROCESSOR_ARC700) + +msize-level= +Target RejectNegative Joined UInteger Var(arc_size_opt_level) Init(-1) +size optimization level: 0:none 1:opportunistic 2: regalloc 3:drop align, -Os + +misize +Target Report Var(TARGET_DUMPISIZE) +Annotate assembler instructions with estimated addresses + +mmultcost= +Target RejectNegative Joined UInteger Var(arc_multcost) Init(-1) +Cost to assume for a multiply instruction, with 4 being equal to a normal insn. + +mtune=ARC600 +Target RejectNegative Var(arc_tune, TUNE_ARC600) +Tune for ARC600 cpu. + +mtune=ARC601 +Target RejectNegative Var(arc_tune, TUNE_ARC600) +Tune for ARC601 cpu. + +mtune=ARC700 +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_STD) +Tune for ARC700 R4.2 Cpu with standard multiplier block. + +mtune=ARC700-xmac +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC) +Tune for ARC700 R4.2 Cpu with XMAC block. + +mtune=ARC725D +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC) +Tune for ARC700 R4.2 Cpu with XMAC block. + +mtune=ARC750D +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC) +Tune for ARC700 R4.2 Cpu with XMAC block. + +mindexed-loads +Target Var(TARGET_INDEXED_LOADS) +Enable the use of indexed loads + +mauto-modify-reg +Target Var(TARGET_AUTO_MODIFY_REG) +Enable the use of pre/post modify with register displacement. + +mmul32x16 +Target Report Mask(MULMAC_32BY16_SET) +Generate 32x16 multiply and mac instructions + +; the initializer is supposed to be: Init(REG_BR_PROB_BASE/2) , +; alas, basic-block.h is not included in options.c . +munalign-prob-threshold= +Target RejectNegative Joined UInteger Var(arc_unalign_prob_threshold) Init(10000/2) +Set probability threshold for unaligning branches + +mmedium-calls +Target Var(TARGET_MEDIUM_CALLS) Init(TARGET_MMEDIUM_CALLS_DEFAULT) +Don't use less than 25 bit addressing range for calls. + +mannotate-align +Target Var(TARGET_ANNOTATE_ALIGN) +Explain what alignment considerations lead to the decision to make an insn short or long. + +malign-call +Target Var(TARGET_ALIGN_CALL) +Do alignment optimizations for call instructions. + +mRcq +Target Var(TARGET_Rcq) +Enable Rcq constraint handling - most short code generation depends on this. + +mRcw +Target Var(TARGET_Rcw) +Enable Rcw constraint handling - ccfsm condexec mostly depends on this. + +mearly-cbranchsi +Target Var(TARGET_EARLY_CBRANCHSI) +Enable pre-reload use of cbranchsi pattern + +mbbit-peephole +Target Var(TARGET_BBIT_PEEPHOLE) +Enable bbit peephole2 + +mcase-vector-pcrel +Target Var(TARGET_CASE_VECTOR_PC_RELATIVE) +Use pc-relative switch case tables - this enables case table shortening. + +mcompact-casesi +Target Var(TARGET_COMPACT_CASESI) +Enable compact casesi pattern + +mq-class +Target Var(TARGET_Q_CLASS) +Enable 'q' instruction alternatives. 
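The -mcpu= record above stores a processor_type enumerator into arc_cpu (compare the PROCESSOR_NONE initialization in arc-common.c), so backend code can dispatch on the selected core. A hedged C sketch of such a consumer; the enum layout mirrors the EnumValue records above, and arc_example_tuning is a made-up illustration, not part of the port:

    /* Illustrative only: how a Var(arc_cpu) Enum(processor_type)
       option is typically consumed by target code.  */
    enum processor_type
    {
      PROCESSOR_NONE, PROCESSOR_A5, PROCESSOR_ARC600,
      PROCESSOR_ARC601, PROCESSOR_ARC700
    };
    extern enum processor_type arc_cpu;

    static void
    arc_example_tuning (void)
    {
      switch (arc_cpu)
        {
        case PROCESSOR_ARC600:
        case PROCESSOR_ARC601:
          /* ARC600-class tuning decisions would go here.  */
          break;
        case PROCESSOR_ARC700:
          /* ARC700-class tuning decisions would go here.  */
          break;
        default:
          break;
        }
    }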
+
+mexpand-adddi
+Target Var(TARGET_EXPAND_ADDDI)
+Expand adddi3 and subdi3 at rtl generation time into add.f / adc etc.
+
+
+; Flags used by the assembler, but for which we define preprocessor
+; macro symbols as well.
+mcrc
+Target Report
+Enable variable polynomial CRC extension
+
+mdsp-packa
+Target Report
+Enable DSP 3.1 Pack A extensions
+
+mdvbf
+Target Report
+Enable dual viterbi butterfly extension
+
+mmac-d16
+Target Report Undocumented
+
+mmac-24
+Target Report Undocumented
+
+mtelephony
+Target Report RejectNegative
+Enable Dual and Single Operand Instructions for Telephony
+
+mxy
+Target Report
+Enable XY Memory extension (DSP version 3)
+
+; ARC700 4.10 extension instructions
+mlock
+Target Report
+Enable Locked Load/Store Conditional extension
+
+mswape
+Target Report
+Enable swap byte ordering extension instruction
+
+mrtsc
+Target Report
+Enable 64-bit Time-Stamp Counter extension instruction
+
+mno-epilogue-cfi
+Target Report RejectNegative InverseMask(EPILOGUE_CFI)
+Disable generation of cfi for epilogues.
+
+mepilogue-cfi
+Target RejectNegative Mask(EPILOGUE_CFI)
+Enable generation of cfi for epilogues.
+
+EB
+Target
+Pass -EB option through to linker.
+
+EL
+Target
+Pass -EL option through to linker.
+
+marclinux
+Target
+Pass -marclinux option through to linker.
+
+marclinux_prof
+Target
+Pass -marclinux_prof option through to linker.
+
+;; lra is still unproven for ARC, so allow falling back to reload with -mno-lra.
+;Target InverseMask(NO_LRA)
+mlra
+; lra still won't allow configuring libgcc; see PR rtl-optimization/55464.
+; so don't enable by default.
+Target Mask(LRA)
+Enable lra
+
+mlra-priority-none
+Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_NONE)
+Don't indicate any priority with TARGET_REGISTER_PRIORITY
+
+mlra-priority-compact
+Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_COMPACT)
+Indicate priority for r0..r3 / r12..r15 with TARGET_REGISTER_PRIORITY
+
+mlra-priority-noncompact
+Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_NONCOMPACT)
+Reduce priority for r0..r3 / r12..r15 with TARGET_REGISTER_PRIORITY
+
+mucb-mcount
+Target Report Var(TARGET_UCB_MCOUNT)
+Instrument with mcount calls as in the UCB code
+
+; backward-compatibility aliases, translated by DRIVER_SELF_SPECS
+
+mEA
+Target
+
+multcost=
+Target RejectNegative Joined
+
+; Unfortunately, listing the full option name gives us clashes
+; with OPT_opt_name being claimed for both opt_name and opt-name,
+; so we leave out the last character or more.
+mbarrel_shifte
+Target Joined
+
+mspfp_
+Target Joined
+
+mdpfp_
+Target Joined
+
+mdsp_pack
+Target Joined
+
+mmac_
+Target Joined
+
diff --git a/gcc/config/arc/arc600.md b/gcc/config/arc/arc600.md
new file mode 100644
index 00000000000..f5665178322
--- /dev/null
+++ b/gcc/config/arc/arc600.md
@@ -0,0 +1,63 @@
+;; DFA scheduling description of the Synopsys DesignWare ARC600 cpu
+;; for GNU C compiler
+;; Copyright (C) 2007-2013 Free Software Foundation, Inc.
+;; Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+;; on behalf of Synopsys Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ARC600")
+
+(define_cpu_unit "issue_600" "ARC600")
+(define_cpu_unit "mul64_600" "ARC600")
+
+; latency from flag-setting insns to branches is 3.
+(define_insn_reservation "compare_600" 3
+ (and (eq_attr "tune" "arc600")
+ (eq_attr "type" "compare"))
+ "issue_600")
+
+(define_insn_reservation "load_DI_600" 4
+ (and (eq_attr "tune" "arc600")
+ (eq_attr "type" "load")
+ (match_operand:DI 0 "" ""))
+ "issue_600")
+
+(define_insn_reservation "load_600" 3
+ (and (eq_attr "tune" "arc600")
+ (eq_attr "type" "load")
+ (not (match_operand:DI 0 "" "")))
+ "issue_600")
+
+(define_insn_reservation "mul_600_fast" 3
+ (and (eq_attr "tune" "arc600")
+ (match_test "arc_multcost < COSTS_N_INSNS (7)")
+ (eq_attr "type" "multi,umulti"))
+ "mul64_600*3")
+
+(define_insn_reservation "mul_600_slow" 8
+ (and (eq_attr "tune" "arc600")
+ (match_test "arc_multcost >= COSTS_N_INSNS (7)")
+ (eq_attr "type" "multi,umulti"))
+ "mul64_600*8")
+
+(define_insn_reservation "mul_mac_600" 3
+ (and (eq_attr "tune" "arc600")
+ (eq_attr "type" "mulmac_600"))
+ "nothing*3")
+
+(define_bypass 1 "mul_mac_600" "mul_mac_600")
diff --git a/gcc/config/arc/arc700.md b/gcc/config/arc/arc700.md
new file mode 100644
index 00000000000..59fc9417620
--- /dev/null
+++ b/gcc/config/arc/arc700.md
@@ -0,0 +1,170 @@
+;; DFA scheduling description of the Synopsys DesignWare ARC700 cpu
+;; for GNU C compiler
+;; Comments and Support For ARC700 instructions added by
+;; Saurabh Verma (saurabh.verma@codito.com)
+;; Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)
+;; Factoring out and improvement of ARC700 Scheduling by
+;; Joern Rennecke (joern.rennecke@embecosm.com)
+;; Copyright (C) 2006-2012 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
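The reservations in these DFA descriptions give the scheduler issue and result timing: load_600 above, for instance, makes a non-DI load's result available after three cycles on ARC600. A small hedged C illustration (not code from the port) of what that information buys; knowing the latency, the scheduler can place independent work in the load's shadow:

    /* Illustrative source only.  With a 3-cycle load latency, computing
       b + c between the load of *p and the use of x hides the stall.  */
    int
    sum_with_load (int *p, int b, int c)
    {
      int x = *p;     /* load: result ready after ~3 cycles */
      int y = b + c;  /* independent add issued in the load shadow */
      return x + y;
    }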
+ +(define_automaton "ARC700") + +;; aux to be added here +(define_cpu_unit "core, dmp, write_port, dmp_write_port, multiplier, issue, blockage, simd_unit" "ARC700") + +(define_insn_reservation "core_insn_DI" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "unary, move, cmove, binary") + (match_operand:DI 0 "" "")) + "issue+core, issue+core+write_port, write_port") + +(define_insn_reservation "lr" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "lr")) + "issue+blockage, blockage*2, write_port") + +(define_insn_reservation "sr" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "sr")) + "issue+dmp_write_port+blockage, blockage*9") + +(define_insn_reservation "core_insn" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "unary, move, binary")) + "issue+core, nothing, write_port") + +(define_insn_reservation "cmove" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "cmove")) + "issue+core, nothing, write_port") + +(define_insn_reservation "cc_arith" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "cc_arith")) + "issue+core, nothing, write_port") + +(define_insn_reservation "two_cycle_core_insn" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "two_cycle_core")) + "issue+core, nothing, write_port") + +(define_insn_reservation "divaw_insn" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "divaw")) + "issue+core, nothing, write_port") + +(define_insn_reservation "shift_insn" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "shift")) + "issue+core, nothing, write_port") + +; Latency from flag setters to arithmetic with carry is 3. +(define_insn_reservation "compare_700" 3 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "compare")) + "issue+core, nothing, write_port") + +; Assume here the branch is predicted correctly and has a delay slot insn +; or is properly unaligned. +(define_insn_reservation "branch_700" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "compare")) + "issue+core, nothing, write_port") + +; TODOs: is this correct ?? 
+(define_insn_reservation "multi_DI" 10
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "multi")
+ (match_operand:DI 0 "" ""))
+ "issue+multiplier, multiplier*2,issue+multiplier, multiplier*2,
+ nothing,write_port,nothing*2, write_port")
+
+(define_insn_reservation "umulti_DI" 9
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "umulti")
+ (match_operand:DI 0 "" ""))
+ "issue+multiplier, multiplier,issue+multiplier, multiplier*2,
+ write_port,nothing*3, write_port")
+
+(define_insn_reservation "umulti_xmac" 5
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "umulti"))
+ "issue+multiplier, multiplier, nothing*3, write_port")
+
+; latency of mpyu is lower than mpy / mpyh / mpyhu
+(define_insn_reservation "umulti_std" 6
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "umulti"))
+ "issue+multiplier, multiplier*3, nothing*2, write_port")
+
+;; arc700 xmac multiplier
+(define_insn_reservation "multi_xmac" 5
+ (and (eq_attr "tune" "arc700_4_2_xmac")
+ (eq_attr "type" "multi"))
+ "issue+multiplier,multiplier,nothing*3,write_port")
+
+; arc700 standard multiplier
+(define_insn_reservation "multi_std" 7
+ (and (eq_attr "tune" "arc700_4_2_std")
+ (eq_attr "type" "multi"))
+ "issue+multiplier,multiplier*4,nothing*2,write_port")
+
+;(define_insn_reservation "multi_SI" 7
+; (eq_attr "type" "multi")
+; "issue+multiplier, multiplier*2, nothing*4, write_port")
+
+; There is no multiplier -> multiplier bypass except for the
+; mac -> mac dependency on the accumulator.
+
+; divaw -> divaw latency is 1 cycle
+(define_bypass 1 "divaw_insn" "divaw_insn")
+
+(define_bypass 1 "compare_700" "branch_700,core_insn,data_store,data_load")
+
+; we could schedule the cmove immediately after the compare, but then
+; the cmove would have higher latency... so just keep the cmove apart
+; from the compare.
+(define_bypass 2 "compare_700" "cmove")
+
+; no functional unit runs when blockage is reserved
+(exclusion_set "blockage" "core, multiplier")
+
+(define_insn_reservation "data_load_DI" 4
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "load")
+ (match_operand:DI 0 "" ""))
+ "issue+dmp, issue+dmp, dmp_write_port, dmp_write_port")
+
+(define_insn_reservation "data_load" 3
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "load")
+ (not (match_operand:DI 0 "" "")))
+ "issue+dmp, nothing, dmp_write_port")
+
+(define_insn_reservation "data_store_DI" 2
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "store")
+ (match_operand:DI 0 "" ""))
+ "issue+dmp_write_port, issue+dmp_write_port")
+
+(define_insn_reservation "data_store" 1
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "store")
+ (not (match_operand:DI 0 "" "")))
+ "issue+dmp_write_port")
diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md
new file mode 100644
index 00000000000..088013bbdb7
--- /dev/null
+++ b/gcc/config/arc/constraints.md
@@ -0,0 +1,399 @@
+;; Constraint definitions for Synopsys DesignWare ARC.
+;; Copyright (C) 2007-2013 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints
+
+; Most instructions accept arbitrary core registers for their inputs, even
+; if the core register in question cannot be written to, like the multiply
+; result registers of the ARCtangent-A5 and ARC600.
+; First, define a class for core registers that can be read cheaply. This
+; is most or all core registers for ARC600, but only r0-r31 for ARC700.
+(define_register_constraint "c" "CHEAP_CORE_REGS"
+ "core register @code{r0}-@code{r31}, @code{ap},@code{pcl}")
+
+; All core regs - e.g. for when we must have a way to reload a register.
+(define_register_constraint "Rac" "ALL_CORE_REGS"
+ "core register @code{r0}-@code{r60}, @code{ap},@code{pcl}")
+
+; Some core registers (e.g. lp_count) aren't general registers because they
+; can't be used as the destination of a multi-cycle operation like
+; load and/or multiply, yet they are still writable in the sense that
+; register-register moves and single-cycle arithmetic (e.g "add", "and",
+; but not "mpy") can write to them.
+(define_register_constraint "w" "WRITABLE_CORE_REGS"
+ "writable core register: @code{r0}-@code{r31}, @code{r60}, nonfixed core register")
+
+(define_register_constraint "W" "MPY_WRITABLE_CORE_REGS"
+ "writable core register except @code{LP_COUNT} (@code{r60}): @code{r0}-@code{r31}, nonfixed core register")
+
+(define_register_constraint "l" "LPCOUNT_REG"
+ "@internal
+ Loop count register @code{r60}")
+
+(define_register_constraint "x" "R0_REGS"
+ "@code{R0} register.")
+
+(define_register_constraint "Rgp" "GP_REG"
+ "@internal
+ Global Pointer register @code{r26}")
+
+(define_register_constraint "f" "FP_REG"
+ "@internal
+ Frame Pointer register @code{r27}")
+
+(define_register_constraint "b" "SP_REGS"
+ "@internal
+ Stack Pointer register @code{r28}")
+
+(define_register_constraint "k" "LINK_REGS"
+ "@internal
+ Link Registers @code{ilink1}:@code{r29}, @code{ilink2}:@code{r30},
+ @code{blink}:@code{r31},")
+
+(define_register_constraint "q" "ARCOMPACT16_REGS"
+ "Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3},
+ @code{r12}-@code{r15}")
+
+(define_register_constraint "e" "AC16_BASE_REGS"
+ "Registers usable as base-regs of memory addresses in ARCompact 16-bit memory
+ instructions: @code{r0}-@code{r3}, @code{r12}-@code{r15}, @code{sp}")
+
+(define_register_constraint "D" "DOUBLE_REGS"
+ "ARC FPX (dpfp) 64-bit registers. @code{D0}, @code{D1}")
+
+(define_register_constraint "d" "SIMD_DMA_CONFIG_REGS"
+ "@internal
+ ARC SIMD DMA configuration registers @code{di0}-@code{di7},
+ @code{do0}-@code{do7}")
+
+(define_register_constraint "v" "SIMD_VR_REGS"
+ "ARC SIMD 128-bit registers @code{VR0}-@code{VR23}")
+
+; We could allow call-saved registers for sibling calls if we restored them
+; in the delay slot of the call. However, that would not allow adjusting the
+; stack pointer afterwards, so the call-saved register would have to be
+; restored from a call-used register that was just loaded with the value
+; before. So sticking to call-used registers for sibcalls will likely
+; generate better code overall.
+(define_register_constraint "Rsc" "SIBCALL_REGS"
+ "@internal
+ Sibling call register")
+
+;; Integer constraints
+
+(define_constraint "I"
+ "@internal
+ A signed 12-bit integer constant."
+ (and (match_code "const_int") + (match_test "SIGNED_INT12 (ival)"))) + +(define_constraint "K" + "@internal + A 3-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT3 (ival)"))) + +(define_constraint "L" + "@internal + A 6-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (ival)"))) + +(define_constraint "CnL" + "@internal + One's complement of a 6-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (~ival)"))) + +(define_constraint "CmL" + "@internal + Two's complement of a 6-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (-ival)"))) + +(define_constraint "M" + "@internal + A 5-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT5 (ival)"))) + +(define_constraint "N" + "@internal + Integer constant 1" + (and (match_code "const_int") + (match_test "IS_ONE (ival)"))) + +(define_constraint "O" + "@internal + A 7-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT7 (ival)"))) + +(define_constraint "P" + "@internal + An 8-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT8 (ival)"))) + +(define_constraint "C_0" + "@internal + Zero" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "Cn0" + "@internal + Negative or zero" + (and (match_code "const_int") + (match_test "ival <= 0"))) + +(define_constraint "Cca" + "@internal + Conditional or three-address add / sub constant" + (and (match_code "const_int") + (match_test "ival == -1 << 31 + || (ival >= -0x1f8 && ival <= 0x1f8 + && ((ival >= 0 ? ival : -ival) + <= 0x3f * (ival & -ival)))"))) + +; intersection of "O" and "Cca". +(define_constraint "CL2" + "@internal + A 6-bit unsigned integer constant times 2" + (and (match_code "const_int") + (match_test "!(ival & ~126)"))) + +(define_constraint "CM4" + "@internal + A 5-bit unsigned integer constant times 4" + (and (match_code "const_int") + (match_test "!(ival & ~124)"))) + +(define_constraint "Csp" + "@internal + A valid stack pointer offset for a short add" + (and (match_code "const_int") + (match_test "!(ival & ~124) || !(-ival & ~124)"))) + +(define_constraint "C2a" + "@internal + Unconditional two-address add / sub constant" + (and (match_code "const_int") + (match_test "ival == -1 << 31 + || (ival >= -0x4000 && ival <= 0x4000 + && ((ival >= 0 ? 
ival : -ival) + <= 0x7ff * (ival & -ival)))"))) + +(define_constraint "C0p" + "@internal + power of two" + (and (match_code "const_int") + (match_test "IS_POWEROF2_P (ival)"))) + +(define_constraint "C1p" + "@internal + constant such that x+1 is a power of two, and x != 0" + (and (match_code "const_int") + (match_test "ival && IS_POWEROF2_P (ival + 1)"))) + +(define_constraint "Ccp" + "@internal + constant such that ~x (one's Complement) is a power of two" + (and (match_code "const_int") + (match_test "IS_POWEROF2_P (~ival)"))) + +(define_constraint "Cux" + "@internal + constant such that AND gives an unsigned extension" + (and (match_code "const_int") + (match_test "ival == 0xff || ival == 0xffff"))) + +(define_constraint "Crr" + "@internal + constant that can be loaded with ror b,u6" + (and (match_code "const_int") + (match_test "(ival & ~0x8000001f) == 0 && !arc_ccfsm_cond_exec_p ()"))) + +;; Floating-point constraints + +(define_constraint "G" + "@internal + A 32-bit constant double value" + (and (match_code "const_double") + (match_test "arc_double_limm_p (op)"))) + +(define_constraint "H" + "@internal + All const_double values (including 64-bit values)" + (and (match_code "const_double") + (match_test "1"))) + +;; Memory constraints +(define_memory_constraint "T" + "@internal + A valid memory operand for ARCompact load instructions" + (and (match_code "mem") + (match_test "compact_load_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "S" + "@internal + A valid memory operand for ARCompact store instructions" + (and (match_code "mem") + (match_test "compact_store_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "Usd" + "@internal + A valid _small-data_ memory operand for ARCompact instructions" + (and (match_code "mem") + (match_test "compact_sda_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "Usc" + "@internal + A valid memory operand for storing constants" + (and (match_code "mem") + (match_test "!CONSTANT_P (XEXP (op,0))") +;; ??? the assembler rejects stores of immediates to small data. 
+ (match_test "!compact_sda_memory_operand (op, VOIDmode)")))
+
+(define_memory_constraint "Us<"
+ "@internal
+ Stack pre-decrement"
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == PRE_DEC")
+ (match_test "REG_P (XEXP (XEXP (op, 0), 0))")
+ (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REG")))
+
+(define_memory_constraint "Us>"
+ "@internal
+ Stack post-increment"
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == POST_INC")
+ (match_test "REG_P (XEXP (XEXP (op, 0), 0))")
+ (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REG")))
+
+;; General constraints
+
+(define_constraint "Cbr"
+ "Branch destination"
+ (ior (and (match_code "symbol_ref")
+ (match_test "!arc_is_longcall_p (op)"))
+ (match_code "label_ref")))
+
+(define_constraint "Cbp"
+ "predicable branch/call destination"
+ (ior (and (match_code "symbol_ref")
+ (match_test "arc_is_shortcall_p (op)"))
+ (match_code "label_ref")))
+
+(define_constraint "Cpc"
+ "pc-relative constant"
+ (match_test "arc_legitimate_pc_offset_p (op)"))
+
+(define_constraint "Clb"
+ "label"
+ (and (match_code "label_ref")
+ (match_test "arc_text_label (XEXP (op, 0))")))
+
+(define_constraint "Cal"
+ "constant for arithmetic/logical operations"
+ (match_test "immediate_operand (op, VOIDmode) && !arc_legitimate_pc_offset_p (op)"))
+
+(define_constraint "C32"
+ "32 bit constant for arithmetic/logical operations"
+ (match_test "immediate_operand (op, VOIDmode)
+ && !arc_legitimate_pc_offset_p (op)
+ && !satisfies_constraint_I (op)"))
+
+; Note that the 'cryptic' register constraints will not make reload use the
+; associated class to reload into, but this will not penalize reloading of any
+; other operands, or using an alternate part of the same alternative.
+
+; Rcq is different in three important ways from a register class constraint:
+; - It does not imply a register class, hence reload will not use it to drive
+; reloads.
+; - It matches even when there is no register class to describe its accepted
+; set; not having such a set again lessens the impact on register allocation.
+; - It won't match when the instruction is conditionalized by the ccfsm.
+(define_constraint "Rcq"
+ "@internal
+ Cryptic q - for short insn generation while not affecting register allocation
+ Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3},
+ @code{r12}-@code{r15}"
+ (and (match_code "REG")
+ (match_test "TARGET_Rcq
+ && !arc_ccfsm_cond_exec_p ()
+ && ((((REGNO (op) & 7) ^ 4) - 4) & 15) == REGNO (op)")))
+
+; If we need a reload, we generally want to steer reload to use three-address
+; alternatives in preference to two-address alternatives, unless the
+; three-address alternative introduces a LIMM that is unnecessary for the
+; two-address alternative.
+(define_constraint "Rcw"
+ "@internal
+ Cryptic w - for use in early alternatives with matching constraint"
+ (and (match_code "REG")
+ (match_test
+ "TARGET_Rcw
+ && REGNO (op) < FIRST_PSEUDO_REGISTER
+ && TEST_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS],
+ REGNO (op))")))
+
+(define_constraint "Rcr"
+ "@internal
+ Cryptic r - for use in early alternatives with matching constraint"
+ (and (match_code "REG")
+ (match_test
+ "TARGET_Rcw
+ && REGNO (op) < FIRST_PSEUDO_REGISTER
+ && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS],
+ REGNO (op))")))
+
+(define_constraint "Rcb"
+ "@internal
+ Stack Pointer register @code{r28} - do not reload into its class"
+ (and (match_code "REG")
+ (match_test "REGNO (op) == 28")))
+
+(define_constraint "Rck"
+ "@internal
+ blink (useful for push_s / pop_s)"
+ (and (match_code "REG")
+ (match_test "REGNO (op) == 31")))
+
+(define_constraint "Rs5"
+ "@internal
+ sibcall register - only allow one of the five available 16-bit insns.
+ Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3},
+ @code{r12}"
+ (and (match_code "REG")
+ (match_test "!arc_ccfsm_cond_exec_p ()")
+ (ior (match_test "(unsigned) REGNO (op) <= 3")
+ (match_test "REGNO (op) == 12"))))
+
+(define_constraint "Rcc"
+ "@internal
+ Condition Codes"
+ (and (match_code "REG") (match_test "cc_register (op, VOIDmode)")))
+
+
+(define_constraint "Q"
+ "@internal
+ Integer constant zero"
+ (and (match_code "const_int")
+ (match_test "IS_ZERO (ival)")))
diff --git a/gcc/config/arc/fpx.md b/gcc/config/arc/fpx.md
new file mode 100644
index 00000000000..10a5dcd3b66
--- /dev/null
+++ b/gcc/config/arc/fpx.md
@@ -0,0 +1,674 @@
+;; Machine description of the Synopsys DesignWare ARC cpu Floating Point
+;; extensions for GNU C compiler
+;; Copyright (C) 2007-2013 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; TODOs:
+;; dpfp blocks?
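Several FIXMEs below note that implementing a floating-point comparison as (x-y)==0 is unsound. A hedged C demonstration of one failure mode: both inputs are normal floats, but their difference underflows below FLT_MIN, so hardware (or emulation) that flushes denormal results to zero reports two distinct values as equal.

    /* Demonstration only; the printed value depends on how the FP
       implementation handles denormal results.  a and b are normal,
       but a - b == 5e-39 is denormal and may flush to zero.  */
    #include <stdio.h>

    int
    main (void)
    {
      float a = 3.0e-38f;
      float b = 2.5e-38f;
      printf ("(a-b)==0: %d\n", (a - b) == 0.0f); /* 1 under flush-to-zero */
      return 0;
    }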
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Scheduler descriptions for the fpx instructions +(define_insn_reservation "spfp_compact" 3 + (and (match_test "TARGET_SPFP_COMPACT_SET") + (eq_attr "type" "spfp")) + "issue+core, nothing*2, write_port") + +(define_insn_reservation "spfp_fast" 6 + (and (match_test "TARGET_SPFP_FAST_SET") + (eq_attr "type" "spfp")) + "issue+core, nothing*5, write_port") + +(define_insn_reservation "dpfp_compact_mult" 7 + (and (match_test "TARGET_DPFP_COMPACT_SET") + (eq_attr "type" "dpfp_mult")) + "issue+core, nothing*6, write_port") + +(define_insn_reservation "dpfp_compact_addsub" 5 + (and (match_test "TARGET_DPFP_COMPACT_SET") + (eq_attr "type" "dpfp_addsub")) + "issue+core, nothing*4, write_port") + +(define_insn_reservation "dpfp_fast" 5 + (and (match_test "TARGET_DPFP_FAST_SET") + (eq_attr "type" "dpfp_mult,dpfp_addsub")) + "issue+core, nothing*4, write_port") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ") + (plus:SF (match_operand:SF 1 "nonmemory_operand" "0,r,GCal,r,0") + (match_operand:SF 2 "nonmemory_operand" "I,rL,r,GCal,LrCal")))] +; "(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET";Add flag for float + "TARGET_SPFP" + "@ + fadd %0,%1,%2 + fadd %0,%1,%2 + fadd %0,%S1,%2 + fadd %0,%1,%S2 + fadd%? %0,%1,%S2" + [(set_attr "type" "spfp") + (set_attr "length" "4,4,8,8,8")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ") + (minus:SF (match_operand:SF 1 "nonmemory_operand" "r,0,GCal,r,0") + (match_operand:SF 2 "nonmemory_operand" "rL,I,r,GCal,LrCal")))] + ;"(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET";Add flag for float + "TARGET_SPFP" + "@ + fsub %0,%1,%2 + fsub %0,%1,%2 + fsub %0,%S1,%2 + fsub %0,%1,%S2 + fsub%? %0,%1,%S2" + [(set_attr "type" "spfp") + (set_attr "length" "4,4,8,8,8")]) + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ") + (mult:SF (match_operand:SF 1 "nonmemory_operand" "r,0,GCal,r,0") + (match_operand:SF 2 "nonmemory_operand" "rL,I,r,GCal,LrCal")))] +; "(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET" ;Add flag for float + "TARGET_SPFP" + "@ + fmul %0,%1,%2 + fmul %0,%1,%2 + fmul %0,%S1,%2 + fmul %0,%1,%S2 + fmul%? %0,%1,%S2" + [(set_attr "type" "spfp") + (set_attr "length" "4,4,8,8,8")]) + + +;; For comparisons, we can avoid storing the top half of the result into +;; a register since '.f' lets us set the Z bit for the conditional +;; branch insns. + +;; ??? FIXME (x-y)==0 is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +(define_insn "cmpsfpx_raw" + [(set (reg:CC_FPX 61) + (compare:CC_FPX (match_operand:SF 0 "register_operand" "r") + (match_operand:SF 1 "register_operand" "r")))] + "TARGET_ARGONAUT_SET && TARGET_SPFP" + "fsub.f 0,%0,%1" + [(set_attr "type" "spfp") + (set_attr "length" "4")]) + +;; ??? FIXME (x-y)==0 is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +;; ??? FIXME we claim to clobber operand 2, yet the two numbers appended +;; to the actual instructions are incorrect. The result of the d*subh +;; insn is stored in the Dx register specified by that first number. 
+(define_insn "cmpdfpx_raw" + [(set (reg:CC_FPX 61) + (compare:CC_FPX (match_operand:DF 0 "nonmemory_operand" "D,r") + (match_operand:DF 1 "nonmemory_operand" "r,D"))) + (clobber (match_scratch:DF 2 "=D,D"))] + "TARGET_ARGONAUT_SET && TARGET_DPFP" + "@ + dsubh%F0%F1.f 0,%H2,%L2 + drsubh%F0%F2.f 0,%H1,%L1" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4")]) + +;; ??? FIXME subtraction is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +(define_insn "*cmpfpx_gt" + [(set (reg:CC_FP_GT 61) (compare:CC_FP_GT (reg:CC_FPX 61) (const_int 0)))] + "TARGET_ARGONAUT_SET" + "cmp.ls pcl,pcl" + [(set_attr "type" "compare") + (set_attr "length" "4")]) + +;; ??? FIXME subtraction is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +(define_insn "*cmpfpx_ge" + [(set (reg:CC_FP_GE 61) (compare:CC_FP_GE (reg:CC_FPX 61) (const_int 0)))] + "TARGET_ARGONAUT_SET" + "rcmp.pnz pcl,0" + [(set_attr "type" "compare") + (set_attr "length" "4")]) + +;; DPFP instructions begin... + +;; op0_reg = D1_reg.low +(define_insn "*lr_double_lower" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:DF 1 "arc_double_register_operand" "D")] VUNSPEC_LR ))] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" +"lr %0, [%1l] ; *lr_double_lower" +[(set_attr "length" "8") +(set_attr "type" "lr")] +) + +(define_insn "*lr_double_higher" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:DF 1 "arc_double_register_operand" "D")] VUNSPEC_LR_HIGH ))] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" +"lr %0, [%1h] ; *lr_double_higher" +[(set_attr "length" "8") +(set_attr "type" "lr")] +) + + +(define_insn "*dexcl_3op_peep2_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=r") ; not register_operand, to accept SUBREG + (unspec_volatile:SI [ + (match_operand:DF 1 "arc_double_register_operand" "D") + (match_operand:SI 2 "shouldbe_register_operand" "r") ; r1 + (match_operand:SI 3 "shouldbe_register_operand" "r") ; r0 + ] VUNSPEC_DEXCL )) + ] + "TARGET_DPFP" + "dexcl%F1 %0, %2, %3" + [(set_attr "type" "move") + (set_attr "length" "4")] +) + +;; version which will not overwrite operand0 +(define_insn "*dexcl_3op_peep2_insn_nores" + [ (unspec_volatile:SI [ + (match_operand:DF 0 "arc_double_register_operand" "D") + (match_operand:SI 1 "shouldbe_register_operand" "r") ; r1 + (match_operand:SI 2 "shouldbe_register_operand" "r") ; r0 + ] VUNSPEC_DEXCL_NORES ) + ] + "TARGET_DPFP" + "dexcl%F0 0, %1, %2" + [(set_attr "type" "move") + (set_attr "length" "4")] +) + +;; dexcl a,b,c pattern generated by the peephole2 above +(define_insn "*dexcl_3op_peep2_insn_lr" + [(parallel [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:DF 1 "arc_double_register_operand" "=D")] VUNSPEC_LR )) + (set (match_dup 1) (match_operand:DF 2 "register_operand" "r"))] + ) + ] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" + "dexcl%F1 %0, %H2, %L2" + [(set_attr "type" "move") + (set_attr "length" "4")] +) + + +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; doubles support for ARC +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; D0 = D1+{reg_pair}2 +;; (define_expand "adddf3" +;; [(set (match_operand:DF 0 "arc_double_register_operand" "") +;; (plus:DF (match_operand:DF 1 "arc_double_register_operand" "") +;; (match_operand:DF 2 "nonmemory_operand" 
"")))] +;; "TARGET_DPFP" +;; " " +;; ) +;; daddh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo +;; OR +;; daddh{0}{1} 0, reg3, limm2.lo +(define_expand "adddf3" + [(set (match_operand:DF 0 "arc_double_register_operand" "") + (plus:DF (match_operand:DF 1 "arc_double_register_operand" "") + (match_operand:DF 2 "nonmemory_operand" ""))) + ] + "TARGET_DPFP" + " if (GET_CODE (operands[2]) == CONST_DOUBLE) + { + rtx high, low, tmp; + split_double (operands[2], &low, &high); + tmp = force_reg (SImode, high); + emit_insn(gen_adddf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx)); + } + else + emit_insn(gen_adddf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx)); + DONE; + " +) + +;; daddh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1*/ +;; OR +;; daddh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0 */ +;; +(define_insn "adddf3_insn" + [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "!r,G"))) + (use (match_operand:SI 3 "" "N,r")) + (use (match_operand:SI 4 "" "N,Q")) + ; Prevent can_combine_p from combining muldf3_insn patterns with + ; different USE pairs. + (use (match_dup 2)) + ] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + daddh%F0%F1 0,%H2,%L2 + daddh%F0%F1 0,%3,%L2" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8")]) + +;; dmulh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo +;; OR +;; dmulh{0}{1} 0, reg3, limm2.lo +(define_expand "muldf3" + [(set (match_operand:DF 0 "arc_double_register_operand" "") + (mult:DF (match_operand:DF 1 "arc_double_register_operand" "") + (match_operand:DF 2 "nonmemory_operand" "")))] +"TARGET_DPFP" +" if (GET_CODE (operands[2]) == CONST_DOUBLE) + { + rtx high, low, tmp; + split_double (operands[2], &low, &high); + tmp = force_reg (SImode, high); + emit_insn(gen_muldf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx)); + } + else + emit_insn(gen_muldf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx)); + + DONE; + ") + + +;; dmulh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1*/ +;; OR +;; dmulh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0*/ +(define_insn "muldf3_insn" + [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (mult:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "!r,G"))) + (use (match_operand:SI 3 "" "N,!r")) + (use (match_operand:SI 4 "" "N,Q")) + ; Prevent can_combine_p from combining muldf3_insn patterns with + ; different USE pairs. + (use (match_dup 2)) + ] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dmulh%F0%F1 0,%H2,%L2 + dmulh%F0%F1 0,%3, %L2" + [(set_attr "type" "dpfp_mult") + (set_attr "length" "4,8")]) + +;; dsubh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo +;; OR +;; dsubh{0}{1} 0, reg3, limm2.lo +;; OR +;; drsubh{0}{2} 0, {reg_pair}1.hi, {reg_pair}1.lo +;; OR +;; drsubh{0}{2} 0, reg3, limm1.lo +(define_expand "subdf3" + [(set (match_operand:DF 0 "arc_double_register_operand" "") + (minus:DF (match_operand:DF 1 "nonmemory_operand" "") + (match_operand:DF 2 "nonmemory_operand" "")))] +"TARGET_DPFP" +" if (GET_CODE (operands[1]) == CONST_DOUBLE || GET_CODE (operands[2]) == CONST_DOUBLE) + { + rtx high, low, tmp; + int const_index = ((GET_CODE (operands[1]) == CONST_DOUBLE) ? 
1: 2); + split_double (operands[const_index], &low, &high); + tmp = force_reg (SImode, high); + emit_insn(gen_subdf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx)); + } + else + emit_insn(gen_subdf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx)); + + DONE; + " +) + +;; dsubh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1 */ +;; OR +;; dsubh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0*/ +;; OR +;; drsubh{0}{2} 0, {reg_pair}1.hi, {reg_pair}1.lo /* operand 4 = 1 */ +;; OR +;; drsubh{0}{2} 0, reg3, limm1.lo /* operand 4 = 0*/ +(define_insn "subdf3_insn" + [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D,D,D") + (minus:DF (match_operand:DF 1 "nonmemory_operand" "D,D,!r,G") + (match_operand:DF 2 "nonmemory_operand" "!r,G,D,D"))) + (use (match_operand:SI 3 "" "N,r,N,r")) + (use (match_operand:SI 4 "" "N,Q,N,Q")) + ; Prevent can_combine_p from combining muldf3_insn patterns with + ; different USE pairs. + (use (match_dup 2))] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT) && + !(GET_CODE(operands[1]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dsubh%F0%F1 0,%H2,%L2 + dsubh%F0%F1 0,%3,%L2 + drsubh%F0%F2 0,%H1,%L1 + drsubh%F0%F2 0,%3,%L1" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8,4,8")]) + +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; Peephole for following conversion +;; ;; D0 = D2<op>{reg_pair}3 +;; ;; {reg_pair}5 = D0 +;; ;; D0 = {reg_pair}6 +;; ;; | +;; ;; V +;; ;; _________________________________________________________ +;; ;; / D0 = D2 <op> {regpair3_or_limmreg34} +;; ;; ---- + {reg_pair}5.hi = ( D2<op>{regpair3_or_limmreg34} ).hi +;; ;; | \_________________________________________________________ +;; ;; | +;; ;; | ________________________________________________________ +;; ;; | / {reg_pair}5.lo = ( D2<op>{regpair3_or_limmreg34} ).lo +;; ;; +-----+ D0 = {reg_pair}6 +;; ;; \ _________________________________________________________ +;; ;; || +;; ;; || +;; ;; \/ +;; ;; d<op>{0}{2}h {reg_pair}5.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; ;; dexcl{0} {reg_pair}5.lo, {reg_pair}6.lo, {reg_pair}6.hi +;; ;; ----------------------------------------------------------------------------------------- +;; ;; where <op> is one of {+,*,-} +;; ;; <opname> is {add,mult,sub} +;; ;; +;; ;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; ;; {regpair2_or_limmreg24} and D3 +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; (define_peephole2 +;; [(parallel [(set (match_operand:DF 0 "register_operand" "") +;; (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") +;; (match_operand:DF 3 "nonmemory_operand" "")])) +;; (use (match_operand:SI 4 "" ""))]) +;; (set (match_operand:DF 5 "register_operand" "") +;; (match_dup 0)) +;; (set (match_dup 0) +;; (match_operand:DF 6 "register_operand" "")) +;; ] +;; "TARGET_DPFP" +;; [ +;; (parallel [(set (match_dup 0) +;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)])) +;; (use (match_dup 4)) +;; (set (match_dup 5) +;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)]))]) +;; (parallel [ +;; ;; (set (subreg:SI (match_dup 5) 0) +;; (set (match_dup 7) +;; (unspec_volatile [(match_dup 0)] VUNSPEC_LR )) +;; (set (match_dup 0) (match_dup 6))] +;; ) +;; ] +;; "operands[7] = simplify_gen_subreg(SImode,operands[5],DFmode,0);" +;; ) +;; 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Peephole for following conversion +;; D0 = D2<op>{reg_pair}3 +;; {reg_pair}6 = D0 +;; D0 = {reg_pair}7 +;; | +;; V +;; _________________________________________________________ +;; / D0 = D2 <op> {regpair3_or_limmreg34} +;; ---- + {reg_pair}6.hi = ( D2<op>{regpair3_or_limmreg34} ).hi +;; | \_________________________________________________________ +;; | +;; | ________________________________________________________ +;; | / {reg_pair}6.lo = ( D2<op>{regpair3_or_limmreg34} ).lo +;; +-----+ D0 = {reg_pair}7 +;; \ _________________________________________________________ +;; || +;; || +;; \/ +;; d<op>{0}{2}h {reg_pair}6.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; dexcl{0} {reg_pair}6.lo, {reg_pair}7.lo, {reg_pair}7.hi +;; ----------------------------------------------------------------------------------------- +;; where <op> is one of {+,*,-} +;; <opname> is {add,mult,sub} +;; +;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; {regpair2_or_limmreg24} and D3 +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_peephole2 + [(parallel [(set (match_operand:DF 0 "register_operand" "") + (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") + (match_operand:DF 3 "nonmemory_operand" "")])) + (use (match_operand:SI 4 "" "")) + (use (match_operand:SI 5 "" "")) + (use (match_operand:SI 6 "" ""))]) + (set (match_operand:DF 7 "register_operand" "") + (match_dup 0)) + (set (match_dup 0) + (match_operand:DF 8 "register_operand" "")) + ] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" + [ + (parallel [(set (match_dup 0) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)])) + (use (match_dup 4)) + (use (match_dup 5)) + (set (match_dup 7) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)]))]) + (parallel [ +;; (set (subreg:SI (match_dup 7) 0) + (set (match_dup 9) + (unspec_volatile:SI [(match_dup 0)] VUNSPEC_LR )) + (set (match_dup 0) (match_dup 8))] + ) + ] + "operands[9] = simplify_gen_subreg(SImode,operands[7],DFmode,0);" + ) + +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; Peephole to generate d<opname>{ij}h a,b,c instructions +;; ;; D0 = D2<op>{reg_pair}3 +;; ;; {reg_pair}5 = D0 +;; ;; | +;; ;; V +;; ;; __________________________________________ +;; ;; / D0 = D2 <op> {regpair3_or_limmreg34} +;; ;; ---- + {reg_pair}5.hi = ( D2<op>{regpair3_or_limmreg34} ).hi +;; ;; | \__________________________________________ +;; ;; | +;; ;; + --- {reg_pair}5.lo = ( D2<op>{regpair3_or_limmreg34} ).lo +;; ;; || +;; ;; || +;; ;; \/ +;; ;; d<op>{0}{2}h {reg_pair}4.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; ;; lr {reg_pair}4.lo, {D2l} +;; ;; ---------------------------------------------------------------------------------------- +;; ;; where <op> is one of {+,*,-} +;; ;; <opname> is {add,mult,sub} +;; ;; +;; ;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; ;; {regpair2_or_limmreg24} and D3 +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; (define_peephole2 +;; [(parallel [(set (match_operand:DF 0 "register_operand" "") +;; (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") +;; (match_operand:DF 3 "nonmemory_operand" "")])) +;; (use (match_operand:SI 4 "" ""))]) +;; (set (match_operand:DF 5 "register_operand" "") +;; 
(match_dup 0)) +;; ] +;; "TARGET_DPFP" +;; [ +;; (parallel [(set (match_dup 0) +;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)])) +;; (use (match_dup 4)) +;; (set (match_dup 5) +;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)]))]) +;; ; (set (subreg:SI (match_dup 5) 0) +;; (set (match_dup 6) +;; (unspec_volatile [(match_dup 0)] VUNSPEC_LR )) +;; ] +;; "operands[6] = simplify_gen_subreg(SImode,operands[5],DFmode,0);" +;; ) +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Peephole to generate d<opname>{ij}h a,b,c instructions +;; D0 = D2<op>{reg_pair}3 +;; {reg_pair}6 = D0 +;; | +;; V +;; __________________________________________ +;; / D0 = D2 <op> {regpair3_or_limmreg34} +;; ---- + {reg_pair}6.hi = ( D2<op>{regpair3_or_limmreg34} ).hi +;; | \__________________________________________ +;; | +;; + --- {reg_pair}6.lo = ( D2<op>{regpair3_or_limmreg34} ).lo +;; || +;; || +;; \/ +;; d<op>{0}{2}h {reg_pair}4.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; lr {reg_pair}4.lo, {D2l} +;; ---------------------------------------------------------------------------------------- +;; where <op> is one of {+,*,-} +;; <opname> is {add,mult,sub} +;; +;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; {regpair2_or_limmreg24} and D3 +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_peephole2 + [(parallel [(set (match_operand:DF 0 "register_operand" "") + (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") + (match_operand:DF 3 "nonmemory_operand" "")])) + (use (match_operand:SI 4 "" "")) + (use (match_operand:SI 5 "" "")) + (use (match_operand:SI 6 "" ""))]) + (set (match_operand:DF 7 "register_operand" "") + (match_dup 0)) + ] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" + [ + (parallel [(set (match_dup 0) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)])) + (use (match_dup 4)) + (use (match_dup 5)) + (set (match_dup 7) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)]))]) +; (set (subreg:SI (match_dup 7) 0) + (set (match_dup 8) + (unspec_volatile:SI [(match_dup 0)] VUNSPEC_LR )) + ] + "operands[8] = simplify_gen_subreg(SImode,operands[7],DFmode,0);" + ) + +;; ;; _______________________________________________________ +;; ;; / D0 = D1 + {regpair2_or_limmreg23} +;; ;; + {reg_pair}4.hi = ( D1 + {regpair2_or_limmreg23} ).hi +;; ;; \_______________________________________________________ +;; (define_insn "*daddh_peep2_insn" +;; [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") +;; (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") +;; (match_operand:DF 2 "nonmemory_operand" "r,G"))) +;; (use (match_operand:SI 3 "" "N,r")) +;; (set (match_operand:DF 4 "register_operand" "=r,r") +;; (plus:DF (match_dup 1) +;; (match_dup 2)))])] +;; "TARGET_DPFP" +;; "@ +;; daddh%F0%F1 %H4, %H2, %L2 +;; daddh%F0%F1 %H4, %3, %L2" +;; [(set_attr "type" "dpfp_addsub") +;; (set_attr "length" "4,8")] +;; ) +;; _______________________________________________________ +;; / D0 = D1 + {regpair2_or_limmreg23} +;; + {reg_pair}5.hi = ( D1 + {regpair2_or_limmreg23} ).hi +;; \_______________________________________________________ +(define_insn "*daddh_peep2_insn" + [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "r,G"))) + (use (match_operand:SI 3 "" "N,r")) + (use 
(match_operand:SI 4 "" "N,Q")) + (use (match_operand:SI 5 "" "")) + (set (match_operand:DF 6 "register_operand" "=r,r") + (plus:DF (match_dup 1) + (match_dup 2)))])] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + daddh%F0%F1 %H6, %H2, %L2 + daddh%F0%F1 %H6, %3, %L2" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8")] +) + +;; _______________________________________________________ +;; / D0 = D1 * {regpair2_or_limmreg23} +;; + {reg_pair}5.hi = ( D1 * {regpair2_or_limmreg23} ).hi +;; \_______________________________________________________ +(define_insn "*dmulh_peep2_insn" + [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (mult:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "r,G"))) + (use (match_operand:SI 3 "" "N,r")) + (use (match_operand:SI 4 "" "N,Q")) + (use (match_operand:SI 5 "" "")) + (set (match_operand:DF 6 "register_operand" "=r,r") + (mult:DF (match_dup 1) + (match_dup 2)))])] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dmulh%F0%F1 %H6, %H2, %L2 + dmulh%F0%F1 %H6, %3, %L2" + [(set_attr "type" "dpfp_mult") + (set_attr "length" "4,8")] +) + +;; _______________________________________________________ +;; / D0 = D1 - {regpair2_or_limmreg23} +;; + {reg_pair}5.hi = ( D1 - {regpair2_or_limmreg23} ).hi +;; \_______________________________________________________ +;; OR +;; _______________________________________________________ +;; / D0 = {regpair1_or_limmreg13} - D2 +;; + {reg_pair}5.hi = ( {regpair1_or_limmreg13} ).hi - D2 +;; \_______________________________________________________ +(define_insn "*dsubh_peep2_insn" + [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D,D,D") + (minus:DF (match_operand:DF 1 "nonmemory_operand" "D,D,r,G") + (match_operand:DF 2 "nonmemory_operand" "r,G,D,D"))) + (use (match_operand:SI 3 "" "N,r,N,r")) + (use (match_operand:SI 4 "" "N,Q,N,Q")) + (use (match_operand:SI 5 "" "")) + (set (match_operand:DF 6 "register_operand" "=r,r,r,r") + (minus:DF (match_dup 1) + (match_dup 2)))])] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT) && + !(GET_CODE(operands[1]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dsubh%F0%F1 %H6, %H2, %L2 + dsubh%F0%F1 %H6, %3, %L2 + drsubh%F0%F2 %H6, %H1, %L1 + drsubh%F0%F2 %H6, %3, %L1" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8,4,8")] +) diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md new file mode 100644 index 00000000000..241fb23ee36 --- /dev/null +++ b/gcc/config/arc/predicates.md @@ -0,0 +1,807 @@ +;; Predicate definitions for Synopsys DesignWare ARC. +;; Copyright (C) 2007-2013 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
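+
+;; An illustrative sketch of how these predicates are consumed (the pattern
+;; name here is hypothetical, and the "w" writable-core-register and "c"
+;; core-register constraints are assumed from constraints.md): a pattern in
+;; arc.md pairs a predicate, which decides what RTL may match the operand,
+;; with a constraint, which then guides register allocation, e.g.
+;;
+;;   (define_insn "example_one_s_complement"
+;;     [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+;;           (not:SI (match_operand:SI 1 "register_operand" "c")))]
+;;     ""
+;;     "not %0,%1")
+;;
+;; dest_reg_operand (first below) accepts everything register_operand
+;; accepts, minus hard core registers that may be read but not written.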
+
+(define_predicate "dest_reg_operand"
+  (match_code "reg,subreg")
+{
+  rtx op0 = op;
+
+  if (GET_CODE (op0) == SUBREG)
+    op0 = SUBREG_REG (op0);
+  if (REG_P (op0) && REGNO (op0) < FIRST_PSEUDO_REGISTER
+      && TEST_HARD_REG_BIT (reg_class_contents[ALL_CORE_REGS],
+                            REGNO (op0))
+      && !TEST_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS],
+                             REGNO (op0)))
+    return 0;
+  return register_operand (op, mode);
+})
+
+(define_predicate "mpy_dest_reg_operand"
+  (match_code "reg,subreg")
+{
+  rtx op0 = op;
+
+  if (GET_CODE (op0) == SUBREG)
+    op0 = SUBREG_REG (op0);
+  if (REG_P (op0) && REGNO (op0) < FIRST_PSEUDO_REGISTER
+      && TEST_HARD_REG_BIT (reg_class_contents[ALL_CORE_REGS],
+                            REGNO (op0))
+      /* Make sure the destination register is not LP_COUNT.  */
+      && !TEST_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS],
+                             REGNO (op0)))
+    return 0;
+  return register_operand (op, mode);
+})
+
+
+;; Returns 1 if OP is a symbol reference.
+(define_predicate "symbolic_operand"
+  (match_code "symbol_ref, label_ref, const")
+)
+
+;; Acceptable arguments to the call insn.
+(define_predicate "call_address_operand"
+  (ior (match_code "const_int, reg")
+       (match_operand 0 "symbolic_operand")
+       (match_test "CONSTANT_P (op)
+                    && arc_legitimate_constant_p (VOIDmode, op)"))
+)
+
+(define_predicate "call_operand"
+  (and (match_code "mem")
+       (match_test "call_address_operand (XEXP (op, 0), mode)"))
+)
+
+;; Return true if OP is an unsigned 6-bit immediate (u6) value.
+(define_predicate "u6_immediate_operand"
+  (and (match_code "const_int")
+       (match_test "UNSIGNED_INT6 (INTVAL (op))"))
+)
+
+;; Return true if OP is a short immediate (shimm) value.
+(define_predicate "short_immediate_operand"
+  (and (match_code "const_int")
+       (match_test "SMALL_INT (INTVAL (op))"))
+)
+
+(define_predicate "p2_immediate_operand"
+  (and (match_code "const_int")
+       (match_test "((INTVAL (op) - 1) & INTVAL (op)) == 0")
+       (match_test "INTVAL (op)"))
+)
+
+;; Return true if OP will require a long immediate (limm) value.
+;; This is currently only used when calculating length attributes.
+(define_predicate "long_immediate_operand"
+  (match_code "symbol_ref, label_ref, const, const_double, const_int")
+{
+  switch (GET_CODE (op))
+    {
+    case SYMBOL_REF :
+    case LABEL_REF :
+    case CONST :
+      return 1;
+    case CONST_INT :
+      return !SIGNED_INT12 (INTVAL (op));
+    case CONST_DOUBLE :
+      /* These can happen because large unsigned 32 bit constants are
+         represented this way (the multiplication patterns can cause these
+         to be generated).  They also occur for SFmode values.  */
+      return 1;
+    default:
+      break;
+    }
+  return 0;
+}
+)
+
+;; Return true if OP is a MEM that, when used as a load or store address,
+;; will require an 8-byte insn.
+;; Load and store instructions don't allow the same possibilities, but
+;; they're similar enough that this one function will do.
+;; This is currently only used when calculating length attributes.
+(define_predicate "long_immediate_loadstore_operand"
+  (match_code "mem")
+{
+  int size = GET_MODE_SIZE (GET_MODE (op));
+
+  op = XEXP (op, 0);
+  switch (GET_CODE (op))
+    {
+    case SYMBOL_REF :
+    case LABEL_REF :
+    case CONST :
+      return 1;
+    case CONST_INT :
+      /* This must be handled as "st c,[limm]".  Ditto for load.
+         Technically, the assembler could translate some possibilities to
+         "st c,[limm/2 + limm/2]" if limm/2 will fit in a shimm, but we don't
+         assume that it does.  */
+      return 1;
+    case CONST_DOUBLE :
+      /* These can happen because large unsigned 32 bit constants are
+         represented this way (the multiplication patterns can cause these
+         to be generated).  They also occur for SFmode values.  */
+      return 1;
+    case REG :
+      return 0;
+    case PLUS :
+      {
+        rtx x = XEXP (op, 1);
+
+        if (GET_CODE (x) == CONST)
+          {
+            x = XEXP (x, 0);
+            if (GET_CODE (x) == PLUS)
+              x = XEXP (x, 0);
+          }
+        if (CONST_INT_P (x))
+          return (!SMALL_INT (INTVAL (x))
+                  && (size <= 1 || size > 4
+                      || (INTVAL (x) & (size - 1)) != 0
+                      || !SMALL_INT (INTVAL (x) / size)));
+        else if (GET_CODE (x) == SYMBOL_REF)
+          return TARGET_NO_SDATA_SET || !SYMBOL_REF_SMALL_P (x);
+        return 0;
+      }
+    default:
+      break;
+    }
+  return 0;
+}
+)
+
+;; Return true if OP is any of R0-R3, R12-R15 for ARCompact 16-bit
+;; instructions.
+(define_predicate "compact_register_operand"
+  (match_code "reg, subreg")
+  {
+     if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+        return 0;
+
+     return (GET_CODE (op) == REG)
+            && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+                || COMPACT_GP_REG_P (REGNO (op)));
+  }
+)
+
+;; Return true if OP is an acceptable memory operand for ARCompact
+;; 16-bit load instructions.
+(define_predicate "compact_load_memory_operand"
+  (match_code "mem")
+{
+  rtx addr, plus0, plus1;
+  int size, off;
+
+  /* Eliminate non-memory operations.  */
+  if (GET_CODE (op) != MEM)
+    return 0;
+
+  /* .di instructions have no 16-bit form.  */
+  if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET)
+    return 0;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  size = GET_MODE_SIZE (mode);
+
+  /* dword operations really put out 2 instructions, so eliminate them.  */
+  if (size > UNITS_PER_WORD)
+    return 0;
+
+  /* Decode the address now.  */
+  addr = XEXP (op, 0);
+  switch (GET_CODE (addr))
+    {
+    case REG:
+      return (REGNO (addr) >= FIRST_PSEUDO_REGISTER
+              || COMPACT_GP_REG_P (REGNO (addr))
+              || (SP_REG_P (REGNO (addr)) && (size != 2)));
+      /* Reverting for the moment since ldw_s does not have sp as a valid
+         parameter.  */
+    case PLUS:
+      plus0 = XEXP (addr, 0);
+      plus1 = XEXP (addr, 1);
+
+      if ((GET_CODE (plus0) == REG)
+          && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
+              || COMPACT_GP_REG_P (REGNO (plus0)))
+          && ((GET_CODE (plus1) == REG)
+              && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
+                  || COMPACT_GP_REG_P (REGNO (plus1)))))
+        {
+          return 1;
+        }
+
+      if ((GET_CODE (plus0) == REG)
+          && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
+              || COMPACT_GP_REG_P (REGNO (plus0)))
+          && (GET_CODE (plus1) == CONST_INT))
+        {
+          off = INTVAL (plus1);
+
+          /* Negative offset is not supported in 16-bit load/store insns.  */
+          if (off < 0)
+            return 0;
+
+          switch (size)
+            {
+            case 1:
+              return (off < 32);
+            case 2:
+              return ((off < 64) && (off % 2 == 0));
+            case 4:
+              return ((off < 128) && (off % 4 == 0));
+            }
+        }
+
+      if ((GET_CODE (plus0) == REG)
+          && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
+              || SP_REG_P (REGNO (plus0)))
+          && (GET_CODE (plus1) == CONST_INT))
+        {
+          off = INTVAL (plus1);
+          return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0));
+        }
+    default:
+      break;
+      /* TODO: 'gp' and 'pcl' are to be supported as base address operands
+         for 16-bit load instructions.  */
+    }
+  return 0;
+
+}
+)
+
+;; Return true if OP is an acceptable memory operand for ARCompact
+;; 16-bit store instructions.
+(define_predicate "compact_store_memory_operand"
+  (match_code "mem")
+{
+  rtx addr, plus0, plus1;
+  int size, off;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  /* .di instructions have no 16-bit form. 
*/ + if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET) + return 0; + + size = GET_MODE_SIZE (mode); + + /* dword operations really put out 2 instructions, so eliminate them. */ + if (size > UNITS_PER_WORD) + return 0; + + /* Decode the address now. */ + addr = XEXP (op, 0); + switch (GET_CODE (addr)) + { + case REG: + return (REGNO (addr) >= FIRST_PSEUDO_REGISTER + || COMPACT_GP_REG_P (REGNO (addr)) + || (SP_REG_P (REGNO (addr)) && (size != 2))); + /* stw_s does not support SP as a parameter. */ + case PLUS: + plus0 = XEXP (addr, 0); + plus1 = XEXP (addr, 1); + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || COMPACT_GP_REG_P (REGNO (plus0))) + && (GET_CODE (plus1) == CONST_INT)) + { + off = INTVAL (plus1); + + /* Negative offset is not supported in 16-bit load/store insns. */ + if (off < 0) + return 0; + + switch (size) + { + case 1: + return (off < 32); + case 2: + return ((off < 64) && (off % 2 == 0)); + case 4: + return ((off < 128) && (off % 4 == 0)); + } + } + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || SP_REG_P (REGNO (plus0))) + && (GET_CODE (plus1) == CONST_INT)) + { + off = INTVAL (plus1); + + return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0)); + } + default: + break; + } + return 0; + } +) + +;; Return true if OP is an acceptable argument for a single word +;; move source. +(define_predicate "move_src_operand" + (match_code "symbol_ref, label_ref, const, const_int, const_double, reg, subreg, mem") +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return (!flag_pic || arc_legitimate_pic_operand_p(op)); + case CONST_INT : + return (LARGE_INT (INTVAL (op))); + case CONST_DOUBLE : + /* We can handle DImode integer constants in SImode if the value + (signed or unsigned) will fit in 32 bits. This is needed because + large unsigned 32 bit constants are represented as CONST_DOUBLEs. */ + if (mode == SImode) + return arc_double_limm_p (op); + /* We can handle 32 bit floating point constants. */ + if (mode == SFmode) + return GET_MODE (op) == SFmode; + return 0; + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + else + return register_operand (op, mode); + case MEM : + return address_operand (XEXP (op, 0), mode); + default : + return 0; + } +} +) + +;; Return true if OP is an acceptable argument for a double word +;; move source. +(define_predicate "move_double_src_operand" + (match_code "reg, subreg, mem, const_int, const_double") +{ + switch (GET_CODE (op)) + { + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return move_double_src_operand (SUBREG_REG (op), mode); + else + return register_operand (op, mode); + case MEM : + return address_operand (XEXP (op, 0), mode); + case CONST_INT : + case CONST_DOUBLE : + return 1; + default : + return 0; + } +} +) + +;; Return true if OP is an acceptable argument for a move destination. +(define_predicate "move_dest_operand" + (match_code "reg, subreg, mem") +{ + switch (GET_CODE (op)) + { + case REG : + /* Program Counter register cannot be the target of a move. It is + a readonly register. 
*/
+      if (REGNO (op) == PROGRAM_COUNTER_REGNO)
+        return 0;
+      else if (TARGET_MULMAC_32BY16_SET
+               && (REGNO (op) == 56 || REGNO (op) == 57))
+        return 0;
+      else if (TARGET_MUL64_SET
+               && (REGNO (op) == 57 || REGNO (op) == 58 || REGNO (op) == 59))
+        return 0;
+      else
+        return dest_reg_operand (op, mode);
+    case SUBREG :
+      /* (subreg (mem ...) ...) can occur here if the inner part was once a
+         pseudo-reg and is now a stack slot.  */
+      if (GET_CODE (SUBREG_REG (op)) == MEM)
+        return address_operand (XEXP (SUBREG_REG (op), 0), mode);
+      else
+        return dest_reg_operand (op, mode);
+    case MEM :
+      {
+        rtx addr = XEXP (op, 0);
+
+        if (GET_CODE (addr) == PLUS
+            && (GET_CODE (XEXP (addr, 0)) == MULT
+                || (!CONST_INT_P (XEXP (addr, 1))
+                    && (TARGET_NO_SDATA_SET
+                        || GET_CODE (XEXP (addr, 1)) != SYMBOL_REF
+                        || !SYMBOL_REF_SMALL_P (XEXP (addr, 1))))))
+          return 0;
+        if ((GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY)
+            && (GET_CODE (XEXP (addr, 1)) != PLUS
+                || !CONST_INT_P (XEXP (XEXP (addr, 1), 1))))
+          return 0;
+        return address_operand (addr, mode);
+      }
+    default :
+      return 0;
+    }
+
+}
+)
+
+;; Return true if OP is a valid load-with-update operand.
+(define_predicate "load_update_operand"
+  (match_code "mem")
+{
+  if (GET_CODE (op) != MEM
+      || GET_MODE (op) != mode)
+    return 0;
+  op = XEXP (op, 0);
+  if (GET_CODE (op) != PLUS
+      || GET_MODE (op) != Pmode
+      || !register_operand (XEXP (op, 0), Pmode)
+      || !nonmemory_operand (XEXP (op, 1), Pmode))
+    return 0;
+  return 1;
+}
+)
+
+;; Return true if OP is a valid store-with-update operand.
+(define_predicate "store_update_operand"
+  (match_code "mem")
+{
+  if (GET_CODE (op) != MEM
+      || GET_MODE (op) != mode)
+    return 0;
+  op = XEXP (op, 0);
+  if (GET_CODE (op) != PLUS
+      || GET_MODE (op) != Pmode
+      || !register_operand (XEXP (op, 0), Pmode)
+      || !(GET_CODE (XEXP (op, 1)) == CONST_INT
+           && SMALL_INT (INTVAL (XEXP (op, 1)))))
+    return 0;
+  return 1;
+}
+)
+
+;; Return true if OP is a non-volatile non-immediate operand.
+;; Volatile memory refs require a special "cache-bypass" instruction
+;; and only the standard movXX patterns are set up to handle them.
+(define_predicate "nonvol_nonimm_operand"
+  (and (match_code "subreg, reg, mem")
+       (match_test "(GET_CODE (op) != MEM || !MEM_VOLATILE_P (op)) && nonimmediate_operand (op, mode)"))
+)
+
+;; Return 1 if OP is a comparison operator valid for the mode of CC.
+;; This allows the use of MATCH_OPERATOR to recognize all the branch insns.
+
+(define_predicate "proper_comparison_operator"
+  (match_code "eq, ne, le, lt, ge, gt, leu, ltu, geu, gtu, unordered, ordered, uneq, unge, ungt, unle, unlt, ltgt")
+{
+  enum rtx_code code = GET_CODE (op);
+
+  if (!COMPARISON_P (op))
+    return 0;
+
+  /* After generic flag-setting insns, we can use eq / ne / pl / mi / pnz.
+     There are some creative uses for hi / ls after shifts, but these are
+     hard to understand for the compiler and could be at best the target of
+     a peephole.  */
+  switch (GET_MODE (XEXP (op, 0)))
+    {
+    case CC_ZNmode:
+      return (code == EQ || code == NE || code == GE || code == LT
+              || code == GT);
+    case CC_Zmode:
+      return code == EQ || code == NE;
+    case CC_Cmode:
+      return code == LTU || code == GEU;
+    case CC_FP_GTmode:
+      return code == GT || code == UNLE;
+    case CC_FP_GEmode:
+      return code == GE || code == UNLT;
+    case CC_FP_ORDmode:
+      return code == ORDERED || code == UNORDERED;
+    case CC_FP_UNEQmode:
+      return code == UNEQ || code == LTGT;
+    case CC_FPXmode:
+      return (code == EQ || code == NE || code == UNEQ || code == LTGT
+              || code == ORDERED || code == UNORDERED);
+
+    case CCmode:
+    case SImode: /* Used for BRcc.  */
+      return 1;
+    /* From combiner.  */
+    case QImode: case HImode: case DImode: case SFmode: case DFmode:
+      return 0;
+    default:
+      gcc_unreachable ();
+    }
+})
+
+(define_predicate "equality_comparison_operator"
+  (match_code "eq, ne"))
+
+(define_predicate "brcc_nolimm_operator"
+  (ior (match_test "REG_P (XEXP (op, 1))")
+       (and (match_code "eq, ne, lt, ge, ltu, geu")
+            (match_test "u6_immediate_operand (XEXP (op, 1), SImode)"))
+       (and (match_code "le, gt, leu, gtu")
+            (match_test "UNSIGNED_INT6 (INTVAL (XEXP (op, 1)) + 1)"))))
+
+;; Return TRUE if this is the condition code register; if we aren't given
+;; a mode, accept any CCmode register.
+(define_special_predicate "cc_register"
+  (match_code "reg")
+{
+  if (mode == VOIDmode)
+    {
+      mode = GET_MODE (op);
+      if (GET_MODE_CLASS (mode) != MODE_CC)
+        return FALSE;
+    }
+
+  if (mode == GET_MODE (op) && GET_CODE (op) == REG && REGNO (op) == CC_REG)
+    return TRUE;
+
+  return FALSE;
+})
+
+;; Return TRUE if this is the condition code register; if we aren't given
+;; a mode, accept any CCmode register.  If we are given a mode, accept
+;; modes that set a subset of flags.
+(define_special_predicate "cc_set_register"
+  (match_code "reg")
+{
+  enum machine_mode rmode = GET_MODE (op);
+
+  if (mode == VOIDmode)
+    {
+      mode = rmode;
+      if (GET_MODE_CLASS (mode) != MODE_CC)
+        return FALSE;
+    }
+
+  if (REGNO (op) != 61)
+    return FALSE;
+  if (mode == rmode
+      || (mode == CC_ZNmode && rmode == CC_Zmode)
+      || (mode == CCmode && rmode == CC_Zmode)
+      || (mode == CCmode && rmode == CC_ZNmode)
+      || (mode == CCmode && rmode == CC_Cmode))
+    return TRUE;
+
+  return FALSE;
+})
+
+; Accept CC_REG in modes which provide the flags needed for MODE.
+(define_special_predicate "cc_use_register"
+  (match_code "reg")
+{
+  if (REGNO (op) != CC_REG)
+    return 0;
+  if (GET_MODE (op) == mode)
+    return 1;
+  switch (mode)
+    {
+    case CC_Zmode:
+      if (GET_MODE (op) == CC_ZNmode)
+        return 1;
+      /* Fall through.  */
+    case CC_ZNmode: case CC_Cmode:
+      return GET_MODE (op) == CCmode;
+    default:
+      gcc_unreachable ();
+    }
+})
+
+(define_special_predicate "zn_compare_operator"
+  (match_code "compare")
+{
+  return GET_MODE (op) == CC_ZNmode || GET_MODE (op) == CC_Zmode;
+})
+
+;; Return true if OP is a shift operator.
+(define_predicate "shift_operator"
+  (match_code "ashiftrt, lshiftrt, ashift")
+)
+
+;; Return true if OP is a left shift operator that can be implemented in
+;; four insn words or less without a barrel shifter or multiplier.
+(define_predicate "shiftl4_operator" + (and (match_code "ashift") + (match_test "const_int_operand (XEXP (op, 1), VOIDmode) ") + (match_test "UINTVAL (XEXP (op, 1)) <= 9U + || INTVAL (XEXP (op, 1)) == 29 + || INTVAL (XEXP (op, 1)) == 30 + || INTVAL (XEXP (op, 1)) == 31"))) + +;; Return true if OP is a right shift operator that can be implemented in +;; four insn words or less without a barrel shifter or multiplier. +(define_predicate "shiftr4_operator" + (and (match_code "ashiftrt, lshiftrt") + (match_test "const_int_operand (XEXP (op, 1), VOIDmode) ") + (match_test "UINTVAL (XEXP (op, 1)) <= 4U + || INTVAL (XEXP (op, 1)) == 30 + || INTVAL (XEXP (op, 1)) == 31"))) + +;; Return true if OP is a shift operator that can be implemented in +;; four insn words or less without a barrel shifter or multiplier. +(define_predicate "shift4_operator" + (ior (match_operand 0 "shiftl4_operator") + (match_operand 0 "shiftr4_operator"))) + +(define_predicate "mult_operator" + (and (match_code "mult") (match_test "TARGET_ARC700 && !TARGET_NOMPY_SET")) +) + +(define_predicate "commutative_operator" + (ior (match_code "plus,ior,xor,and") + (match_operand 0 "mult_operator") + (and (match_code "ss_plus") + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "commutative_operator_sans_mult" + (ior (match_code "plus,ior,xor,and") + (and (match_code "ss_plus") + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "noncommutative_operator" + (ior (match_code "minus,ashift,ashiftrt,lshiftrt,rotatert") + (and (match_code "ss_minus") + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "unary_operator" + (ior (match_code "abs,neg,not,sign_extend,zero_extend") + (and (ior (match_code "ss_neg") + (and (match_code "ss_truncate") + (match_test "GET_MODE (XEXP (op, 0)) == HImode"))) + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "_2_4_8_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8")) +) + +(define_predicate "arc_double_register_operand" + (match_code "reg") +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == DOUBLE_REGS)); +}) + +(define_predicate "shouldbe_register_operand" + (match_code "reg,subreg,mem") +{ + return ((reload_in_progress || reload_completed) + ? 
general_operand : register_operand) (op, mode); +}) + +(define_predicate "vector_register_operand" + (match_code "reg") +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == SIMD_VR_REGS)); +}) + +(define_predicate "vector_register_or_memory_operand" + ( ior (match_code "reg") + (match_code "mem")) +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + if ((GET_CODE (op) == MEM) + && (mode == V8HImode) + && GET_CODE (XEXP (op,0)) == REG) + return 1; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == SIMD_VR_REGS)); +}) + +(define_predicate "arc_dpfp_operator" + (match_code "plus, mult,minus") +) + +(define_predicate "arc_simd_dma_register_operand" + (match_code "reg") +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == SIMD_DMA_CONFIG_REGS)); +}) + +(define_predicate "acc1_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 56 : 57)"))) + +(define_predicate "acc2_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 57 : 56)"))) + +(define_predicate "mlo_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 59 : 58)"))) + +(define_predicate "mhi_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 58 : 59)"))) + +(define_predicate "extend_operand" + (ior (match_test "register_operand (op, mode)") + (and (match_test "immediate_operand (op, mode)") + (not (match_test "const_int_operand (op, mode)"))))) + +(define_predicate "millicode_store_operation" + (match_code "parallel") +{ + return arc_check_millicode (op, 0, 0); +}) + +(define_predicate "millicode_load_operation" + (match_code "parallel") +{ + return arc_check_millicode (op, 2, 2); +}) + +(define_predicate "millicode_load_clob_operation" + (match_code "parallel") +{ + return arc_check_millicode (op, 0, 1); +}) + +(define_special_predicate "immediate_usidi_operand" + (if_then_else + (match_code "const_int") + (match_test "INTVAL (op) >= 0") + (and (match_test "const_double_operand (op, mode)") + (match_test "CONST_DOUBLE_HIGH (op) == 0")))) diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md new file mode 100644 index 00000000000..22daf51fa66 --- /dev/null +++ b/gcc/config/arc/simdext.md @@ -0,0 +1,1313 @@ +;; Machine description of the Synopsys DesignWare ARC cpu for GNU C compiler +;; Copyright (C) 2007-2012 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
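+
+;; An illustrative usage sketch (assuming -msimd, i.e. TARGET_SIMD_SET, and
+;; the __builtin_arc_vaddw intrinsic from arc-simd.h in this same commit):
+;; the UNSPEC patterns below are reached through the __builtin_arc_*
+;; intrinsics, so a C caller would look like
+;;
+;;   typedef short v8hi __attribute__ ((vector_size (16)));
+;;
+;;   v8hi add8 (v8hi a, v8hi b)
+;;   {
+;;     return __builtin_arc_vaddw (a, b);  /* expands through vaddw_insn  */
+;;   }
+;;
+;; which should assemble to a single "vaddw" on vector registers, replacing
+;; eight scalar 16-bit additions; the insn reservations below then classify
+;; it for the scheduler (vaddw_insn has type simd_varith_1cycle).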
+ +(define_constants + [ + ;; Va, Vb, Vc builtins + (UNSPEC_ARC_SIMD_VADDAW 1000) + (UNSPEC_ARC_SIMD_VADDW 1001) + (UNSPEC_ARC_SIMD_VAVB 1002) + (UNSPEC_ARC_SIMD_VAVRB 1003) + (UNSPEC_ARC_SIMD_VDIFAW 1004) + (UNSPEC_ARC_SIMD_VDIFW 1005) + (UNSPEC_ARC_SIMD_VMAXAW 1006) + (UNSPEC_ARC_SIMD_VMAXW 1007) + (UNSPEC_ARC_SIMD_VMINAW 1008) + (UNSPEC_ARC_SIMD_VMINW 1009) + (UNSPEC_ARC_SIMD_VMULAW 1010) + (UNSPEC_ARC_SIMD_VMULFAW 1011) + (UNSPEC_ARC_SIMD_VMULFW 1012) + (UNSPEC_ARC_SIMD_VMULW 1013) + (UNSPEC_ARC_SIMD_VSUBAW 1014) + (UNSPEC_ARC_SIMD_VSUBW 1015) + (UNSPEC_ARC_SIMD_VSUMMW 1016) + (UNSPEC_ARC_SIMD_VAND 1017) + (UNSPEC_ARC_SIMD_VANDAW 1018) + (UNSPEC_ARC_SIMD_VBIC 1019) + (UNSPEC_ARC_SIMD_VBICAW 1020) + (UNSPEC_ARC_SIMD_VOR 1021) + (UNSPEC_ARC_SIMD_VXOR 1022) + (UNSPEC_ARC_SIMD_VXORAW 1023) + (UNSPEC_ARC_SIMD_VEQW 1024) + (UNSPEC_ARC_SIMD_VLEW 1025) + (UNSPEC_ARC_SIMD_VLTW 1026) + (UNSPEC_ARC_SIMD_VNEW 1027) + (UNSPEC_ARC_SIMD_VMR1AW 1028) + (UNSPEC_ARC_SIMD_VMR1W 1029) + (UNSPEC_ARC_SIMD_VMR2AW 1030) + (UNSPEC_ARC_SIMD_VMR2W 1031) + (UNSPEC_ARC_SIMD_VMR3AW 1032) + (UNSPEC_ARC_SIMD_VMR3W 1033) + (UNSPEC_ARC_SIMD_VMR4AW 1034) + (UNSPEC_ARC_SIMD_VMR4W 1035) + (UNSPEC_ARC_SIMD_VMR5AW 1036) + (UNSPEC_ARC_SIMD_VMR5W 1037) + (UNSPEC_ARC_SIMD_VMR6AW 1038) + (UNSPEC_ARC_SIMD_VMR6W 1039) + (UNSPEC_ARC_SIMD_VMR7AW 1040) + (UNSPEC_ARC_SIMD_VMR7W 1041) + (UNSPEC_ARC_SIMD_VMRB 1042) + (UNSPEC_ARC_SIMD_VH264F 1043) + (UNSPEC_ARC_SIMD_VH264FT 1044) + (UNSPEC_ARC_SIMD_VH264FW 1045) + (UNSPEC_ARC_SIMD_VVC1F 1046) + (UNSPEC_ARC_SIMD_VVC1FT 1047) + ;; Va, Vb, rc/limm builtins + (UNSPEC_ARC_SIMD_VBADDW 1050) + (UNSPEC_ARC_SIMD_VBMAXW 1051) + (UNSPEC_ARC_SIMD_VBMINW 1052) + (UNSPEC_ARC_SIMD_VBMULAW 1053) + (UNSPEC_ARC_SIMD_VBMULFW 1054) + (UNSPEC_ARC_SIMD_VBMULW 1055) + (UNSPEC_ARC_SIMD_VBRSUBW 1056) + (UNSPEC_ARC_SIMD_VBSUBW 1057) + + ;; Va, Vb, Ic builtins + (UNSPEC_ARC_SIMD_VASRW 1060) + (UNSPEC_ARC_SIMD_VSR8 1061) + (UNSPEC_ARC_SIMD_VSR8AW 1062) + + ;; Va, Vb, Ic builtins + (UNSPEC_ARC_SIMD_VASRRWi 1065) + (UNSPEC_ARC_SIMD_VASRSRWi 1066) + (UNSPEC_ARC_SIMD_VASRWi 1067) + (UNSPEC_ARC_SIMD_VASRPWBi 1068) + (UNSPEC_ARC_SIMD_VASRRPWBi 1069) + (UNSPEC_ARC_SIMD_VSR8AWi 1070) + (UNSPEC_ARC_SIMD_VSR8i 1071) + + ;; Va, Vb, u8 (simm) builtins + (UNSPEC_ARC_SIMD_VMVAW 1075) + (UNSPEC_ARC_SIMD_VMVW 1076) + (UNSPEC_ARC_SIMD_VMVZW 1077) + (UNSPEC_ARC_SIMD_VD6TAPF 1078) + + ;; Va, rlimm, u8 (simm) builtins + (UNSPEC_ARC_SIMD_VMOVAW 1080) + (UNSPEC_ARC_SIMD_VMOVW 1081) + (UNSPEC_ARC_SIMD_VMOVZW 1082) + + ;; Va, Vb builtins + (UNSPEC_ARC_SIMD_VABSAW 1085) + (UNSPEC_ARC_SIMD_VABSW 1086) + (UNSPEC_ARC_SIMD_VADDSUW 1087) + (UNSPEC_ARC_SIMD_VSIGNW 1088) + (UNSPEC_ARC_SIMD_VEXCH1 1089) + (UNSPEC_ARC_SIMD_VEXCH2 1090) + (UNSPEC_ARC_SIMD_VEXCH4 1091) + (UNSPEC_ARC_SIMD_VUPBAW 1092) + (UNSPEC_ARC_SIMD_VUPBW 1093) + (UNSPEC_ARC_SIMD_VUPSBAW 1094) + (UNSPEC_ARC_SIMD_VUPSBW 1095) + + (UNSPEC_ARC_SIMD_VDIRUN 1100) + (UNSPEC_ARC_SIMD_VDORUN 1101) + (UNSPEC_ARC_SIMD_VDIWR 1102) + (UNSPEC_ARC_SIMD_VDOWR 1103) + + (UNSPEC_ARC_SIMD_VREC 1105) + (UNSPEC_ARC_SIMD_VRUN 1106) + (UNSPEC_ARC_SIMD_VRECRUN 1107) + (UNSPEC_ARC_SIMD_VENDREC 1108) + + (UNSPEC_ARC_SIMD_VLD32WH 1110) + (UNSPEC_ARC_SIMD_VLD32WL 1111) + + (UNSPEC_ARC_SIMD_VCAST 1200) + (UNSPEC_ARC_SIMD_VINTI 1201) + ] +) + +;; Scheduler descriptions for the simd instructions +(define_insn_reservation "simd_lat_0_insn" 1 + (eq_attr "type" "simd_dma, simd_vstore, simd_vcontrol") + "issue+simd_unit") + +(define_insn_reservation "simd_lat_1_insn" 2 + (eq_attr "type" "simd_vcompare, 
simd_vlogic, + simd_vmove_else_zero, simd_varith_1cycle") + "issue+simd_unit, nothing") + +(define_insn_reservation "simd_lat_2_insn" 3 + (eq_attr "type" "simd_valign, simd_vpermute, + simd_vpack, simd_varith_2cycle") + "issue+simd_unit, nothing*2") + +(define_insn_reservation "simd_lat_3_insn" 4 + (eq_attr "type" "simd_valign_with_acc, simd_vpack_with_acc, + simd_vlogic_with_acc, simd_vload128, + simd_vmove_with_acc, simd_vspecial_3cycle, + simd_varith_with_acc") + "issue+simd_unit, nothing*3") + +(define_insn_reservation "simd_lat_4_insn" 5 + (eq_attr "type" "simd_vload, simd_vmove, simd_vspecial_4cycle") + "issue+simd_unit, nothing*4") + +(define_expand "movv8hi" + [(set (match_operand:V8HI 0 "general_operand" "") + (match_operand:V8HI 1 "general_operand" ""))] + "" + " +{ + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM && GET_CODE(operands[1]) == MEM) + operands[1] = force_reg (V8HImode, operands[1]); +}") + +;; This pattern should appear before the movv8hi_insn pattern +(define_insn "vld128_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (mem:V8HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))) + (match_operand:SI 3 "immediate_operand" "P"))))] + "TARGET_SIMD_SET" + "vld128 %0, [i%2, %3]" + [(set_attr "type" "simd_vload128") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "vst128_insn" + [(set (mem:V8HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 0 "vector_register_operand" "v") + (parallel [(match_operand:SI 1 "immediate_operand" "L")]))) + (match_operand:SI 2 "immediate_operand" "P"))) + (match_operand:V8HI 3 "vector_register_operand" "=v"))] + "TARGET_SIMD_SET" + "vst128 %3, [i%1, %2]" + [(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "vst64_insn" + [(set (mem:V4HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 0 "vector_register_operand" "v") + (parallel [(match_operand:SI 1 "immediate_operand" "L")]))) + (match_operand:SI 2 "immediate_operand" "P"))) + (vec_select:V4HI (match_operand:V8HI 3 "vector_register_operand" "=v") + (parallel [(const_int 0)])))] + "TARGET_SIMD_SET" + "vst64 %3, [i%1, %2]" + [(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "movv8hi_insn" + [(set (match_operand:V8HI 0 "vector_register_or_memory_operand" "=v,m,v") + (match_operand:V8HI 1 "vector_register_or_memory_operand" "m,v,v"))] + "TARGET_SIMD_SET && !(GET_CODE (operands[0]) == MEM && GET_CODE(operands[1]) == MEM)" + "@ + vld128r %0, %1 + vst128r %1, %0 + vmvzw %0,%1,0xffff" + [(set_attr "type" "simd_vload128,simd_vstore,simd_vmove_else_zero") + (set_attr "length" "8,8,4") + (set_attr "cond" "nocond, nocond, nocond")]) + +(define_insn "movti_insn" + [(set (match_operand:TI 0 "vector_register_or_memory_operand" "=v,m,v") + (match_operand:TI 1 "vector_register_or_memory_operand" "m,v,v"))] + "" + "@ + vld128r %0, %1 + vst128r %1, %0 + vmvzw %0,%1,0xffff" + [(set_attr "type" "simd_vload128,simd_vstore,simd_vmove_else_zero") + (set_attr "length" "8,8,4") + (set_attr "cond" "nocond, nocond, nocond")]) + +;; (define_insn "*movv8hi_insn_rr" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (match_operand:V8HI 1 "vector_register_operand" "v"))] +;; "" +;; "mov reg,reg" +;; [(set_attr "length" "8") +;; (set_attr "type" "move")]) + +;; 
(define_insn "*movv8_out" +;; [(set (match_operand:V8HI 0 "memory_operand" "=m") +;; (match_operand:V8HI 1 "vector_register_operand" "v"))] +;; "" +;; "mov out" +;; [(set_attr "length" "8") +;; (set_attr "type" "move")]) + + +;; (define_insn "addv8hi3" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (plus:V8HI (match_operand:V8HI 1 "vector_register_operand" "v") +;; (match_operand:V8HI 2 "vector_register_operand" "v")))] +;; "TARGET_SIMD_SET" +;; "vaddw %0, %1, %2" +;; [(set_attr "length" "8") +;; (set_attr "cond" "nocond")]) + +;; (define_insn "vaddw_insn" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (unspec [(match_operand:V8HI 1 "vector_register_operand" "v") +;; (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDW))] +;; "TARGET_SIMD_SET" +;; "vaddw %0, %1, %2" +;; [(set_attr "length" "8") +;; (set_attr "cond" "nocond")]) + +;; V V V Insns +(define_insn "vaddaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDAW))] + "TARGET_SIMD_SET" + "vaddaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vaddw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDW))] + "TARGET_SIMD_SET" + "vaddw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vavb_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAVB))] + "TARGET_SIMD_SET" + "vavb %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vavrb_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAVRB))] + "TARGET_SIMD_SET" + "vavrb %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdifaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VDIFAW))] + "TARGET_SIMD_SET" + "vdifaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdifw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VDIFW))] + "TARGET_SIMD_SET" + "vdifw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmaxaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMAXAW))] + "TARGET_SIMD_SET" + "vmaxaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr 
"length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmaxw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMAXW))] + "TARGET_SIMD_SET" + "vmaxw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vminaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMINAW))] + "TARGET_SIMD_SET" + "vminaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vminw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMINW))] + "TARGET_SIMD_SET" + "vminw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULAW))] + "TARGET_SIMD_SET" + "vmulaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulfaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULFAW))] + "TARGET_SIMD_SET" + "vmulfaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulfw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULFW))] + "TARGET_SIMD_SET" + "vmulfw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULW))] + "TARGET_SIMD_SET" + "vmulw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsubaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUBAW))] + "TARGET_SIMD_SET" + "vsubaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsubw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUBW))] + "TARGET_SIMD_SET" + "vsubw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsummw_insn" + [(set 
(match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUMMW))] + "TARGET_SIMD_SET" + "vsummw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vand_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAND))] + "TARGET_SIMD_SET" + "vand %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vandaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VANDAW))] + "TARGET_SIMD_SET" + "vandaw %0, %1, %2" + [(set_attr "type" "simd_vlogic_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbic_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VBIC))] + "TARGET_SIMD_SET" + "vbic %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbicaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VBICAW))] + "TARGET_SIMD_SET" + "vbicaw %0, %1, %2" + [(set_attr "type" "simd_vlogic_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vor_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VOR))] + "TARGET_SIMD_SET" + "vor %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vxor_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VXOR))] + "TARGET_SIMD_SET" + "vxor %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vxoraw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VXORAW))] + "TARGET_SIMD_SET" + "vxoraw %0, %1, %2" + [(set_attr "type" "simd_vlogic_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "veqw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEQW))] + "TARGET_SIMD_SET" + "veqw %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vlew_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 
"vector_register_operand" "v")] UNSPEC_ARC_SIMD_VLEW))] + "TARGET_SIMD_SET" + "vlew %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vltw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VLTW))] + "TARGET_SIMD_SET" + "vltw %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vnew_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VNEW))] + "TARGET_SIMD_SET" + "vnew %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr1aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR1AW))] + "TARGET_SIMD_SET" + "vmr1aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr1w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR1W))] + "TARGET_SIMD_SET" + "vmr1w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr2aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR2AW))] + "TARGET_SIMD_SET" + "vmr2aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr2w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR2W))] + "TARGET_SIMD_SET" + "vmr2w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr3aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR3AW))] + "TARGET_SIMD_SET" + "vmr3aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr3w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR3W))] + "TARGET_SIMD_SET" + "vmr3w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr4aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR4AW))] + "TARGET_SIMD_SET" + "vmr4aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + 
(set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr4w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR4W))] + "TARGET_SIMD_SET" + "vmr4w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr5aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR5AW))] + "TARGET_SIMD_SET" + "vmr5aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr5w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR5W))] + "TARGET_SIMD_SET" + "vmr5w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr6aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR6AW))] + "TARGET_SIMD_SET" + "vmr6aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr6w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR6W))] + "TARGET_SIMD_SET" + "vmr6w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr7aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR7AW))] + "TARGET_SIMD_SET" + "vmr7aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr7w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR7W))] + "TARGET_SIMD_SET" + "vmr7w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmrb_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMRB))] + "TARGET_SIMD_SET" + "vmrb %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vh264f_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264F))] + "TARGET_SIMD_SET" + "vh264f %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vh264ft_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" 
"=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264FT))] + "TARGET_SIMD_SET" + "vh264ft %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vh264fw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264FW))] + "TARGET_SIMD_SET" + "vh264fw %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vvc1f_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VVC1F))] + "TARGET_SIMD_SET" + "vvc1f %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vvc1ft_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VVC1FT))] + "TARGET_SIMD_SET" + "vvc1ft %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + + + +;;--- +;; V V r/limm Insns + +;; (define_insn "vbaddw_insn" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") +;; (match_operand:SI 2 "nonmemory_operand" "rCal")] UNSPEC_ARC_SIMD_VBADDW))] +;; "TARGET_SIMD_SET" +;; "vbaddw %0, %1, %2" +;; [(set_attr "length" "4") +;; (set_attr "cond" "nocond")]) + +(define_insn "vbaddw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBADDW))] + "TARGET_SIMD_SET" + "vbaddw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmaxw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMAXW))] + "TARGET_SIMD_SET" + "vbmaxw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbminw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMINW))] + "TARGET_SIMD_SET" + "vbminw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmulaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULAW))] + "TARGET_SIMD_SET" + "vbmulaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmulfw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 
"nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULFW))] + "TARGET_SIMD_SET" + "vbmulfw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmulw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULW))] + "TARGET_SIMD_SET" + "vbmulw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbrsubw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBRSUBW))] + "TARGET_SIMD_SET" + "vbrsubw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbsubw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBSUBW))] + "TARGET_SIMD_SET" + "vbsubw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) +; Va, Vb, Ic instructions + +; Va, Vb, u6 instructions +(define_insn "vasrrwi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRRWi))] + "TARGET_SIMD_SET" + "vasrrwi %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrsrwi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRSRWi))] + "TARGET_SIMD_SET" + "vasrsrwi %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrwi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRWi))] + "TARGET_SIMD_SET" + "vasrwi %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrpwbi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRPWBi))] + "TARGET_SIMD_SET" + "vasrpwbi %0, %1, %2" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrrpwbi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRRPWBi))] + "TARGET_SIMD_SET" + "vasrrpwbi %0, %1, %2" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsr8awi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VSR8AWi))] + "TARGET_SIMD_SET" + "vsr8awi %0, %1, %2" + 
[(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsr8i_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VSR8i))] + "TARGET_SIMD_SET" + "vsr8i %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, Vb, u8 (simm) insns + +(define_insn "vmvaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVAW))] + "TARGET_SIMD_SET" + "vmvaw %0, %1, %2" + [(set_attr "type" "simd_vmove_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmvw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVW))] + "TARGET_SIMD_SET" + "vmvw %0, %1, %2" + [(set_attr "type" "simd_vmove") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmvzw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVZW))] + "TARGET_SIMD_SET" + "vmvzw %0, %1, %2" + [(set_attr "type" "simd_vmove_else_zero") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vd6tapf_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VD6TAPF))] + "TARGET_SIMD_SET" + "vd6tapf %0, %1, %2" + [(set_attr "type" "simd_vspecial_4cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, rlimm, u8 (simm) insns +(define_insn "vmovaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVAW))] + "TARGET_SIMD_SET" + "vmovaw %0, %1, %2" + [(set_attr "type" "simd_vmove_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmovw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVW))] + "TARGET_SIMD_SET" + "vmovw %0, %1, %2" + [(set_attr "type" "simd_vmove") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmovzw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVZW))] + "TARGET_SIMD_SET" + "vmovzw %0, %1, %2" + [(set_attr "type" "simd_vmove_else_zero") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, rlimm, Ic insns +(define_insn "vsr8_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "K") + (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSR8))] + "TARGET_SIMD_SET" + "vsr8 %0, %1, i%2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" 
"nocond")]) + +(define_insn "vasrw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "K") + (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VASRW))] + "TARGET_SIMD_SET" + "vasrw %0, %1, i%2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsr8aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "K") + (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSR8AW))] + "TARGET_SIMD_SET" + "vsr8aw %0, %1, i%2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, Vb insns +(define_insn "vabsaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VABSAW))] + "TARGET_SIMD_SET" + "vabsaw %0, %1" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vabsw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VABSW))] + "TARGET_SIMD_SET" + "vabsw %0, %1" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vaddsuw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDSUW))] + "TARGET_SIMD_SET" + "vaddsuw %0, %1" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsignw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSIGNW))] + "TARGET_SIMD_SET" + "vsignw %0, %1" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vexch1_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH1))] + "TARGET_SIMD_SET" + "vexch1 %0, %1" + [(set_attr "type" "simd_vpermute") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vexch2_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH2))] + "TARGET_SIMD_SET" + "vexch2 %0, %1" + [(set_attr "type" "simd_vpermute") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vexch4_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH4))] + "TARGET_SIMD_SET" + "vexch4 %0, %1" + [(set_attr "type" "simd_vpermute") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vupbaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPBAW))] + "TARGET_SIMD_SET" + "vupbaw %0, %1" + [(set_attr "type" "simd_vpack_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vupbw_insn" + [(set (match_operand:V8HI 0 
"vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPBW))] + "TARGET_SIMD_SET" + "vupbw %0, %1" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vupsbaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPSBAW))] + "TARGET_SIMD_SET" + "vupsbaw %0, %1" + [(set_attr "type" "simd_vpack_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vupsbw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPSBW))] + "TARGET_SIMD_SET" + "vupsbw %0, %1" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +; DMA setup instructions +(define_insn "vdirun_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VDIRUN))] + "TARGET_SIMD_SET" + "vdirun %1, %2" + [(set_attr "type" "simd_dma") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdorun_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VDORUN))] + "TARGET_SIMD_SET" + "vdorun %1, %2" + [(set_attr "type" "simd_dma") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdiwr_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d,d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r,Cal")] UNSPEC_ARC_SIMD_VDIWR))] + "TARGET_SIMD_SET" + "vdiwr %0, %1" + [(set_attr "type" "simd_dma") + (set_attr "length" "4,8") + (set_attr "cond" "nocond,nocond")]) + +(define_insn "vdowr_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d,d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r,Cal")] UNSPEC_ARC_SIMD_VDOWR))] + "TARGET_SIMD_SET" + "vdowr %0, %1" + [(set_attr "type" "simd_dma") + (set_attr "length" "4,8") + (set_attr "cond" "nocond,nocond")]) + +;; vector record and run instructions +(define_insn "vrec_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VREC)] + "TARGET_SIMD_SET" + "vrec %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vrun_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VRUN)] + "TARGET_SIMD_SET" + "vrun %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vrecrun_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VRECRUN)] + "TARGET_SIMD_SET" + "vrecrun %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vendrec_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VENDREC)] + "TARGET_SIMD_SET" + "vendrec %S0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, [Ib,u8] instructions +;; (define_insn "vld32wh_insn" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (vec_concat:V8HI (unspec:V4HI [(match_operand:SI 1 "immediate_operand" 
"P") +;; (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") +;; (parallel [(match_operand:SI 3 "immediate_operand" "L")]))] UNSPEC_ARC_SIMD_VLD32WH) +;; (vec_select:V4HI (match_dup 0) +;; (parallel[(const_int 0)]))))] +;; (define_insn "vld32wl_insn" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (unspec:V8HI [(match_operand:SI 1 "immediate_operand" "L") +;; (match_operand:SI 2 "immediate_operand" "P") +;; (match_operand:V8HI 3 "vector_register_operand" "v") +;; (match_dup 0)] UNSPEC_ARC_SIMD_VLD32WL))] +;; "TARGET_SIMD_SET" +;; "vld32wl %0, [I%1,%2]" +;; [(set_attr "length" "4") +;; (set_attr "cond" "nocond")]) +(define_insn "vld32wh_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (zero_extend:V4HI (mem:V4QI (plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) + (vec_select:V4HI (match_dup 0) + (parallel [(const_int 0)]))))] + "TARGET_SIMD_SET" + "vld32wh %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld32wl_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (vec_select:V4HI (match_dup 0) + (parallel [(const_int 1)])) + (zero_extend:V4HI (mem:V4QI (plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) ))] + "TARGET_SIMD_SET" + "vld32wl %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld64w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (zero_extend:V8HI (mem:V4HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))) + (match_operand:SI 3 "immediate_operand" "P")))))] + "TARGET_SIMD_SET" + "vld64w %0, [i%2, %3]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "vld64_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (vec_select:V4HI (match_dup 0) + (parallel [(const_int 1)])) + (mem:V4HI (plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")]))))) ))] + "TARGET_SIMD_SET" + "vld64 %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld32_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (vec_select:V4HI (match_dup 0) + (parallel [(const_int 1)])) + (vec_concat:V4HI (vec_select:V2HI (match_dup 0) + (parallel [(const_int 1)])) + (mem:V2HI (plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) ))] + "TARGET_SIMD_SET" + "vld32 %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vst16_n_insn" + [(set (mem:HI (plus:SI (match_operand:SI 0 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel 
[(match_operand:SI 2 "immediate_operand" "L")]))))) + (vec_select:HI (match_operand:V8HI 3 "vector_register_operand" "v") + (parallel [(match_operand:SI 4 "immediate_operand" "L")])))] + "TARGET_SIMD_SET" + "vst16_%4 %3,[i%2, %0]" + [(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vst32_n_insn" + [(set (mem:SI (plus:SI (match_operand:SI 0 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))))) + (vec_select:SI (unspec:V4SI [(match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VCAST) + (parallel [(match_operand:SI 4 "immediate_operand" "L")])))] + "TARGET_SIMD_SET" + "vst32_%4 %3,[i%2, %0]" + [(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; SIMD unit interrupt +(define_insn "vinti_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "L")] UNSPEC_ARC_SIMD_VINTI)] + "TARGET_SIMD_SET" + "vinti %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) diff --git a/gcc/config/arc/t-arc-newlib b/gcc/config/arc/t-arc-newlib new file mode 100644 index 00000000000..5ce33b7a8ce --- /dev/null +++ b/gcc/config/arc/t-arc-newlib @@ -0,0 +1,38 @@ +# GCC Makefile fragment for Synopsys DesignWare ARC with newlib. + +# Copyright (C) 2007-2012 Free Software Foundation, Inc. + +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3, or (at your option) any later version. + +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along +# with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Selecting -mA5 uses the same functional multilib files/libraries +# as get used for -mARC600 aka -mA6. +MULTILIB_OPTIONS=mcpu=ARC600/mcpu=ARC601 mmul64/mmul32x16 mnorm +MULTILIB_DIRNAMES=arc600 arc601 mul64 mul32x16 norm +# +# Aliases: +MULTILIB_MATCHES = mcpu?ARC600=mcpu?arc600 +MULTILIB_MATCHES += mcpu?ARC600=mARC600 +MULTILIB_MATCHES += mcpu?ARC600=mA6 +MULTILIB_MATCHES += mcpu?ARC600=mA5 +MULTILIB_MATCHES += mcpu?ARC600=mno-mpy +MULTILIB_MATCHES += mcpu?ARC601=mcpu?arc601 +MULTILIB_MATCHES += EL=mlittle-endian +MULTILIB_MATCHES += EB=mbig-endian +# +# These don't make sense for the ARC700 default target: +MULTILIB_EXCEPTIONS=mmul64* mmul32x16* mnorm* +# And neither of the -mmul* options make sense without -mnorm: +MULTILIB_EXCLUSIONS=mARC600/mmul64/!mnorm mcpu=ARC601/mmul64/!mnorm mARC600/mmul32x16/!mnorm diff --git a/gcc/config/arc/t-arc-uClibc b/gcc/config/arc/t-arc-uClibc new file mode 100644 index 00000000000..a08978d20ce --- /dev/null +++ b/gcc/config/arc/t-arc-uClibc @@ -0,0 +1,20 @@ +# GCC Makefile fragment for Synopsys DesignWare ARC with uClibc + +# Copyright (C) 2007-2012 Free Software Foundation, Inc. + +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3, or (at your option) any later version. 
diff --git a/gcc/config/arc/t-arc-uClibc b/gcc/config/arc/t-arc-uClibc
new file mode 100644
index 00000000000..a08978d20ce
--- /dev/null
+++ b/gcc/config/arc/t-arc-uClibc
@@ -0,0 +1,20 @@
+# GCC Makefile fragment for Synopsys DesignWare ARC with uClibc
+
+# Copyright (C) 2007-2012 Free Software Foundation, Inc.
+
+# This file is part of GCC.
+
+# GCC is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 3, or (at your option) any later version.
+
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+
+# You should have received a copy of the GNU General Public License along
+# with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_EXTRA_OPTS = mno-sdata