diff options
Diffstat (limited to 'gcc/config')
28 files changed, 1035 insertions, 99 deletions
diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h index e9104220feb..5abad67dd69 100644 --- a/gcc/config/arm/aout.h +++ b/gcc/config/arm/aout.h @@ -243,7 +243,30 @@ if (TARGET_ARM) \ asm_fprintf (STREAM, "\tb\t%LL%d\n", VALUE); \ else if (TARGET_THUMB1) \ - asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d\n", VALUE, REL); \ + { \ + if (flag_pic || optimize_size) \ + { \ + switch (GET_MODE(body)) \ + { \ + case QImode: \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case HImode: /* TBH */ \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case SImode: \ + asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d\n", \ + VALUE, REL); \ + break; \ + default: \ + gcc_unreachable(); \ + } \ + } \ + else \ + asm_fprintf (STREAM, "\t.word\t%LL%d+1\n", VALUE); \ + } \ else /* Thumb-2 */ \ { \ switch (GET_MODE(body)) \ diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def index 73b5b4d3d0d..813ce8ec142 100644 --- a/gcc/config/arm/arm-modes.def +++ b/gcc/config/arm/arm-modes.def @@ -25,6 +25,11 @@ FIXME What format is this? */ FLOAT_MODE (XF, 12, 0); +/* Half-precision floating point */ +FLOAT_MODE (HF, 2, 0); +ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + ? &arm_half_format : &ieee_half_format)); + /* CCFPEmode should be used with floating inequalities, CCFPmode should be used with floating equalities. CC_NOOVmode should be used with SImode integer equalities. 
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 857d575e5d3..07772eb8c88 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -140,6 +140,7 @@ extern void arm_final_prescan_insn (rtx); extern int arm_debugger_arg_offset (int, rtx); extern bool arm_is_long_call_p (tree); extern int arm_emit_vector_const (FILE *, rtx); +extern void arm_emit_fp16_const (rtx c); extern const char * arm_output_load_gr (rtx *); extern const char *vfp_output_fstmd (rtx *); extern void arm_set_return_address (rtx, rtx); @@ -182,7 +183,8 @@ extern rtx arm_return_addr (int, rtx); extern void thumb_reload_out_hi (rtx *); extern void thumb_reload_in_hi (rtx *); extern void thumb_set_return_address (rtx, rtx); -extern const char *thumb2_output_casesi(rtx *); +extern const char *thumb1_output_casesi (rtx *); +extern const char *thumb2_output_casesi (rtx *); #endif /* Defined in pe.c. */ diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 3809270b2c5..6f615c59312 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -53,6 +53,7 @@ #include "debug.h" #include "langhooks.h" #include "df.h" +#include "intl.h" /* Forward definitions of types. */ typedef struct minipool_node Mnode; @@ -200,6 +201,11 @@ static bool arm_tls_symbol_p (rtx x); static int arm_issue_rate (void); static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; static bool arm_allocate_stack_slots_for_args (void); +static const char *arm_invalid_parameter_type (const_tree t); +static const char *arm_invalid_return_type (const_tree t); +static tree arm_promoted_type (const_tree t); +static tree arm_convert_to_type (tree type, tree expr); +static bool arm_scalar_mode_supported_p (enum machine_mode); /* Initialize the GCC target structure. 
*/ @@ -407,6 +413,21 @@ static bool arm_allocate_stack_slots_for_args (void); #undef TARGET_LEGITIMATE_ADDRESS_P #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p +#undef TARGET_INVALID_PARAMETER_TYPE +#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type + +#undef TARGET_INVALID_RETURN_TYPE +#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type + +#undef TARGET_PROMOTED_TYPE +#define TARGET_PROMOTED_TYPE arm_promoted_type + +#undef TARGET_CONVERT_TO_TYPE +#define TARGET_CONVERT_TO_TYPE arm_convert_to_type + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -440,6 +461,9 @@ enum fputype arm_fpu_tune; /* Whether to use floating point hardware. */ enum float_abi_type arm_float_abi; +/* Which __fp16 format to use. */ +enum arm_fp16_format_type arm_fp16_format; + /* Which ABI to use. */ enum arm_abi_type arm_abi; @@ -719,15 +743,16 @@ struct fpu_desc static const struct fpu_desc all_fpus[] = { - {"fpa", FPUTYPE_FPA}, - {"fpe2", FPUTYPE_FPA_EMU2}, - {"fpe3", FPUTYPE_FPA_EMU2}, - {"maverick", FPUTYPE_MAVERICK}, - {"vfp", FPUTYPE_VFP}, - {"vfp3", FPUTYPE_VFP3}, - {"vfpv3", FPUTYPE_VFP3}, - {"vfpv3-d16", FPUTYPE_VFP3D16}, - {"neon", FPUTYPE_NEON} + {"fpa", FPUTYPE_FPA}, + {"fpe2", FPUTYPE_FPA_EMU2}, + {"fpe3", FPUTYPE_FPA_EMU2}, + {"maverick", FPUTYPE_MAVERICK}, + {"vfp", FPUTYPE_VFP}, + {"vfp3", FPUTYPE_VFP3}, + {"vfpv3", FPUTYPE_VFP3}, + {"vfpv3-d16", FPUTYPE_VFP3D16}, + {"neon", FPUTYPE_NEON}, + {"neon-fp16", FPUTYPE_NEON_FP16} }; @@ -745,7 +770,8 @@ static const enum arm_fp_model fp_model_for_fpu[] = ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */ ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */ ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */ - ARM_FP_MODEL_VFP /* FPUTYPE_NEON */ + ARM_FP_MODEL_VFP, /* FPUTYPE_NEON */ + ARM_FP_MODEL_VFP /* FPUTYPE_NEON_FP16 */ }; @@ -766,6 +792,23 @@ static const struct float_abi 
all_float_abis[] = }; +struct fp16_format +{ + const char *name; + enum arm_fp16_format_type fp16_format_type; +}; + + +/* Available values for -mfp16-format=. */ + +static const struct fp16_format all_fp16_formats[] = +{ + {"none", ARM_FP16_FORMAT_NONE}, + {"ieee", ARM_FP16_FORMAT_IEEE}, + {"alternative", ARM_FP16_FORMAT_ALTERNATIVE} +}; + + struct abi_name { const char *name; @@ -923,6 +966,44 @@ arm_init_libfuncs (void) set_optab_libfunc (umod_optab, DImode, NULL); set_optab_libfunc (smod_optab, SImode, NULL); set_optab_libfunc (umod_optab, SImode, NULL); + + /* Half-precision float operations. The compiler handles all operations + with NULL libfuncs by converting to SFmode. */ + switch (arm_fp16_format) + { + case ARM_FP16_FORMAT_IEEE: + case ARM_FP16_FORMAT_ALTERNATIVE: + + /* Conversions. */ + set_conv_libfunc (trunc_optab, HFmode, SFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_f2h_ieee" + : "__gnu_f2h_alternative")); + set_conv_libfunc (sext_optab, SFmode, HFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_h2f_ieee" + : "__gnu_h2f_alternative")); + + /* Arithmetic. */ + set_optab_libfunc (add_optab, HFmode, NULL); + set_optab_libfunc (sdiv_optab, HFmode, NULL); + set_optab_libfunc (smul_optab, HFmode, NULL); + set_optab_libfunc (neg_optab, HFmode, NULL); + set_optab_libfunc (sub_optab, HFmode, NULL); + + /* Comparisons. */ + set_optab_libfunc (eq_optab, HFmode, NULL); + set_optab_libfunc (ne_optab, HFmode, NULL); + set_optab_libfunc (lt_optab, HFmode, NULL); + set_optab_libfunc (le_optab, HFmode, NULL); + set_optab_libfunc (ge_optab, HFmode, NULL); + set_optab_libfunc (gt_optab, HFmode, NULL); + set_optab_libfunc (unord_optab, HFmode, NULL); + break; + + default: + break; + } } /* On AAPCS systems, this is the "struct __va_list". 
*/ @@ -1294,6 +1375,23 @@ arm_override_options (void) tune_flags = all_cores[(int)arm_tune].flags; + if (target_fp16_format_name) + { + for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) + { + if (streq (all_fp16_formats[i].name, target_fp16_format_name)) + { + arm_fp16_format = all_fp16_formats[i].fp16_format_type; + break; + } + } + if (i == ARRAY_SIZE (all_fp16_formats)) + error ("invalid __fp16 format option: -mfp16-format=%s", + target_fp16_format_name); + } + else + arm_fp16_format = ARM_FP16_FORMAT_NONE; + if (target_abi_name) { for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) @@ -1525,6 +1623,10 @@ arm_override_options (void) if (TARGET_THUMB2 && TARGET_IWMMXT) sorry ("Thumb-2 iWMMXt"); + /* __fp16 support currently assumes the core has ldrh. */ + if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) + sorry ("__fp16 and no ldrh"); + /* If soft-float is specified then don't use FPU. */ if (TARGET_SOFT_FLOAT) arm_fpu_arch = FPUTYPE_NONE; @@ -4173,6 +4275,7 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer, if (GET_MODE_SIZE (mode) <= 4 && ! (arm_arch4 && (mode == HImode + || mode == HFmode || (mode == QImode && outer == SIGN_EXTEND)))) { if (code == MULT) @@ -4201,13 +4304,15 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer, load. */ if (arm_arch4) { - if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode)) + if (mode == HImode + || mode == HFmode + || (outer == SIGN_EXTEND && mode == QImode)) range = 256; else range = 4096; } else - range = (mode == HImode) ? 4095 : 4096; + range = (mode == HImode || mode == HFmode) ? 4095 : 4096; return (code == CONST_INT && INTVAL (index) < range @@ -4380,7 +4485,8 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) return 1; /* This is PC relative data after arm_reorg runs. 
*/ - else if (GET_MODE_SIZE (mode) >= 4 && reload_completed + else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode) + && reload_completed && (GET_CODE (x) == LABEL_REF || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS @@ -7121,6 +7227,13 @@ arm_eliminable_register (rtx x) enum reg_class coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) { + if (mode == HFmode) + { + if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2)) + return NO_REGS; + return GENERAL_REGS; + } + if (TARGET_NEON && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) @@ -13926,6 +14039,31 @@ arm_print_operand (FILE *stream, rtx x, int code) } return; + /* Register specifier for vld1.16/vst1.16. Translate the S register + number into a D register number and element index. */ + case 'z': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_SINGLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = regno - FIRST_VFP_REGNUM; + fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0)); + } + return; + default: if (x == 0) { @@ -14723,6 +14861,12 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) if (mode == DFmode) return VFP_REGNO_OK_FOR_DOUBLE (regno); + /* VFP registers can hold HFmode values, but there is no point in + putting them there unless we have the NEON extensions for + loading/storing them, too. 
*/ + if (mode == HFmode) + return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno); + if (TARGET_NEON) return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno)) || (VALID_NEON_QREG_MODE (mode) @@ -16209,6 +16353,15 @@ arm_init_neon_builtins (void) } static void +arm_init_fp16_builtins (void) +{ + tree fp16_type = make_node (REAL_TYPE); + TYPE_PRECISION (fp16_type) = 16; + layout_type (fp16_type); + (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16"); +} + +static void arm_init_builtins (void) { arm_init_tls_builtins (); @@ -16218,6 +16371,71 @@ arm_init_builtins (void) if (TARGET_NEON) arm_init_neon_builtins (); + + if (arm_fp16_format) + arm_init_fp16_builtins (); +} + +/* Implement TARGET_INVALID_PARAMETER_TYPE. */ + +static const char * +arm_invalid_parameter_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return N_("function parameters cannot have __fp16 type"); + return NULL; +} + +/* Implement TARGET_INVALID_RETURN_TYPE. */ + +static const char * +arm_invalid_return_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return N_("functions cannot return __fp16 type"); + return NULL; +} + +/* Implement TARGET_PROMOTED_TYPE. */ + +static tree +arm_promoted_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return float_type_node; + return NULL_TREE; +} + +/* Implement TARGET_CONVERT_TO_TYPE. + Specifically, this hook implements the peculiarity of the ARM + half-precision floating-point C semantics that requires conversions between + __fp16 and double to go through an intermediate conversion to float. 
*/ + +static tree +arm_convert_to_type (tree type, tree expr) +{ + tree fromtype = TREE_TYPE (expr); + if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type)) + return NULL_TREE; + if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32) + || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32)) + return convert (type, convert (float_type_node, expr)); + return NULL_TREE; +} + +/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. + This simply adds HFmode as a supported mode; even though we don't + implement arithmetic on this type directly, it's supported by + optabs conversions, much the way the double-word arithmetic is + special-cased in the default hook. */ + +static bool +arm_scalar_mode_supported_p (enum machine_mode mode) +{ + if (mode == HFmode) + return (arm_fp16_format != ARM_FP16_FORMAT_NONE); + else + return default_scalar_mode_supported_p (mode); } /* Errors in the source file can cause expand_expr to return const0_rtx @@ -17297,6 +17515,7 @@ thumb_shiftable_const (unsigned HOST_WIDE_INT val) unsigned HOST_WIDE_INT mask = 0xff; int i; + val = val & (unsigned HOST_WIDE_INT)0xffffffffu; if (val == 0) /* XXX */ return 0; @@ -18413,6 +18632,10 @@ arm_file_start (void) fpu_name = "neon"; set_float_abi_attributes = 1; break; + case FPUTYPE_NEON_FP16: + fpu_name = "neon-fp16"; + set_float_abi_attributes = 1; + break; default: abort(); } @@ -18466,6 +18689,11 @@ arm_file_start (void) val = 6; asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); + /* Tag_ABI_FP_16bit_format. */ + if (arm_fp16_format) + asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", + (int)arm_fp16_format); + if (arm_lang_output_object_attributes_hook) arm_lang_output_object_attributes_hook(); } @@ -18695,6 +18923,23 @@ arm_emit_vector_const (FILE *file, rtx x) return 1; } +/* Emit a fp16 constant appropriately padded to occupy a 4-byte word. + HFmode constant pool entries are actually loaded with ldr. 
*/ +void +arm_emit_fp16_const (rtx c) +{ + REAL_VALUE_TYPE r; + long bits; + + REAL_VALUE_FROM_CONST_DOUBLE (r, c); + bits = real_to_target (NULL, &r, HFmode); + if (WORDS_BIG_ENDIAN) + assemble_zeros (2); + assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1); + if (!WORDS_BIG_ENDIAN) + assemble_zeros (2); +} + const char * arm_output_load_gr (rtx *operands) { @@ -19615,6 +19860,32 @@ arm_output_shift(rtx * operands, int set_flags) return ""; } +/* Output a Thumb-1 casesi dispatch sequence. */ +const char * +thumb1_output_casesi (rtx *operands) +{ + rtx diff_vec = PATTERN (next_real_insn (operands[0])); + addr_diff_vec_flags flags; + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + flags = ADDR_DIFF_VEC_FLAGS (diff_vec); + + switch (GET_MODE(diff_vec)) + { + case QImode: + return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? + "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi"); + case HImode: + return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? + "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi"); + case SImode: + return "bl\t%___gnu_thumb1_case_si"; + default: + gcc_unreachable (); + } +} + /* Output a Thumb-2 casesi instruction. */ const char * thumb2_output_casesi (rtx *operands) @@ -19724,6 +19995,10 @@ arm_mangle_type (const_tree type) return "St9__va_list"; } + /* Half-precision float. */ + if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) + return "Dh"; + if (TREE_CODE (type) != VECTOR_TYPE) return NULL; diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index ee0eee694d2..98115d8a140 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -215,20 +215,25 @@ extern void (*arm_lang_output_object_attributes_hook)(void); /* FPU is has the full VFPv3/NEON register file of 32 D registers. 
*/ #define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \ && (arm_fpu_arch == FPUTYPE_VFP3 \ - || arm_fpu_arch == FPUTYPE_NEON)) + || arm_fpu_arch == FPUTYPE_NEON \ + || arm_fpu_arch == FPUTYPE_NEON_FP16)) /* FPU supports VFPv3 instructions. */ #define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \ && (arm_fpu_arch == FPUTYPE_VFP3D16 \ || TARGET_VFPD32)) +/* FPU supports NEON/VFP half-precision floating-point. */ +#define TARGET_NEON_FP16 (arm_fpu_arch == FPUTYPE_NEON_FP16) + /* FPU supports Neon instructions. The setting of this macro gets revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT and TARGET_HARD_FLOAT to ensure that NEON instructions are available. */ #define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \ && arm_fp_model == ARM_FP_MODEL_VFP \ - && arm_fpu_arch == FPUTYPE_NEON) + && (arm_fpu_arch == FPUTYPE_NEON \ + || arm_fpu_arch == FPUTYPE_NEON_FP16)) /* "DSP" multiply instructions, eg. SMULxy. */ #define TARGET_DSP_MULTIPLY \ @@ -308,7 +313,9 @@ enum fputype /* VFPv3. */ FPUTYPE_VFP3, /* Neon. */ - FPUTYPE_NEON + FPUTYPE_NEON, + /* Neon with half-precision float extensions. */ + FPUTYPE_NEON_FP16 }; /* Recast the floating point class to be the floating point attribute. */ @@ -333,6 +340,21 @@ extern enum float_abi_type arm_float_abi; #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT #endif +/* Which __fp16 format to use. + The enumeration values correspond to the numbering for the + Tag_ABI_FP_16bit_format attribute. + */ +enum arm_fp16_format_type +{ + ARM_FP16_FORMAT_NONE = 0, + ARM_FP16_FORMAT_IEEE = 1, + ARM_FP16_FORMAT_ALTERNATIVE = 2 +}; + +extern enum arm_fp16_format_type arm_fp16_format; +#define LARGEST_EXPONENT_IS_NORMAL(bits) \ + ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + /* Which ABI to use. */ enum arm_abi_type { @@ -2174,12 +2196,24 @@ typedef struct for the index in the tablejump instruction. 
*/ #define CASE_VECTOR_MODE Pmode -#define CASE_VECTOR_PC_RELATIVE TARGET_THUMB2 - -#define CASE_VECTOR_SHORTEN_MODE(min, max, body) \ - ((min < 0 || max >= 0x2000 || !TARGET_THUMB2) ? SImode \ - : (max >= 0x200) ? HImode \ - : QImode) +#define CASE_VECTOR_PC_RELATIVE (TARGET_THUMB2 \ + || (TARGET_THUMB \ + && (optimize_size || flag_pic))) + +#define CASE_VECTOR_SHORTEN_MODE(min, max, body) \ + (TARGET_THUMB \ + ? (min >= 0 && max < 512 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode) \ + : min >= -256 && max < 256 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, QImode) \ + : min >= 0 && max < 8192 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, HImode) \ + : min >= -4096 && max < 4096 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ + : SImode) \ + : ((min < 0 || max >= 0x2000 || !TARGET_THUMB2) ? SImode \ + : (max >= 0x200) ? HImode \ + : QImode)) /* signed 'char' is most compatible, but RISC OS wants it unsigned. unsigned is probably best, but may break some code. */ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 40e41c56021..47972424dbf 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -99,6 +99,7 @@ ; correctly for PIC usage. (UNSPEC_GOTSYM_OFF 24) ; The offset of the start of the the GOT from a ; a given symbolic address. + (UNSPEC_THUMB1_CASESI 25) ; A Thumb1 compressed dispatch-table call. ] ) @@ -158,7 +159,7 @@ ; Floating Point Unit. If we only have floating point emulation, then there ; is no point in scheduling the floating point insns. (Well, for best ; performance we should try and group them together). 
-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon" +(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon,neon_fp16" (const (symbol_ref "arm_fpu_attr"))) ; LENGTH of an instruction (in bytes) @@ -3734,6 +3735,34 @@ ;; Fixed <--> Floating conversion insns +(define_expand "floatsihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:SI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +(define_expand "floatdihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:DI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + (define_expand "floatsisf2" [(set (match_operand:SF 0 "s_register_operand" "") (float:SF (match_operand:SI 1 "s_register_operand" "")))] @@ -3758,6 +3787,30 @@ } ") +(define_expand "fix_trunchfsi2" + [(set (match_operand:SI 0 "general_operand" "") + (fix:SI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + +(define_expand "fix_trunchfdi2" + [(set (match_operand:DI 0 "general_operand" "") + (fix:DI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + (define_expand "fix_truncsfsi2" [(set (match_operand:SI 0 "s_register_operand" "") (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" ""))))] @@ -3797,6 +3850,22 @@ "TARGET_32BIT && TARGET_HARD_FLOAT" "" ) + +/* DFmode -> HFmode conversions have to go through SFmode. 
*/ +(define_expand "truncdfhf2" + [(set (match_operand:HF 0 "general_operand" "") + (float_truncate:HF + (match_operand:DF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) ;; Zero and sign extension instructions. @@ -4660,6 +4729,21 @@ "TARGET_32BIT && TARGET_HARD_FLOAT" "" ) + +/* HFmode -> DFmode conversions have to go through SFmode. */ +(define_expand "extendhfdf2" + [(set (match_operand:DF 0 "general_operand" "") + (float_extend:DF (match_operand:HF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (DFmode, op1, 0); + emit_insn (gen_movdf (operands[0], op1)); + DONE; + }" +) ;; Move insns (including loads and stores) @@ -5083,7 +5167,7 @@ (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))] " { - unsigned HOST_WIDE_INT val = INTVAL (operands[1]); + unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu; unsigned HOST_WIDE_INT mask = 0xff; int i; @@ -5808,6 +5892,107 @@ (set_attr "pool_range" "*,32,*,*,*,*")] ) +;; HFmode moves +(define_expand "movhf" + [(set (match_operand:HF 0 "general_operand" "") + (match_operand:HF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (HFmode, operands[1]); + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (HFmode, operands[1]); + } + } + " +) + +(define_insn "*arm32_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r") + (match_operand:HF 1 "general_operand" " m,r,r,F"))] + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_NEON_FP16) + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* ARM 
register from memory */ + return \"ldr%(h%)\\t%0, %1\\t%@ __fp16\"; + case 1: /* memory from ARM register */ + return \"str%(h%)\\t%1, %0\\t%@ __fp16\"; + case 2: /* ARM register from ARM register */ + return \"mov%?\\t%0, %1\\t%@ __fp16\"; + case 3: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw%?\\t%0, %1\", ops); + else + output_asm_insn (\"mov%?\\t%0, %2\;orr%?\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "load1,store1,*,*") + (set_attr "length" "4,4,4,8") + (set_attr "predicable" "yes") + ] +) + +(define_insn "*thumb1_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h") + (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))] + "TARGET_THUMB1 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 1: + { + rtx addr; + gcc_assert (GET_CODE(operands[1]) == MEM); + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == LABEL_REF + || (GET_CODE (addr) == CONST + && GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)) + { + /* Constant pool entry. 
*/ + return \"ldr\\t%0, %1\"; + } + return \"ldrh\\t%0, %1\"; + } + case 2: return \"strh\\t%1, %0\"; + default: return \"mov\\t%0, %1\"; + } + " + [(set_attr "length" "2") + (set_attr "type" "*,load1,store1,*,*") + (set_attr "pool_range" "*,1020,*,*,*")] +) + (define_expand "movsf" [(set (match_operand:SF 0 "general_operand" "") (match_operand:SF 1 "general_operand" ""))] @@ -8717,37 +8902,33 @@ (match_operand:SI 2 "const_int_operand" "") ; total range (match_operand:SI 3 "" "") ; table label (match_operand:SI 4 "" "")] ; Out of range label - "TARGET_32BIT" + "TARGET_32BIT || optimize_size || flag_pic" " { - rtx reg; + enum insn_code code; if (operands[1] != const0_rtx) { - reg = gen_reg_rtx (SImode); + rtx reg = gen_reg_rtx (SImode); emit_insn (gen_addsi3 (reg, operands[0], GEN_INT (-INTVAL (operands[1])))); operands[0] = reg; } - if (!const_ok_for_arm (INTVAL (operands[2]))) - operands[2] = force_reg (SImode, operands[2]); - if (TARGET_ARM) - { - emit_jump_insn (gen_arm_casesi_internal (operands[0], operands[2], - operands[3], operands[4])); - } + code = CODE_FOR_arm_casesi_internal; + else if (TARGET_THUMB) + code = CODE_FOR_thumb1_casesi_internal_pic; else if (flag_pic) - { - emit_jump_insn (gen_thumb2_casesi_internal_pic (operands[0], - operands[2], operands[3], operands[4])); - } + code = CODE_FOR_thumb2_casesi_internal_pic; else - { - emit_jump_insn (gen_thumb2_casesi_internal (operands[0], operands[2], - operands[3], operands[4])); - } + code = CODE_FOR_thumb2_casesi_internal; + + if (!insn_data[(int) code].operand[1].predicate(operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + + emit_jump_insn (GEN_FCN ((int) code) (operands[0], operands[2], + operands[3], operands[4])); DONE; }" ) @@ -8774,6 +8955,37 @@ (set_attr "length" "12")] ) +(define_expand "thumb1_casesi_internal_pic" + [(match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 1 "thumb1_cmp_operand" "") + (match_operand 2 "" "") + (match_operand 3 "" "")] + 
"TARGET_THUMB" + { + rtx reg0; + rtx test = gen_rtx_GTU (VOIDmode, operands[0], operands[1]); + emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[1], + operands[3])); + reg0 = gen_rtx_REG (SImode, 0); + emit_move_insn (reg0, operands[0]); + emit_jump_insn (gen_thumb1_casesi_dispatch (operands[2]/*, operands[3]*/)); + DONE; + } +) + +(define_insn "thumb1_casesi_dispatch" + [(parallel [(set (pc) (unspec [(reg:SI 0) + (label_ref (match_operand 0 "" "")) +;; (label_ref (match_operand 1 "" "")) +] + UNSPEC_THUMB1_CASESI)) + (clobber (reg:SI IP_REGNUM)) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_THUMB" + "* return thumb1_output_casesi(operands);" + [(set_attr "length" "4")] +) + (define_expand "indirect_jump" [(set (pc) (match_operand:SI 0 "s_register_operand" ""))] @@ -10674,6 +10886,7 @@ "TARGET_THUMB1" "* making_const_table = TRUE; + gcc_assert (GET_MODE_CLASS (GET_MODE (operands[0])) != MODE_FLOAT); assemble_integer (operands[0], 2, BITS_PER_WORD, 1); assemble_zeros (2); return \"\"; @@ -10686,19 +10899,23 @@ "TARGET_EITHER" "* { + rtx x = operands[0]; making_const_table = TRUE; - switch (GET_MODE_CLASS (GET_MODE (operands[0]))) + switch (GET_MODE_CLASS (GET_MODE (x))) { case MODE_FLOAT: - { - REAL_VALUE_TYPE r; - REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]); - assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD); - break; - } + if (GET_MODE (x) == HFmode) + arm_emit_fp16_const (x); + else + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + assemble_real (r, GET_MODE (x), BITS_PER_WORD); + } + break; default: - assemble_integer (operands[0], 4, BITS_PER_WORD, 1); - mark_symbol_refs_as_used (operands[0]); + assemble_integer (x, 4, BITS_PER_WORD, 1); + mark_symbol_refs_as_used (x); break; } return \"\"; diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 6aca3950db5..a39bb3a8d5c 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -78,6 +78,10 @@ Specify if floating point hardware should be used mfp= Target 
RejectNegative Joined Undocumented Var(target_fpe_name) +mfp16-format= +Target RejectNegative Joined Var(target_fp16_format_name) +Specify the __fp16 floating-point format + ;; Now ignored. mfpe Target RejectNegative Mask(FPE) Undocumented diff --git a/gcc/config/arm/coff.h b/gcc/config/arm/coff.h index d5f4ed8eac5..bd3e6f85dd4 100644 --- a/gcc/config/arm/coff.h +++ b/gcc/config/arm/coff.h @@ -60,8 +60,9 @@ Otherwise, the readonly data section is used. */ /* We put ARM and Thumb-2 jump tables in the text section, because it makes the code more efficient, but for Thumb-1 it's better to put them out of - band. */ -#define JUMP_TABLES_IN_TEXT_SECTION (TARGET_32BIT) + band unless we are generating compressed tables. */ +#define JUMP_TABLES_IN_TEXT_SECTION \ + (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) #undef READONLY_DATA_SECTION_ASM_OP #define READONLY_DATA_SECTION_ASM_OP "\t.section .rdata" diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h index 7c3eddbe058..88400884ec1 100644 --- a/gcc/config/arm/elf.h +++ b/gcc/config/arm/elf.h @@ -100,8 +100,9 @@ Otherwise, the readonly data section is used. */ /* We put ARM and Thumb-2 jump tables in the text section, because it makes the code more efficient, but for Thumb-1 it's better to put them out of - band. */ -#define JUMP_TABLES_IN_TEXT_SECTION (TARGET_32BIT) + band unless we are generating compressed tables. */ +#define JUMP_TABLES_IN_TEXT_SECTION \ + (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) #ifndef LINK_SPEC #define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X" diff --git a/gcc/config/arm/fp16.c b/gcc/config/arm/fp16.c new file mode 100644 index 00000000000..936caeb78d0 --- /dev/null +++ b/gcc/config/arm/fp16.c @@ -0,0 +1,145 @@ +/* Half-float conversion routines. + + Copyright (C) 2008, 2009 Free Software Foundation, Inc. + Contributed by CodeSourcery. 
+ + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +static inline unsigned short +__gnu_f2h_internal(unsigned int a, int ieee) +{ + unsigned short sign = (a >> 16) & 0x8000; + int aexp = (a >> 23) & 0xff; + unsigned int mantissa = a & 0x007fffff; + unsigned int mask; + unsigned int increment; + + if (aexp == 0xff) + { + if (!ieee) + return sign; + return sign | 0x7e00 | (mantissa >> 13); + } + + if (aexp == 0 && mantissa == 0) + return sign; + + aexp -= 127; + + /* Decimal point between bits 22 and 23. */ + mantissa |= 0x00800000; + if (aexp < -14) + { + mask = 0x007fffff; + if (aexp < -25) + aexp = -26; + else if (aexp != -25) + mask >>= 24 + aexp; + } + else + mask = 0x00001fff; + + /* Round. 
*/ + if (mantissa & mask) + { + increment = (mask + 1) >> 1; + if ((mantissa & mask) == increment) + increment = mantissa & (increment << 1); + mantissa += increment; + if (mantissa >= 0x01000000) + { + mantissa >>= 1; + aexp++; + } + } + + if (ieee) + { + if (aexp > 15) + return sign | 0x7c00; + } + else + { + if (aexp > 16) + return sign | 0x7fff; + } + + if (aexp < -24) + return sign; + + if (aexp < -14) + { + mantissa >>= -14 - aexp; + aexp = -14; + } + + /* We leave the leading 1 in the mantissa, and subtract one + from the exponent bias to compensate. */ + return sign | (((aexp + 14) << 10) + (mantissa >> 13)); +} + +unsigned int +__gnu_h2f_internal(unsigned short a, int ieee) +{ + unsigned int sign = (unsigned int)(a & 0x8000) << 16; + int aexp = (a >> 10) & 0x1f; + unsigned int mantissa = a & 0x3ff; + + if (aexp == 0x1f && ieee) + return sign | 0x7f800000 | (mantissa << 13); + + if (aexp == 0) + { + int shift; + + if (mantissa == 0) + return sign; + + shift = __builtin_clz(mantissa) - 21; + mantissa <<= shift; + aexp = -shift; + } + + return sign | (((aexp + 0x70) << 23) + (mantissa << 13)); +} + +unsigned short +__gnu_f2h_ieee(unsigned int a) +{ + return __gnu_f2h_internal(a, 1); +} + +unsigned int +__gnu_h2f_ieee(unsigned short a) +{ + return __gnu_h2f_internal(a, 1); +} + +unsigned short +__gnu_f2h_alternative(unsigned int x) +{ + return __gnu_f2h_internal(x, 0); +} + +unsigned int +__gnu_h2f_alternative(unsigned short a) +{ + return __gnu_h2f_internal(a, 0); +} diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm index cc5b94e91fe..987bfcb3ba4 100644 --- a/gcc/config/arm/lib1funcs.asm +++ b/gcc/config/arm/lib1funcs.asm @@ -27,8 +27,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits .previous -#endif - +#endif /* __ELF__ and __linux__ */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. 
*/ + /* Tag_ABI_align8_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align8_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ /* ------------------------------------------------------------------------ */ /* We need to know what prefix to add to function names. */ @@ -1533,6 +1542,111 @@ LSYM(Lchange_\register): #endif /* L_interwork_call_via_rX */ #endif /* !__thumb2__ */ + +/* Functions to support compact pic switch tables in thumb1 state. + All these routines take an index into the table in r0. The + table is at LR & ~1 (but this must be rounded up in the case + of 32-bit entries). They are only permitted to clobber r12 + and r14 and r0 must be preserved on exit. */ +#ifdef L_thumb1_case_sqi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_sqi + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrsb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr + SIZE (__gnu_thumb1_case_sqi) +#endif + +#ifdef L_thumb1_case_uqi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_uqi + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr + SIZE (__gnu_thumb1_case_uqi) +#endif + +#ifdef L_thumb1_case_shi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_shi + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrsh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr + SIZE (__gnu_thumb1_case_shi) +#endif + +#ifdef L_thumb1_case_uhi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_uhi + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrh r1, [r1, r0] + lsls r1, r1, #1 
+ add lr, lr, r1 + pop {r0, r1} + bx lr + SIZE (__gnu_thumb1_case_uhi) +#endif + +#ifdef L_thumb1_case_si + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_si + push {r0, r1} + mov r1, lr + adds.n r1, r1, #2 /* Align to word. */ + lsrs r1, r1, #2 + lsls r0, r0, #2 + lsls r1, r1, #2 + ldr r0, [r1, r0] + adds r0, r0, r1 + mov lr, r0 + pop {r0, r1} + mov pc, lr /* We know we were called from thumb code. */ + SIZE (__gnu_thumb1_case_si) +#endif + #endif /* Arch supports thumb. */ #ifndef __symbian__ diff --git a/gcc/config/arm/sfp-machine.h b/gcc/config/arm/sfp-machine.h index 4a456ae03a2..a89d05a00ba 100644 --- a/gcc/config/arm/sfp-machine.h +++ b/gcc/config/arm/sfp-machine.h @@ -19,9 +19,11 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) #define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) +#define _FP_NANFRAC_H ((_FP_QNANBIT_H << 1) - 1) #define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) #define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 #define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_H 0 #define _FP_NANSIGN_S 0 #define _FP_NANSIGN_D 0 #define _FP_NANSIGN_Q 0 @@ -97,5 +99,7 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); #define __fixdfdi __aeabi_d2lz #define __fixunsdfdi __aeabi_d2ulz #define __floatdidf __aeabi_l2d +#define __extendhfsf2 __gnu_h2f_ieee +#define __truncsfhf2 __gnu_f2h_ieee #endif /* __ARM_EABI__ */ diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index c47297f828b..de2bbc4ca68 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -33,6 +33,9 @@ MD_INCLUDES= $(srcdir)/config/arm/arm-tune.md \ $(srcdir)/config/arm/neon.md \ $(srcdir)/config/arm/thumb2.md +LIB1ASMSRC = arm/lib1funcs.asm +LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \ + _thumb1_case_uhi _thumb1_case_si s-config s-conditions s-flags s-codes s-constants 
s-emit s-recog s-preds \ s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf index a268ddb8cd6..6a90d331148 100644 --- a/gcc/config/arm/t-arm-elf +++ b/gcc/config/arm/t-arm-elf @@ -17,12 +17,11 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = arm/lib1funcs.asm # For most CPUs we have an assembly soft-float implementations. # However this is not true for ARMv6M. Here we want to use the soft-fp C # implementation. The soft-fp code is only build for ARMv6M. This pulls # in the asm implementation for other CPUs. -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ _call_via_rX _interwork_call_via_rX \ _lshrdi3 _ashrdi3 _ashldi3 \ _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \ @@ -30,7 +29,7 @@ LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \ _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \ _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \ - _clzsi2 _clzdi2 + _clzsi2 _clzdi2 MULTILIB_OPTIONS = marm/mthumb MULTILIB_DIRNAMES = arm thumb diff --git a/gcc/config/arm/t-bpabi b/gcc/config/arm/t-bpabi index c31d357bedb..61da9ec7b4c 100644 --- a/gcc/config/arm/t-bpabi +++ b/gcc/config/arm/t-bpabi @@ -23,6 +23,8 @@ LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod LIB2FUNCS_EXTRA = $(srcdir)/config/arm/bpabi.c \ $(srcdir)/config/arm/unaligned-funcs.c +LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c + UNWIND_H = $(srcdir)/config/arm/unwind-arm.h LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \ $(srcdir)/config/arm/libunwind.S \ diff --git a/gcc/config/arm/t-pe b/gcc/config/arm/t-pe index e965a1c61c2..8adfd1f90c2 100644 --- a/gcc/config/arm/t-pe +++ b/gcc/config/arm/t-pe @@ -17,8 +17,7 @@ # 
along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 # We want fine grained libraries, so use the new code to build the # floating point emulation libraries. diff --git a/gcc/config/arm/t-strongarm-elf b/gcc/config/arm/t-strongarm-elf index bf130635f85..64d7ca69499 100644 --- a/gcc/config/arm/t-strongarm-elf +++ b/gcc/config/arm/t-strongarm-elf @@ -16,8 +16,7 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2 +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2 # We want fine grained libraries, so use the new code to build the # floating point emulation libraries. diff --git a/gcc/config/arm/t-symbian b/gcc/config/arm/t-symbian index 5b6f0078914..4a1476f6791 100644 --- a/gcc/config/arm/t-symbian +++ b/gcc/config/arm/t-symbian @@ -16,7 +16,7 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMFUNCS = _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 +LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 # These functions have __aeabi equivalents and will never be called by GCC. # By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being @@ -35,6 +35,9 @@ UNWIND_H = $(srcdir)/config/arm/unwind-arm.h LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c LIB2ADDEHDEP = $(UNWIND_H) +# Include half-float helpers. 
+LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c + # Create a multilib for processors with VFP floating-point, and a # multilib for those without -- using the soft-float ABI in both # cases. Symbian OS object should be compiled with interworking diff --git a/gcc/config/arm/t-vxworks b/gcc/config/arm/t-vxworks index c9514b5a4e5..af01ac412db 100644 --- a/gcc/config/arm/t-vxworks +++ b/gcc/config/arm/t-vxworks @@ -16,8 +16,7 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 # We want fine grained libraries, so use the new code to build the # floating point emulation libraries. diff --git a/gcc/config/arm/t-wince-pe b/gcc/config/arm/t-wince-pe index 4fcb48376bd..54fabc8a21e 100644 --- a/gcc/config/arm/t-wince-pe +++ b/gcc/config/arm/t-wince-pe @@ -16,8 +16,7 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 # We want fine grained libraries, so use the new code to build the # floating point emulation libraries. 
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 737f81ccb27..eb18864ecbf 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -185,6 +185,61 @@ (set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")] ) +;; HFmode moves +(define_insn "*movhf_vfp" + [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r") + (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* S register from memory */ + return \"vld1.16\\t{%z0}, %A1\"; + case 1: /* memory from S register */ + return \"vst1.16\\t{%z1}, %A0\"; + case 2: /* ARM register from memory */ + return \"ldrh\\t%0, %1\\t%@ __fp16\"; + case 3: /* memory from ARM register */ + return \"strh\\t%1, %0\\t%@ __fp16\"; + case 4: /* S register from S register */ + return \"fcpys\\t%0, %1\"; + case 5: /* ARM register from ARM register */ + return \"mov\\t%0, %1\\t%@ __fp16\"; + case 6: /* S register from ARM register */ + return \"fmsr\\t%0, %1\"; + case 7: /* ARM register from S register */ + return \"fmrs\\t%0, %1\"; + case 8: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*") + (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*") + (set_attr "length" "4,4,4,4,4,4,4,4,8")] +) + ;; SFmode moves ;; Disparage the 
w<->r cases because reloading an invalid address is @@ -736,6 +791,24 @@ (set_attr "type" "f_cvt")] ) +(define_insn "extendhfsf2" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16" + "vcvtb%?.f32.f16\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "truncsfhf2" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16" + "vcvtb%?.f16.f32\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + (define_insn "*truncsisf2_vfp" [(set (match_operand:SI 0 "s_register_operand" "=t") (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index 1e79644fc2e..d0df1be5b78 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -380,9 +380,6 @@ avr_override_options (void) flag_delete_null_pointer_checks = 0; - if (!PARAM_SET_P (PARAM_INLINE_CALL_COST)) - set_param_value ("inline-call-cost", 5); - for (t = avr_mcu_types; t->name; t++) if (strcmp (t->name, avr_mcu_name) == 0) break; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 61774cc0fdf..06ae734af30 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3424,12 +3424,6 @@ override_options (bool main_args_p) static void ix86_function_specific_save (struct cl_target_option *ptr) { - gcc_assert (IN_RANGE (ix86_arch, 0, 255)); - gcc_assert (IN_RANGE (ix86_schedule, 0, 255)); - gcc_assert (IN_RANGE (ix86_tune, 0, 255)); - gcc_assert (IN_RANGE (ix86_fpmath, 0, 255)); - gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255)); - ptr->arch = ix86_arch; ptr->schedule = ix86_schedule; ptr->tune = ix86_tune; @@ -3439,6 +3433,14 @@ ix86_function_specific_save (struct cl_target_option *ptr) ptr->arch_specified = 
ix86_arch_specified; ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit; ptr->target_flags_explicit = target_flags_explicit; + + /* The fields are char but the variables are not; make sure the + values fit in the fields. */ + gcc_assert (ptr->arch == ix86_arch); + gcc_assert (ptr->schedule == ix86_schedule); + gcc_assert (ptr->tune == ix86_tune); + gcc_assert (ptr->fpmath == ix86_fpmath); + gcc_assert (ptr->branch_cost == ix86_branch_cost); } /* Restore the current options */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 657c8ae3eef..7592f6b420c 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1498,6 +1498,7 @@ enum reg_class || ((CLASS) == AD_REGS) \ || ((CLASS) == SIREG) \ || ((CLASS) == DIREG) \ + || ((CLASS) == SSE_FIRST_REG) \ || ((CLASS) == FP_TOP_REG) \ || ((CLASS) == FP_SECOND_REG)) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 59d9e829ed0..a71ca43c163 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -18531,7 +18531,7 @@ (define_expand "scalb<mode>3" [(use (match_operand:MODEF 0 "register_operand" "")) (use (match_operand:MODEF 1 "general_operand" "")) - (use (match_operand:MODEF 2 "register_operand" ""))] + (use (match_operand:MODEF 2 "general_operand" ""))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) @@ -18552,6 +18552,34 @@ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); DONE; }) + +(define_expand "significandxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_XTRACT_FRACT)) + (set (match_dup 2) + (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); +}) + +(define_expand "significand<mode>2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + 
"TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1])); + emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + DONE; +}) (define_insn "sse4_1_round<mode>2" diff --git a/gcc/config/moxie/moxie.h b/gcc/config/moxie/moxie.h index e63190978f0..73a1a5101e0 100644 --- a/gcc/config/moxie/moxie.h +++ b/gcc/config/moxie/moxie.h @@ -518,6 +518,9 @@ do \ #define MOVE_MAX 4 #define TRULY_NOOP_TRUNCATION(op,ip) 1 +/* All load operations zero extend. */ +#define LOAD_EXTEND_OP(MEM) ZERO_EXTEND + #define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, STACK_SIZE) 0 /* A C expression that is nonzero if X is a legitimate constant for diff --git a/gcc/config/moxie/sfp-machine.h b/gcc/config/moxie/sfp-machine.h index 57f515e9fc6..98f9f1bf491 100644 --- a/gcc/config/moxie/sfp-machine.h +++ b/gcc/config/moxie/sfp-machine.h @@ -3,6 +3,11 @@ #define _FP_WS_TYPE signed long #define _FP_I_TYPE long +/* The type of the result of a floating point comparison. This must + match `__libgcc_cmp_return__' in GCC for the target. */ +typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); +#define CMPtype __gcc_CMPtype + #define _FP_MUL_MEAT_S(R,X,Y) \ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) #define _FP_MUL_MEAT_D(R,X,Y) \ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 592d8dd4fb1..9465c9f945a 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -258,7 +258,7 @@ static GTY(()) section *toc_section; int rs6000_alignment_flags; /* True for any options that were explicitly set. */ -struct { +static struct { bool aix_struct_ret; /* True if -maix-struct-ret was used. */ bool alignment; /* True if -malign- was used. */ bool spe_abi; /* True if -mabi=spe/no-spe was used. 
*/ @@ -775,7 +775,6 @@ static bool rs6000_ms_bitfield_layout_p (const_tree); static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *); static void rs6000_eliminate_indexed_memrefs (rtx operands[2]); static const char *rs6000_mangle_type (const_tree); -EXPORTED_CONST struct attribute_spec rs6000_attribute_table[]; static void rs6000_set_default_type_attributes (tree); static rtx rs6000_savres_routine_sym (rs6000_stack_t *, bool, bool, bool); static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool); @@ -1034,6 +1033,22 @@ static const char alt_reg_names[][8] = "sfp" }; #endif + +/* Table of valid machine attributes. */ + +static const struct attribute_spec rs6000_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ + { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute }, + { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute }, + { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute }, + { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute }, + { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute }, +#ifdef SUBTARGET_ATTRIBUTE_TABLE + SUBTARGET_ATTRIBUTE_TABLE, +#endif + { NULL, 0, 0, false, false, false, NULL } +}; #ifndef MASK_STRICT_ALIGN #define MASK_STRICT_ALIGN 0 @@ -20571,22 +20586,6 @@ rs6000_initialize_trampoline (rtx addr, rtx fnaddr, rtx cxt) } -/* Table of valid machine attributes. 
*/ - -const struct attribute_spec rs6000_attribute_table[] = -{ - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ - { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute }, - { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute }, - { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute }, - { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute }, - { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute }, -#ifdef SUBTARGET_ATTRIBUTE_TABLE - SUBTARGET_ATTRIBUTE_TABLE, -#endif - { NULL, 0, 0, false, false, false, NULL } -}; - /* Handle the "altivec" attribute. The attribute may have arguments as follows: |