diff options
Diffstat (limited to 'gcc/config/aarch64/aarch64.c')
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 346 |
1 files changed, 190 insertions, 156 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 9c77888157d..527b00dbcaa 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -87,6 +87,14 @@ struct aarch64_address_info { enum aarch64_symbol_type symbol_type; }; +struct simd_immediate_info +{ + rtx value; + int shift; + int element_width; + bool mvn; +}; + /* The current code model. */ enum aarch64_code_model aarch64_cmodel; @@ -103,8 +111,6 @@ static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode, static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_override_options_after_change (void); -static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *, - int *, unsigned char *, int *, int *); static bool aarch64_vector_mode_supported_p (enum machine_mode); static unsigned bit_count (unsigned HOST_WIDE_INT); static bool aarch64_const_vec_all_same_int_p (rtx, @@ -532,9 +538,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, { rtx tmp_reg = dest; if (can_create_pseudo_p ()) - { - tmp_reg = gen_reg_rtx (Pmode); - } + tmp_reg = gen_reg_rtx (Pmode); emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm)); emit_insn (gen_ldr_got_small (dest, tmp_reg, imm)); return; @@ -696,6 +700,49 @@ aarch64_split_128bit_move_p (rtx dst, rtx src) || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); } +/* Split a complex SIMD combine. */ + +void +aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) +{ + enum machine_mode src_mode = GET_MODE (src1); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src1) && REG_P (src2)) + { + rtx (*gen) (rtx, rtx, rtx); + + switch (src_mode) + { + case V8QImode: + gen = gen_aarch64_simd_combinev8qi; + break; + case V4HImode: + gen = gen_aarch64_simd_combinev4hi; + break; + case V2SImode: + gen = gen_aarch64_simd_combinev2si; + break; + case V2SFmode: + gen = gen_aarch64_simd_combinev2sf; + break; + case DImode: + gen = gen_aarch64_simd_combinedi; + break; + case DFmode: + gen = gen_aarch64_simd_combinedf; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src1, src2)); + return; + } +} + /* Split a complex SIMD move. */ void @@ -5068,6 +5115,10 @@ aarch64_classify_symbol (rtx x, return SYMBOL_SMALL_ABSOLUTE; case AARCH64_CMODEL_TINY_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_SMALL_GOT; + return SYMBOL_TINY_ABSOLUTE; + case AARCH64_CMODEL_SMALL_PIC: if (!aarch64_symbol_binds_local_p (x)) return SYMBOL_SMALL_GOT; @@ -5150,8 +5201,7 @@ aarch64_legitimate_constant_p (enum machine_mode mode, rtx x) /* This could probably go away because we now decompose CONST_INTs according to expand_mov_immediate. */ if ((GET_CODE (x) == CONST_VECTOR - && aarch64_simd_valid_immediate (x, mode, false, - NULL, NULL, NULL, NULL, NULL) != -1) + && aarch64_simd_valid_immediate (x, mode, false, NULL)) || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) return !targetm.cannot_force_const_mem (mode, x); @@ -5982,32 +6032,57 @@ aarch64_vector_mode_supported_p (enum machine_mode mode) return false; } -/* Return quad mode as the preferred SIMD mode. */ +/* Return appropriate SIMD container + for MODE within a vector of WIDTH bits. */ static enum machine_mode -aarch64_preferred_simd_mode (enum machine_mode mode) +aarch64_simd_container_mode (enum machine_mode mode, unsigned width) { + gcc_assert (width == 64 || width == 128); if (TARGET_SIMD) - switch (mode) - { - case DFmode: - return V2DFmode; - case SFmode: - return V4SFmode; - case SImode: - return V4SImode; - case HImode: - return V8HImode; - case QImode: - return V16QImode; - case DImode: - return V2DImode; - break; - - default:; - } + { + if (width == 128) + switch (mode) + { + case DFmode: + return V2DFmode; + case SFmode: + return V4SFmode; + case SImode: + return V4SImode; + case HImode: + return V8HImode; + case QImode: + return V16QImode; + case DImode: + return V2DImode; + default: + break; + } + else + switch (mode) + { + case SFmode: + return V2SFmode; + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + default: + break; + } + } return word_mode; } +/* Return 128-bit container as the preferred SIMD mode for MODE. */ +static enum machine_mode +aarch64_preferred_simd_mode (enum machine_mode mode) +{ + return aarch64_simd_container_mode (mode, 128); +} + /* Return the bitmask of possible vector sizes for the vectorizer to iterate over. */ static unsigned int @@ -6095,7 +6170,7 @@ aarch64_mangle_type (const_tree type) } /* Return the equivalent letter for size. */ -static unsigned char +static char sizetochar (int size) { switch (size) @@ -6142,15 +6217,10 @@ aarch64_vect_float_const_representable_p (rtx x) return aarch64_float_const_representable_p (x0); } -/* TODO: This function returns values similar to those - returned by neon_valid_immediate in gcc/config/arm/arm.c - but the API here is different enough that these magic numbers - are not used. It should be sufficient to return true or false. */ -static int -aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, - rtx *modconst, int *elementwidth, - unsigned char *elementchar, - int *mvn, int *shift) +/* Return true for valid and false for invalid. */ +bool +aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse, + struct simd_immediate_info *info) { #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ matches = 1; \ @@ -6161,7 +6231,6 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, { \ immtype = (CLASS); \ elsize = (ELSIZE); \ - elchar = sizetochar (elsize); \ eshift = (SHIFT); \ emvn = (NEG); \ break; \ @@ -6170,36 +6239,25 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); unsigned char bytes[16]; - unsigned char elchar = 0; int immtype = -1, matches; unsigned int invmask = inverse ? 0xff : 0; int eshift, emvn; if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) { - bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode); - int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0))); - - if (!(simd_imm_zero - || aarch64_vect_float_const_representable_p (op))) - return -1; - - if (modconst) - *modconst = CONST_VECTOR_ELT (op, 0); - - if (elementwidth) - *elementwidth = elem_width; - - if (elementchar) - *elementchar = sizetochar (elem_width); + if (! (aarch64_simd_imm_zero_p (op, mode) + || aarch64_vect_float_const_representable_p (op))) + return false; - if (shift) - *shift = 0; + if (info) + { + info->value = CONST_VECTOR_ELT (op, 0); + info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value)); + info->mvn = false; + info->shift = 0; + } - if (simd_imm_zero) - return 19; - else - return 18; + return true; } /* Splat vector constant out into a byte vector. */ @@ -6297,23 +6355,14 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, if (immtype == -1 || (immtype >= 12 && immtype <= 15) || immtype == 18) - return -1; - - - if (elementwidth) - *elementwidth = elsize; - - if (elementchar) - *elementchar = elchar; - - if (mvn) - *mvn = emvn; - - if (shift) - *shift = eshift; + return false; - if (modconst) + if (info) { + info->element_width = elsize; + info->mvn = emvn != 0; + info->shift = eshift; + unsigned HOST_WIDE_INT imm = 0; /* Un-invert bytes of recognized vector, if necessary. */ @@ -6330,68 +6379,27 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) << (i * BITS_PER_UNIT); - *modconst = GEN_INT (imm); - } - else - { - unsigned HOST_WIDE_INT imm = 0; - for (i = 0; i < elsize / BITS_PER_UNIT; i++) - imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); + info->value = GEN_INT (imm); + } + else + { + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); /* Construct 'abcdefgh' because the assembler cannot handle - generic constants. */ - gcc_assert (shift != NULL && mvn != NULL); - if (*mvn) + generic constants. */ + if (info->mvn) imm = ~imm; - imm = (imm >> *shift) & 0xff; - *modconst = GEN_INT (imm); - } + imm = (imm >> info->shift) & 0xff; + info->value = GEN_INT (imm); + } } - return immtype; + return true; #undef CHECK } -/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction - (or, implicitly, MVNI) immediate. Write back width per element - to *ELEMENTWIDTH, and a modified constant (whatever should be output - for a MOVI instruction) in *MODCONST. */ -int -aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode, - rtx *modconst, int *elementwidth, - unsigned char *elementchar, - int *mvn, int *shift) -{ - rtx tmpconst; - int tmpwidth; - unsigned char tmpwidthc; - int tmpmvn = 0, tmpshift = 0; - int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst, - &tmpwidth, &tmpwidthc, - &tmpmvn, &tmpshift); - - if (retval == -1) - return 0; - - if (modconst) - *modconst = tmpconst; - - if (elementwidth) - *elementwidth = tmpwidth; - - if (elementchar) - *elementchar = tmpwidthc; - - if (mvn) - *mvn = tmpmvn; - - if (shift) - *shift = tmpshift; - - return 1; -} - static bool aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT minval, @@ -6496,9 +6504,7 @@ aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode) gcc_assert (!VECTOR_MODE_P (mode)); vmode = aarch64_preferred_simd_mode (mode); rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op)); - int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0, - NULL, NULL, NULL, NULL); - return retval; + return aarch64_simd_valid_immediate (op_v, vmode, false, NULL); } /* Construct and return a PARALLEL RTX vector. */ @@ -6726,8 +6732,7 @@ aarch64_simd_make_constant (rtx vals) gcc_unreachable (); if (const_vec != NULL_RTX - && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL, - NULL, NULL, NULL)) + && aarch64_simd_valid_immediate (const_vec, mode, false, NULL)) /* Load using MOVI/MVNI. */ return const_vec; else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX) @@ -7285,49 +7290,78 @@ aarch64_float_const_representable_p (rtx x) } char* -aarch64_output_simd_mov_immediate (rtx *const_vector, +aarch64_output_simd_mov_immediate (rtx const_vector, enum machine_mode mode, unsigned width) { - int is_valid; - unsigned char widthc; - int lane_width_bits; + bool is_valid; static char templ[40]; - int shift = 0, mvn = 0; const char *mnemonic; unsigned int lane_count = 0; + char element_char; - is_valid = - aarch64_simd_immediate_valid_for_move (*const_vector, mode, - const_vector, &lane_width_bits, - &widthc, &mvn, &shift); + struct simd_immediate_info info; + + /* This will return true to show const_vector is legal for use as either + a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will + also update INFO to show how the immediate should be generated. */ + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info); gcc_assert (is_valid); + element_char = sizetochar (info.element_width); + lane_count = width / info.element_width; + mode = GET_MODE_INNER (mode); if (mode == SFmode || mode == DFmode) { - bool zero_p = - aarch64_float_const_zero_rtx_p (*const_vector); - gcc_assert (shift == 0); - mnemonic = zero_p ? "movi" : "fmov"; + gcc_assert (info.shift == 0 && ! info.mvn); + if (aarch64_float_const_zero_rtx_p (info.value)) + info.value = GEN_INT (0); + else + { +#define buf_size 20 + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, info.value); + char float_buf[buf_size] = {'\0'}; + real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode); +#undef buf_size + + if (lane_count == 1) + snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); + else + snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", + lane_count, element_char, float_buf); + return templ; + } } - else - mnemonic = mvn ? "mvni" : "movi"; - gcc_assert (lane_width_bits != 0); - lane_count = width / lane_width_bits; + mnemonic = info.mvn ? "mvni" : "movi"; if (lane_count == 1) - snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic); - else if (shift) - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d", - mnemonic, lane_count, widthc, shift); + snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, UINTVAL (info.value)); + else if (info.shift) + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX + ", lsl %d", mnemonic, lane_count, element_char, + UINTVAL (info.value), info.shift); else - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1", - mnemonic, lane_count, widthc); + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, lane_count, element_char, UINTVAL (info.value)); return templ; } +char* +aarch64_output_scalar_simd_mov_immediate (rtx immediate, + enum machine_mode mode) +{ + enum machine_mode vmode; + + gcc_assert (!VECTOR_MODE_P (mode)); + vmode = aarch64_simd_container_mode (mode, 64); + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); + return aarch64_output_simd_mov_immediate (v_op, vmode, 64); +} + /* Split operands into moves from op[1] + op[2] into op[0]. */ void |