diff options
Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r-- | gcc/config/i386/i386.c | 390 |
1 files changed, 305 insertions, 85 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 0d84cde9cbe..f6c17dfd405 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -16612,7 +16612,7 @@ ix86_print_operand (FILE *file, rtx x, int code) if (TARGET_64BIT) x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); - output_address (x); + output_address (VOIDmode, x); return; case 'L': @@ -17503,7 +17503,7 @@ ix86_print_operand_address_as (FILE *file, rtx addr, addr_space_t as) } static void -ix86_print_operand_address (FILE *file, rtx addr) +ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) { ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC); } @@ -22582,8 +22582,8 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, cmp_op1 = force_reg (cmp_ops_mode, cmp_op1); if (optimize - || reg_overlap_mentioned_p (dest, op_true) - || reg_overlap_mentioned_p (dest, op_false)) + || (op_true && reg_overlap_mentioned_p (dest, op_true)) + || (op_false && reg_overlap_mentioned_p (dest, op_false))) dest = gen_reg_rtx (maskcmp ? cmp_mode : mode); /* Compare patterns for int modes are unspec in AVX512F only. */ @@ -22644,6 +22644,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) rtx t2, t3, x; + /* If we have an integer mask and FP value then we need + to cast mask to FP mode. */ + if (mode != cmpmode && VECTOR_MODE_P (cmpmode)) + { + cmp = force_reg (cmpmode, cmp); + cmp = gen_rtx_SUBREG (mode, cmp, 0); + } + if (vector_all_ones_operand (op_true, mode) && rtx_equal_p (op_false, CONST0_RTX (mode)) && !maskcmp) @@ -22855,34 +22863,127 @@ ix86_expand_fp_movcc (rtx operands[]) return true; } -/* Expand a floating-point vector conditional move; a vcond operation - rather than a movcc operation. */ +/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. 
*/ + +static int +ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code) +{ + switch (code) + { + case EQ: + return 0; + case LT: + case LTU: + return 1; + case LE: + case LEU: + return 2; + case NE: + return 4; + case GE: + case GEU: + return 5; + case GT: + case GTU: + return 6; + default: + gcc_unreachable (); + } +} + +/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. + NOTE(review): these immediates look like the unordered predicate + variants (true when an operand is a NaN); confirm against the + VCMPPS/VCMPPD comparison predicate table before relying on them + for IEEE-conforming EQ/GT/GE/LT/LE. */ + +static int +ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code) +{ + switch (code) + { + case EQ: + return 0x08; + case NE: + return 0x04; + case GT: + return 0x16; + case LE: + return 0x1a; + case GE: + return 0x15; + case LT: + return 0x19; + default: + gcc_unreachable (); + } +} + +/* Return immediate value to be used in UNSPEC_PCMP + for comparison CODE in MODE. */ + +static int +ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode) +{ + if (FLOAT_MODE_P (mode)) + return ix86_fp_cmp_code_to_pcmp_immediate (code); + return ix86_int_cmp_code_to_pcmp_immediate (code); +} + +/* Expand AVX-512 vector comparison. OPERANDS[0] is the mask + destination, OPERANDS[1] the comparison rtx and OPERANDS[2] and + OPERANDS[3] the values being compared. Unsigned comparison codes + are emitted as UNSPEC_UNSIGNED_PCMP, all others as UNSPEC_PCMP. + Always returns true. */ bool -ix86_expand_fp_vcond (rtx operands[]) +ix86_expand_mask_vec_cmp (rtx operands[]) { - enum rtx_code code = GET_CODE (operands[3]); + machine_mode mask_mode = GET_MODE (operands[0]); + machine_mode cmp_mode = GET_MODE (operands[2]); + enum rtx_code code = GET_CODE (operands[1]); + rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode)); + int unspec_code; + rtx unspec; + + switch (code) + { + case LEU: + case GTU: + case GEU: + case LTU: + /* Must not fall through: the default arm would overwrite the + unsigned unspec with the signed one. */ + unspec_code = UNSPEC_UNSIGNED_PCMP; + break; + default: + unspec_code = UNSPEC_PCMP; + break; + } + + unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2], + operands[3], imm), + unspec_code); + emit_insn (gen_rtx_SET (operands[0], unspec)); + + return true; +} + +/* Expand fp vector comparison. 
*/ + +bool +ix86_expand_fp_vec_cmp (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[1]); rtx cmp; code = ix86_prepare_sse_fp_compare_args (operands[0], code, - &operands[4], &operands[5]); + &operands[2], &operands[3]); if (code == UNKNOWN) { rtx temp; - switch (GET_CODE (operands[3])) + switch (GET_CODE (operands[1])) { case LTGT: - temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], - operands[5], operands[0], operands[0]); - cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], - operands[5], operands[1], operands[2]); + temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2], + operands[3], NULL, NULL); + cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2], + operands[3], NULL, NULL); code = AND; break; case UNEQ: - temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], - operands[5], operands[0], operands[0]); - cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], - operands[5], operands[1], operands[2]); + temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2], + operands[3], NULL, NULL); + cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2], + operands[3], NULL, NULL); code = IOR; break; default: @@ -22890,72 +22991,26 @@ ix86_expand_fp_vcond (rtx operands[]) } cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, OPTAB_DIRECT); - ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); - return true; } + else + cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3], + operands[1], operands[2]); - if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], - operands[5], operands[1], operands[2])) - return true; + if (operands[0] != cmp) + emit_move_insn (operands[0], cmp); - cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], - operands[1], operands[2]); - ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); return true; } -/* Expand a signed/unsigned integral vector conditional move. 
*/ - -bool -ix86_expand_int_vcond (rtx operands[]) +static rtx +ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, + rtx op_true, rtx op_false, bool *negate) { - machine_mode data_mode = GET_MODE (operands[0]); - machine_mode mode = GET_MODE (operands[4]); - enum rtx_code code = GET_CODE (operands[3]); - bool negate = false; - rtx x, cop0, cop1; + machine_mode data_mode = GET_MODE (dest); + machine_mode mode = GET_MODE (cop0); + rtx x; - cop0 = operands[4]; - cop1 = operands[5]; - - /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 - and x < 0 ? 1 : 0 into (unsigned) x >> 31. */ - if ((code == LT || code == GE) - && data_mode == mode - && cop1 == CONST0_RTX (mode) - && operands[1 + (code == LT)] == CONST0_RTX (data_mode) - && GET_MODE_UNIT_SIZE (data_mode) > 1 - && GET_MODE_UNIT_SIZE (data_mode) <= 8 - && (GET_MODE_SIZE (data_mode) == 16 - || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32))) - { - rtx negop = operands[2 - (code == LT)]; - int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1; - if (negop == CONST1_RTX (data_mode)) - { - rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift), - operands[0], 1, OPTAB_DIRECT); - if (res != operands[0]) - emit_move_insn (operands[0], res); - return true; - } - else if (GET_MODE_INNER (data_mode) != DImode - && vector_all_ones_operand (negop, data_mode)) - { - rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift), - operands[0], 0, OPTAB_DIRECT); - if (res != operands[0]) - emit_move_insn (operands[0], res); - return true; - } - } - - if (!nonimmediate_operand (cop1, mode)) - cop1 = force_reg (mode, cop1); - if (!general_operand (operands[1], data_mode)) - operands[1] = force_reg (data_mode, operands[1]); - if (!general_operand (operands[2], data_mode)) - operands[2] = force_reg (data_mode, operands[2]); + *negate = false; /* XOP supports all of the comparisons on all 128-bit vector int types. 
*/ if (TARGET_XOP @@ -22976,13 +23031,13 @@ ix86_expand_int_vcond (rtx operands[]) case LE: case LEU: code = reverse_condition (code); - negate = true; + *negate = true; break; case GE: case GEU: code = reverse_condition (code); - negate = true; + *negate = true; /* FALLTHRU */ case LT: @@ -23003,14 +23058,14 @@ ix86_expand_int_vcond (rtx operands[]) case EQ: /* SSE4.1 supports EQ. */ if (!TARGET_SSE4_1) - return false; + return NULL; break; case GT: case GTU: /* SSE4.2 supports GT/GTU. */ if (!TARGET_SSE4_2) - return false; + return NULL; break; default: @@ -23071,12 +23126,13 @@ ix86_expand_int_vcond (rtx operands[]) case V8HImode: /* Perform a parallel unsigned saturating subtraction. */ x = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1))); + emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, + cop1))); cop0 = x; cop1 = CONST0_RTX (mode); code = EQ; - negate = !negate; + *negate = !*negate; break; default: @@ -23085,22 +23141,162 @@ ix86_expand_int_vcond (rtx operands[]) } } + if (*negate) + std::swap (op_true, op_false); + /* Allow the comparison to be done in one mode, but the movcc to happen in another mode. */ if (data_mode == mode) { - x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, - operands[1+negate], operands[2-negate]); + x = ix86_expand_sse_cmp (dest, code, cop0, cop1, + op_true, op_false); } else { gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode)); x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1, - operands[1+negate], operands[2-negate]); + op_true, op_false); if (GET_MODE (x) == mode) x = gen_lowpart (data_mode, x); } + return x; +} + +/* Expand integer vector comparison. 
*/ + +bool +ix86_expand_int_vec_cmp (rtx operands[]) +{ + rtx_code code = GET_CODE (operands[1]); + bool negate = false; + rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2], + operands[3], NULL, NULL, &negate); + + if (!cmp) /* The helper produced no expansion for this comparison. */ + return false; + + if (negate) /* The helper computed the inverse of the requested comparison; invert it back by comparing the result for equality with zero. */ + cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp, + CONST0_RTX (GET_MODE (cmp)), + NULL, NULL, &negate); + + gcc_assert (!negate); + + if (operands[0] != cmp) + emit_move_insn (operands[0], cmp); + + return true; +} + +/* Expand a floating-point vector conditional move; a vcond operation + rather than a movcc operation. OPERANDS[0] is the destination, + OPERANDS[1] and OPERANDS[2] are the values selected between (passed + to ix86_expand_sse_movcc as op_true and op_false), OPERANDS[3] is the + comparison rtx and OPERANDS[4] and OPERANDS[5] are its operands. + When ix86_prepare_sse_fp_compare_args returns UNKNOWN, LTGT is + expanded as ORDERED AND NE and UNEQ as UNORDERED IOR EQ. Otherwise + a min/max expansion is tried before the generic compare and move. + Always returns true. */ + +bool +ix86_expand_fp_vcond (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[3]); + rtx cmp; + + code = ix86_prepare_sse_fp_compare_args (operands[0], code, + &operands[4], &operands[5]); + if (code == UNKNOWN) + { + rtx temp; + switch (GET_CODE (operands[3])) + { + case LTGT: + temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], + operands[5], operands[0], operands[0]); + cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], + operands[5], operands[1], operands[2]); + code = AND; + break; + case UNEQ: + temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], + operands[5], operands[0], operands[0]); + cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], + operands[5], operands[1], operands[2]); + code = IOR; + break; + default: + gcc_unreachable (); + } + cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, + OPTAB_DIRECT); + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; + } + + if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], + operands[5], operands[1], operands[2])) + return true; + + cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], + operands[1], operands[2]); + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; +} + +/* Expand a signed/unsigned integral vector conditional move. 
*/ + +bool +ix86_expand_int_vcond (rtx operands[]) +{ + machine_mode data_mode = GET_MODE (operands[0]); + machine_mode mode = GET_MODE (operands[4]); + enum rtx_code code = GET_CODE (operands[3]); + bool negate = false; + rtx x, cop0, cop1; + + cop0 = operands[4]; + cop1 = operands[5]; + + /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 + and x < 0 ? 1 : 0 into (unsigned) x >> 31. */ + if ((code == LT || code == GE) + && data_mode == mode + && cop1 == CONST0_RTX (mode) + && operands[1 + (code == LT)] == CONST0_RTX (data_mode) + && GET_MODE_UNIT_SIZE (data_mode) > 1 + && GET_MODE_UNIT_SIZE (data_mode) <= 8 + && (GET_MODE_SIZE (data_mode) == 16 + || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32))) + { + rtx negop = operands[2 - (code == LT)]; + int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1; + if (negop == CONST1_RTX (data_mode)) + { + rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift), + operands[0], 1, OPTAB_DIRECT); + if (res != operands[0]) + emit_move_insn (operands[0], res); + return true; + } + else if (GET_MODE_INNER (data_mode) != DImode + && vector_all_ones_operand (negop, data_mode)) + { + rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift), + operands[0], 0, OPTAB_DIRECT); + if (res != operands[0]) + emit_move_insn (operands[0], res); + return true; + } + } + + if (!nonimmediate_operand (cop1, mode)) + cop1 = force_reg (mode, cop1); + if (!general_operand (operands[1], data_mode)) + operands[1] = force_reg (data_mode, operands[1]); + if (!general_operand (operands[2], data_mode)) + operands[2] = force_reg (data_mode, operands[2]); + + x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1, + operands[1], operands[2], &negate); + + if (!x) + return false; + ix86_expand_sse_movcc (operands[0], x, operands[1+negate], operands[2-negate]); return true; @@ -53085,6 +53281,28 @@ ix86_autovectorize_vector_sizes (void) (TARGET_AVX && !TARGET_PREFER_AVX128) ? 
32 | 16 : 0; } +/* Implementation of targetm.vectorize.get_mask_mode. NUNITS is the + number of vector elements and VECTOR_SIZE the vector size in units + of BITS_PER_UNIT bits. For 64-unit vectors under TARGET_AVX512F + (with 4- or 8-unit elements, or any element size under + TARGET_AVX512BW) return the integer mode given by + smallest_mode_for_size (NUNITS, MODE_INT); otherwise return a vector + mode of NUNITS integer elements, each the size of one data element. */ + +static machine_mode +ix86_get_mask_mode (unsigned nunits, unsigned vector_size) +{ + unsigned elem_size = vector_size / nunits; + + /* Scalar mask case. */ + if (TARGET_AVX512F && vector_size == 64) + { + if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW) + return smallest_mode_for_size (nunits, MODE_INT); + } + + machine_mode elem_mode + = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT); + + gcc_assert (elem_size * nunits == vector_size); + + return mode_for_vector (elem_mode, nunits); +} + /* Return class of registers which could be used for pseudo of MODE @@ -54096,6 +54314,8 @@ ix86_addr_space_zero_address_valid (addr_space_t as) #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ ix86_autovectorize_vector_sizes +#undef TARGET_VECTORIZE_GET_MASK_MODE +#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode #undef TARGET_VECTORIZE_INIT_COST #define TARGET_VECTORIZE_INIT_COST ix86_init_cost #undef TARGET_VECTORIZE_ADD_STMT_COST |