author     Martin Jambor <mjambor@suse.cz>   2017-06-26 15:27:22 +0200
committer  Martin Jambor <mjambor@suse.cz>   2017-06-26 15:27:22 +0200
commit     166bec868d991fdf71f9a66f994e5977fcab4aa2 (patch)
tree       5e45be1a3236377b15a25666d5ad9e0a566aef8a /gcc/config/rs6000/rs6000.c
parent     7f6e4303242a526871a02c003eb57257f7b25448 (diff)
parent     e76fa056805f0aeb36583a27b02f4a4adbfd0004 (diff)
download   gcc-166bec868d991fdf71f9a66f994e5977fcab4aa2.tar.gz
Merge branch 'master' into gcn
Diffstat (limited to 'gcc/config/rs6000/rs6000.c')
-rw-r--r--  gcc/config/rs6000/rs6000.c | 1662
1 file changed, 164 insertions(+), 1498 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 8e825704266..79dccba1dce 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -77,6 +77,7 @@ #endif #include "case-cfn-macros.h" #include "ppc-auxv.h" +#include "tree-ssa-propagate.h" /* This file should be included last. */ #include "target-def.h" @@ -2014,10 +2015,6 @@ rs6000_cpu_name_lookup (const char *name) This is ordinarily the length in words of a value of mode MODE but can be less for certain modes in special long registers. - For the SPE, GPRs are 64 bits but only 32 bits are visible in - scalar instructions. The upper 32 bits are only available to the - SIMD instructions. - POWER and PowerPC GPRs hold 32 bits worth; PowerPC64 GPRs and FPRs point register holds 64 bits worth. */ @@ -2236,7 +2233,6 @@ rs6000_debug_vector_unit (enum rs6000_vector v) case VECTOR_VSX: ret = "vsx"; break; case VECTOR_P8_VECTOR: ret = "p8_vector"; break; case VECTOR_PAIRED: ret = "paired"; break; - case VECTOR_SPE: ret = "spe"; break; case VECTOR_OTHER: ret = "other"; break; default: ret = "unknown"; break; } @@ -2450,8 +2446,6 @@ rs6000_debug_reg_global (void) SDmode, DDmode, TDmode, - V8QImode, - V4HImode, V2SImode, V16QImode, V8HImode, @@ -2904,9 +2898,7 @@ rs6000_setup_reg_addr_masks (void) addr_mask |= RELOAD_REG_INDEXED; /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY - addressing. Restrict addressing on SPE for 64-bit types - because of the SUBREG hackery used to address 64-bit floats in - '32-bit' GPRs. If we allow scalars into Altivec registers, + addressing. If we allow scalars into Altivec registers, don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */ if (TARGET_UPDATE @@ -3174,7 +3166,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[TImode] = align64; } - /* TODO add SPE and paired floating point vector support. */ + /* TODO add paired floating point vector support. */ /* Register class constraints for the constraints that depend on compile switches. When the VSX code was added, different constraints were added @@ -3830,8 +3822,7 @@ darwin_rs6000_override_options (void) /* Return the builtin mask of the various options used that could affect which builtins were used. In the past we used target_flags, but we've run out of - bits, and some options like SPE and PAIRED are no longer in - target_flags. */ + bits, and some options like PAIRED are no longer in target_flags. */ HOST_WIDE_INT rs6000_builtin_mask_calculate (void) @@ -5482,8 +5473,7 @@ rs6000_option_override_internal (bool global_init_p) /* Set the builtin mask of the various options used that could affect which builtins were used. In the past we used target_flags, but we've run out - of bits, and some options like SPE and PAIRED are no longer in - target_flags. */ + of bits, and some options like PAIRED are no longer in target_flags. 
*/ rs6000_builtin_mask = rs6000_builtin_mask_calculate (); if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) rs6000_print_builtin_options (stderr, 0, "builtin mask", @@ -7462,6 +7452,8 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt) insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx); else if (mode == V16QImode) insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx); + else if (mode == V4SFmode) + insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx); } if (insn) @@ -8490,9 +8482,7 @@ reg_offset_addressing_ok_p (machine_mode mode) return mode_supports_vsx_dform_quad (mode); break; - case V4HImode: case V2SImode: - case V1DImode: case V2SFmode: /* Paired vector modes. Only reg+reg addressing is valid. */ if (TARGET_PAIRED_FLOAT) @@ -8701,9 +8691,6 @@ legitimate_small_data_p (machine_mode mode, rtx x) && small_data_operand (x, mode)); } -/* SPE offset addressing is limited to 5-bits worth of double words. */ -#define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0) - bool rs6000_legitimate_offset_address_p (machine_mode mode, rtx x, bool strict, bool worst_case) @@ -8730,12 +8717,10 @@ rs6000_legitimate_offset_address_p (machine_mode mode, rtx x, extra = 0; switch (mode) { - case V4HImode: case V2SImode: - case V1DImode: case V2SFmode: - /* SPE vector modes. */ - return SPE_CONST_OFFSET_OK (offset); + /* Paired single modes: offset addressing isn't valid. */ + return false; case DFmode: case DDmode: @@ -10981,10 +10966,8 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) case V8HImode: case V4SFmode: case V4SImode: - case V4HImode: case V2SFmode: case V2SImode: - case V1DImode: case V2DFmode: case V2DImode: case V1TImode: @@ -11779,7 +11762,6 @@ function_arg_padding (machine_mode mode, const_tree type) However, we're stuck with this because changing the ABI might break existing library interfaces. - Doubleword align SPE vectors. Quadword align Altivec/VSX vectors. Quadword align large synthetic vector types. */ @@ -12200,18 +12182,17 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode, int n_words = rs6000_arg_size (mode, type); int gregno = cum->sysv_gregno; - /* Long long and SPE vectors are put in (r3,r4), (r5,r6), - (r7,r8) or (r9,r10). As does any other 2 word item such - as complex int due to a historical mistake. */ + /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10). + As does any other 2 word item such as complex int due to a + historical mistake. */ if (n_words == 2) gregno += (1 - gregno) & 1; /* Multi-reg args are not split between registers and stack. */ if (gregno + n_words - 1 > GP_ARG_MAX_REG) { - /* Long long and SPE vectors are aligned on the stack. - So are other 2 word items such as complex int due to - a historical mistake. */ + /* Long long is aligned on the stack. So are other 2 word + items such as complex int due to a historical mistake. */ if (n_words == 2) cum->words += cum->words & 1; cum->words += n_words; @@ -12748,9 +12729,9 @@ rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode, int n_words = rs6000_arg_size (mode, type); int gregno = cum->sysv_gregno; - /* Long long and SPE vectors are put in (r3,r4), (r5,r6), - (r7,r8) or (r9,r10). As does any other 2 word item such - as complex int due to a historical mistake. */ + /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10). + As does any other 2 word item such as complex int due to a + historical mistake. 
*/ if (n_words == 2) gregno += (1 - gregno) & 1; @@ -13687,9 +13668,8 @@ rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, lab_false = create_artificial_label (input_location); lab_over = create_artificial_label (input_location); - /* Long long and SPE vectors are aligned in the registers. - As are any other 2 gpr item such as complex int due to a - historical mistake. */ + /* Long long is aligned in the registers. As are any other 2 gpr + item such as complex int due to a historical mistake. */ u = reg; if (n_reg == 2 && reg == gpr) { @@ -16355,9 +16335,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) /* Even element flavors of vec_mul (signed). */ case ALTIVEC_BUILTIN_VMULESB: case ALTIVEC_BUILTIN_VMULESH: + case ALTIVEC_BUILTIN_VMULESW: /* Even element flavors of vec_mul (unsigned). */ case ALTIVEC_BUILTIN_VMULEUB: case ALTIVEC_BUILTIN_VMULEUH: + case ALTIVEC_BUILTIN_VMULEUW: { arg0 = gimple_call_arg (stmt, 0); arg1 = gimple_call_arg (stmt, 1); @@ -16370,9 +16352,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) /* Odd element flavors of vec_mul (signed). */ case ALTIVEC_BUILTIN_VMULOSB: case ALTIVEC_BUILTIN_VMULOSH: + case ALTIVEC_BUILTIN_VMULOSW: /* Odd element flavors of vec_mul (unsigned). */ case ALTIVEC_BUILTIN_VMULOUB: case ALTIVEC_BUILTIN_VMULOUH: + case ALTIVEC_BUILTIN_VMULOUW: { arg0 = gimple_call_arg (stmt, 0); arg1 = gimple_call_arg (stmt, 1); @@ -16588,6 +16572,76 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) gsi_replace (gsi, g, true); return true; } + /* Flavors of vec_rotate_left. */ + case ALTIVEC_BUILTIN_VRLB: + case ALTIVEC_BUILTIN_VRLH: + case ALTIVEC_BUILTIN_VRLW: + case P8V_BUILTIN_VRLD: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vector shift right algebraic. + vec_sra{b,h,w} -> vsra{b,h,w}. */ + case ALTIVEC_BUILTIN_VSRAB: + case ALTIVEC_BUILTIN_VSRAH: + case ALTIVEC_BUILTIN_VSRAW: + case P8V_BUILTIN_VSRAD: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vector shift left. + builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */ + case ALTIVEC_BUILTIN_VSLB: + case ALTIVEC_BUILTIN_VSLH: + case ALTIVEC_BUILTIN_VSLW: + case P8V_BUILTIN_VSLD: + { + arg0 = gimple_call_arg (stmt, 0); + if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0))) + && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0)))) + return false; + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vector shift right. */ + case ALTIVEC_BUILTIN_VSRB: + case ALTIVEC_BUILTIN_VSRH: + case ALTIVEC_BUILTIN_VSRW: + case P8V_BUILTIN_VSRD: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple_seq stmts = NULL; + /* Convert arg0 to unsigned. 
*/ + tree arg0_unsigned + = gimple_build (&stmts, VIEW_CONVERT_EXPR, + unsigned_type_for (TREE_TYPE (arg0)), arg0); + tree res + = gimple_build (&stmts, RSHIFT_EXPR, + TREE_TYPE (arg0_unsigned), arg0_unsigned, arg1); + /* Convert result back to the lhs type. */ + res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + update_call_from_tree (gsi, res); + return true; + } default: break; } @@ -16635,7 +16689,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case RS6000_BTC_TERNARY: name3 = "ternary"; break; case RS6000_BTC_PREDICATE: name3 = "predicate"; break; case RS6000_BTC_ABS: name3 = "abs"; break; - case RS6000_BTC_EVSEL: name3 = "evsel"; break; case RS6000_BTC_DST: name3 = "dst"; break; } @@ -16843,7 +16896,6 @@ rs6000_init_builtins (void) : "__vector long long", intDI_type_node, 2); V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2); - V4HI_type_node = build_vector_type (intHI_type_node, 4); V4SI_type_node = rs6000_vector_type ("__vector signed int", intSI_type_node, 4); V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4); @@ -16991,7 +17043,6 @@ rs6000_init_builtins (void) builtin_mode_to_type[V2DImode][0] = V2DI_type_node; builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node; builtin_mode_to_type[V2DFmode][0] = V2DF_type_node; - builtin_mode_to_type[V4HImode][0] = V4HI_type_node; builtin_mode_to_type[V4SImode][0] = V4SI_type_node; builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node; builtin_mode_to_type[V4SFmode][0] = V4SF_type_node; @@ -17025,11 +17076,11 @@ rs6000_init_builtins (void) pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel", pixel_type_node, 8); - /* Paired and SPE builtins are only available if you build a compiler with - the appropriate options, so only create those builtins with the - appropriate compiler option. Create Altivec and VSX builtins on machines - with at least the general purpose extensions (970 and newer) to allow the - use of the target attribute. */ + /* Paired builtins are only available if you build a compiler with the + appropriate options, so only create those builtins with the appropriate + compiler option. Create Altivec and VSX builtins on machines with at + least the general purpose extensions (970 and newer) to allow the use of + the target attribute. */ if (TARGET_PAIRED_FLOAT) paired_init_builtins (); if (TARGET_EXTRA_BUILTINS) @@ -17991,8 +18042,10 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, /* unsigned 2 argument functions. */ case ALTIVEC_BUILTIN_VMULEUB: case ALTIVEC_BUILTIN_VMULEUH: + case ALTIVEC_BUILTIN_VMULEUW: case ALTIVEC_BUILTIN_VMULOUB: case ALTIVEC_BUILTIN_VMULOUH: + case ALTIVEC_BUILTIN_VMULOUW: case CRYPTO_BUILTIN_VCIPHER: case CRYPTO_BUILTIN_VCIPHERLAST: case CRYPTO_BUILTIN_VNCIPHER: @@ -18090,6 +18143,14 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, h.uns_p[2] = 1; break; + /* unsigned second arguments (vector shift right). */ + case ALTIVEC_BUILTIN_VSRB: + case ALTIVEC_BUILTIN_VSRH: + case ALTIVEC_BUILTIN_VSRW: + case P8V_BUILTIN_VSRD: + h.uns_p[2] = 1; + break; + default: break; } @@ -18161,11 +18222,11 @@ rs6000_common_init_builtins (void) builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node; } - /* Paired and SPE builtins are only available if you build a compiler with - the appropriate options, so only create those builtins with the - appropriate compiler option. 
Create Altivec and VSX builtins on machines - with at least the general purpose extensions (970 and newer) to allow the - use of the target attribute.. */ + /* Paired builtins are only available if you build a compiler with the + appropriate options, so only create those builtins with the appropriate + compiler option. Create Altivec and VSX builtins on machines with at + least the general purpose extensions (970 and newer) to allow the use of + the target attribute.. */ if (TARGET_EXTRA_BUILTINS) builtin_mask |= RS6000_BTM_COMMON; @@ -18409,7 +18470,7 @@ rs6000_common_init_builtins (void) mode0 = insn_data[icode].operand[0].mode; if (mode0 == V2SImode) { - /* code for SPE */ + /* code for paired single */ if (! (type = v2si_ftype)) { v2si_ftype @@ -18603,113 +18664,6 @@ rs6000_init_libfuncs (void) } } - -/* Expand a block clear operation, and return 1 if successful. Return 0 - if we should let the compiler generate normal code. - - operands[0] is the destination - operands[1] is the length - operands[3] is the alignment */ - -int -expand_block_clear (rtx operands[]) -{ - rtx orig_dest = operands[0]; - rtx bytes_rtx = operands[1]; - rtx align_rtx = operands[3]; - bool constp = (GET_CODE (bytes_rtx) == CONST_INT); - HOST_WIDE_INT align; - HOST_WIDE_INT bytes; - int offset; - int clear_bytes; - int clear_step; - - /* If this is not a fixed size move, just call memcpy */ - if (! constp) - return 0; - - /* This must be a fixed size alignment */ - gcc_assert (GET_CODE (align_rtx) == CONST_INT); - align = INTVAL (align_rtx) * BITS_PER_UNIT; - - /* Anything to clear? */ - bytes = INTVAL (bytes_rtx); - if (bytes <= 0) - return 1; - - /* Use the builtin memset after a point, to avoid huge code bloat. - When optimize_size, avoid any significant code bloat; calling - memset is about 4 instructions, so allow for one instruction to - load zero and three to do clearing. */ - if (TARGET_ALTIVEC && align >= 128) - clear_step = 16; - else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT)) - clear_step = 8; - else - clear_step = 4; - - if (optimize_size && bytes > 3 * clear_step) - return 0; - if (! optimize_size && bytes > 8 * clear_step) - return 0; - - for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes) - { - machine_mode mode = BLKmode; - rtx dest; - - if (bytes >= 16 && TARGET_ALTIVEC && align >= 128) - { - clear_bytes = 16; - mode = V4SImode; - } - else if (bytes >= 8 && TARGET_POWERPC64 - && (align >= 64 || !STRICT_ALIGNMENT)) - { - clear_bytes = 8; - mode = DImode; - if (offset == 0 && align < 64) - { - rtx addr; - - /* If the address form is reg+offset with offset not a - multiple of four, reload into reg indirect form here - rather than waiting for reload. This way we get one - reload, not one per store. 
*/ - addr = XEXP (orig_dest, 0); - if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) - && GET_CODE (XEXP (addr, 1)) == CONST_INT - && (INTVAL (XEXP (addr, 1)) & 3) != 0) - { - addr = copy_addr_to_reg (addr); - orig_dest = replace_equiv_address (orig_dest, addr); - } - } - } - else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT)) - { /* move 4 bytes */ - clear_bytes = 4; - mode = SImode; - } - else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT)) - { /* move 2 bytes */ - clear_bytes = 2; - mode = HImode; - } - else /* move 1 byte at a time */ - { - clear_bytes = 1; - mode = QImode; - } - - dest = adjust_address (orig_dest, mode, offset); - - emit_move_insn (dest, CONST0_RTX (mode)); - } - - return 1; -} - /* Emit a potentially record-form instruction, setting DST from SRC. If DOT is 0, that is all; otherwise, set CCREG to the result of the signed comparison of DST with zero. If DOT is 1, the generated RTL @@ -18717,7 +18671,7 @@ expand_block_clear (rtx operands[]) is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and a separate COMPARE. */ -static void +void rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg) { if (dot == 0) @@ -18746,1330 +18700,6 @@ rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg) } } -/* Figure out the correct instructions to generate to load data for - block compare. MODE is used for the read from memory, and - data is zero extended if REG is wider than MODE. If LE code - is being generated, bswap loads are used. - - REG is the destination register to move the data into. - MEM is the memory block being read. - MODE is the mode of memory to use for the read. */ -static void -do_load_for_compare (rtx reg, rtx mem, machine_mode mode) -{ - switch (GET_MODE (reg)) - { - case DImode: - switch (mode) - { - case QImode: - emit_insn (gen_zero_extendqidi2 (reg, mem)); - break; - case HImode: - { - rtx src = mem; - if (!BYTES_BIG_ENDIAN) - { - src = gen_reg_rtx (HImode); - emit_insn (gen_bswaphi2 (src, mem)); - } - emit_insn (gen_zero_extendhidi2 (reg, src)); - break; - } - case SImode: - { - rtx src = mem; - if (!BYTES_BIG_ENDIAN) - { - src = gen_reg_rtx (SImode); - emit_insn (gen_bswapsi2 (src, mem)); - } - emit_insn (gen_zero_extendsidi2 (reg, src)); - } - break; - case DImode: - if (!BYTES_BIG_ENDIAN) - emit_insn (gen_bswapdi2 (reg, mem)); - else - emit_insn (gen_movdi (reg, mem)); - break; - default: - gcc_unreachable (); - } - break; - - case SImode: - switch (mode) - { - case QImode: - emit_insn (gen_zero_extendqisi2 (reg, mem)); - break; - case HImode: - { - rtx src = mem; - if (!BYTES_BIG_ENDIAN) - { - src = gen_reg_rtx (HImode); - emit_insn (gen_bswaphi2 (src, mem)); - } - emit_insn (gen_zero_extendhisi2 (reg, src)); - break; - } - case SImode: - if (!BYTES_BIG_ENDIAN) - emit_insn (gen_bswapsi2 (reg, mem)); - else - emit_insn (gen_movsi (reg, mem)); - break; - case DImode: - /* DImode is larger than the destination reg so is not expected. */ - gcc_unreachable (); - break; - default: - gcc_unreachable (); - } - break; - default: - gcc_unreachable (); - break; - } -} - -/* Select the mode to be used for reading the next chunk of bytes - in the compare. - - OFFSET is the current read offset from the beginning of the block. - BYTES is the number of bytes remaining to be read. - ALIGN is the minimum alignment of the memory blocks being compared in bytes. - WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is - the largest allowable mode. 
*/ -static machine_mode -select_block_compare_mode (unsigned HOST_WIDE_INT offset, - unsigned HOST_WIDE_INT bytes, - unsigned HOST_WIDE_INT align, bool word_mode_ok) -{ - /* First see if we can do a whole load unit - as that will be more efficient than a larger load + shift. */ - - /* If big, use biggest chunk. - If exactly chunk size, use that size. - If remainder can be done in one piece with shifting, do that. - Do largest chunk possible without violating alignment rules. */ - - /* The most we can read without potential page crossing. */ - unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align); - - if (word_mode_ok && bytes >= UNITS_PER_WORD) - return word_mode; - else if (bytes == GET_MODE_SIZE (SImode)) - return SImode; - else if (bytes == GET_MODE_SIZE (HImode)) - return HImode; - else if (bytes == GET_MODE_SIZE (QImode)) - return QImode; - else if (bytes < GET_MODE_SIZE (SImode) - && offset >= GET_MODE_SIZE (SImode) - bytes) - /* This matches the case were we have SImode and 3 bytes - and offset >= 1 and permits us to move back one and overlap - with the previous read, thus avoiding having to shift - unwanted bytes off of the input. */ - return SImode; - else if (word_mode_ok && bytes < UNITS_PER_WORD - && offset >= UNITS_PER_WORD-bytes) - /* Similarly, if we can use DImode it will get matched here and - can do an overlapping read that ends at the end of the block. */ - return word_mode; - else if (word_mode_ok && maxread >= UNITS_PER_WORD) - /* It is safe to do all remaining in one load of largest size, - possibly with a shift to get rid of unwanted bytes. */ - return word_mode; - else if (maxread >= GET_MODE_SIZE (SImode)) - /* It is safe to do all remaining in one SImode load, - possibly with a shift to get rid of unwanted bytes. */ - return SImode; - else if (bytes > GET_MODE_SIZE (SImode)) - return SImode; - else if (bytes > GET_MODE_SIZE (HImode)) - return HImode; - - /* final fallback is do one byte */ - return QImode; -} - -/* Compute the alignment of pointer+OFFSET where the original alignment - of pointer was BASE_ALIGN. */ -static unsigned HOST_WIDE_INT -compute_current_alignment (unsigned HOST_WIDE_INT base_align, - unsigned HOST_WIDE_INT offset) -{ - if (offset == 0) - return base_align; - return min (base_align, offset & -offset); -} - -/* Expand a block compare operation, and return true if successful. - Return false if we should let the compiler generate normal code, - probably a memcmp call. - - OPERANDS[0] is the target (result). - OPERANDS[1] is the first source. - OPERANDS[2] is the second source. - OPERANDS[3] is the length. - OPERANDS[4] is the alignment. */ -bool -expand_block_compare (rtx operands[]) -{ - rtx target = operands[0]; - rtx orig_src1 = operands[1]; - rtx orig_src2 = operands[2]; - rtx bytes_rtx = operands[3]; - rtx align_rtx = operands[4]; - HOST_WIDE_INT cmp_bytes = 0; - rtx src1 = orig_src1; - rtx src2 = orig_src2; - - /* This case is complicated to handle because the subtract - with carry instructions do not generate the 64-bit - carry and so we must emit code to calculate it ourselves. - We choose not to implement this yet. */ - if (TARGET_32BIT && TARGET_POWERPC64) - return false; - - /* If this is not a fixed size compare, just call memcmp. */ - if (!CONST_INT_P (bytes_rtx)) - return false; - - /* This must be a fixed size alignment. */ - if (!CONST_INT_P (align_rtx)) - return false; - - unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT; - - /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. 
*/ - if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1)) - || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2))) - return false; - - gcc_assert (GET_MODE (target) == SImode); - - /* Anything to move? */ - unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx); - if (bytes == 0) - return true; - - /* The code generated for p7 and older is not faster than glibc - memcmp if alignment is small and length is not short, so bail - out to avoid those conditions. */ - if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED - && ((base_align == 1 && bytes > 16) - || (base_align == 2 && bytes > 32))) - return false; - - rtx tmp_reg_src1 = gen_reg_rtx (word_mode); - rtx tmp_reg_src2 = gen_reg_rtx (word_mode); - /* P7/P8 code uses cond for subfc. but P9 uses - it for cmpld which needs CCUNSmode. */ - rtx cond; - if (TARGET_P9_MISC) - cond = gen_reg_rtx (CCUNSmode); - else - cond = gen_reg_rtx (CCmode); - - /* If we have an LE target without ldbrx and word_mode is DImode, - then we must avoid using word_mode. */ - int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX - && word_mode == DImode); - - /* Strategy phase. How many ops will this take and should we expand it? */ - - unsigned HOST_WIDE_INT offset = 0; - machine_mode load_mode = - select_block_compare_mode (offset, bytes, base_align, word_mode_ok); - unsigned int load_mode_size = GET_MODE_SIZE (load_mode); - - /* We don't want to generate too much code. */ - unsigned HOST_WIDE_INT max_bytes = - load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit; - if (!IN_RANGE (bytes, 1, max_bytes)) - return false; - - bool generate_6432_conversion = false; - rtx convert_label = NULL; - rtx final_label = NULL; - - /* Example of generated code for 18 bytes aligned 1 byte. - Compiled with -fno-reorder-blocks for clarity. - ldbrx 10,31,8 - ldbrx 9,7,8 - subfc. 9,9,10 - bne 0,.L6487 - addi 9,12,8 - addi 5,11,8 - ldbrx 10,0,9 - ldbrx 9,0,5 - subfc. 9,9,10 - bne 0,.L6487 - addi 9,12,16 - lhbrx 10,0,9 - addi 9,11,16 - lhbrx 9,0,9 - subf 9,9,10 - b .L6488 - .p2align 4,,15 - .L6487: #convert_label - popcntd 9,9 - subfe 10,10,10 - or 9,9,10 - .L6488: #final_label - extsw 10,9 - - We start off with DImode for two blocks that jump to the DI->SI conversion - if the difference is found there, then a final block of HImode that skips - the DI->SI conversion. */ - - while (bytes > 0) - { - unsigned int align = compute_current_alignment (base_align, offset); - if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) - load_mode = select_block_compare_mode (offset, bytes, align, - word_mode_ok); - else - load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok); - load_mode_size = GET_MODE_SIZE (load_mode); - if (bytes >= load_mode_size) - cmp_bytes = load_mode_size; - else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) - { - /* Move this load back so it doesn't go past the end. - P8/P9 can do this efficiently. */ - unsigned int extra_bytes = load_mode_size - bytes; - cmp_bytes = bytes; - if (extra_bytes < offset) - { - offset -= extra_bytes; - cmp_bytes = load_mode_size; - bytes = cmp_bytes; - } - } - else - /* P7 and earlier can't do the overlapping load trick fast, - so this forces a non-overlapping load and a shift to get - rid of the extra bytes. 
*/ - cmp_bytes = bytes; - - src1 = adjust_address (orig_src1, load_mode, offset); - src2 = adjust_address (orig_src2, load_mode, offset); - - if (!REG_P (XEXP (src1, 0))) - { - rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); - src1 = replace_equiv_address (src1, src1_reg); - } - set_mem_size (src1, cmp_bytes); - - if (!REG_P (XEXP (src2, 0))) - { - rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); - src2 = replace_equiv_address (src2, src2_reg); - } - set_mem_size (src2, cmp_bytes); - - do_load_for_compare (tmp_reg_src1, src1, load_mode); - do_load_for_compare (tmp_reg_src2, src2, load_mode); - - if (cmp_bytes < load_mode_size) - { - /* Shift unneeded bytes off. */ - rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes)); - if (word_mode == DImode) - { - emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh)); - emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh)); - } - else - { - emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh)); - emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh)); - } - } - - int remain = bytes - cmp_bytes; - if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode)) - { - /* Target is larger than load size so we don't need to - reduce result size. */ - - /* We previously did a block that need 64->32 conversion but - the current block does not, so a label is needed to jump - to the end. */ - if (generate_6432_conversion && !final_label) - final_label = gen_label_rtx (); - - if (remain > 0) - { - /* This is not the last block, branch to the end if the result - of this subtract is not zero. */ - if (!final_label) - final_label = gen_label_rtx (); - rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); - rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2); - rtx cr = gen_reg_rtx (CCmode); - rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr); - emit_insn (gen_movsi (target, - gen_lowpart (SImode, tmp_reg_src2))); - rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx); - rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, - fin_ref, pc_rtx); - rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); - JUMP_LABEL (j) = final_label; - LABEL_NUSES (final_label) += 1; - } - else - { - if (word_mode == DImode) - { - emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1, - tmp_reg_src2)); - emit_insn (gen_movsi (target, - gen_lowpart (SImode, tmp_reg_src2))); - } - else - emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2)); - - if (final_label) - { - rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); - rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref)); - JUMP_LABEL(j) = final_label; - LABEL_NUSES (final_label) += 1; - emit_barrier (); - } - } - } - else - { - /* Do we need a 64->32 conversion block? We need the 64->32 - conversion even if target size == load_mode size because - the subtract generates one extra bit. */ - generate_6432_conversion = true; - - if (remain > 0) - { - if (!convert_label) - convert_label = gen_label_rtx (); - - /* Compare to zero and branch to convert_label if not zero. */ - rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label); - if (TARGET_P9_MISC) - { - /* Generate a compare, and convert with a setb later. */ - rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1, - tmp_reg_src2); - emit_insn (gen_rtx_SET (cond, cmp)); - } - else - /* Generate a subfc. and use the longer - sequence for conversion. 
*/ - if (TARGET_64BIT) - emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2, - tmp_reg_src1, cond)); - else - emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2, - tmp_reg_src1, cond)); - rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx); - rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, - cvt_ref, pc_rtx); - rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); - JUMP_LABEL(j) = convert_label; - LABEL_NUSES (convert_label) += 1; - } - else - { - /* Just do the subtract/compare. Since this is the last block - the convert code will be generated immediately following. */ - if (TARGET_P9_MISC) - { - rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1, - tmp_reg_src2); - emit_insn (gen_rtx_SET (cond, cmp)); - } - else - if (TARGET_64BIT) - emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2, - tmp_reg_src1)); - else - emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2, - tmp_reg_src1)); - } - } - - offset += cmp_bytes; - bytes -= cmp_bytes; - } - - if (generate_6432_conversion) - { - if (convert_label) - emit_label (convert_label); - - /* We need to produce DI result from sub, then convert to target SI - while maintaining <0 / ==0 / >0 properties. This sequence works: - subfc L,A,B - subfe H,H,H - popcntd L,L - rldimi L,H,6,0 - - This is an alternate one Segher cooked up if somebody - wants to expand this for something that doesn't have popcntd: - subfc L,a,b - subfe H,x,x - addic t,L,-1 - subfe v,t,L - or z,v,H - - And finally, p9 can just do this: - cmpld A,B - setb r */ - - if (TARGET_P9_MISC) - { - emit_insn (gen_setb_unsigned (target, cond)); - } - else - { - if (TARGET_64BIT) - { - rtx tmp_reg_ca = gen_reg_rtx (DImode); - emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca)); - emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2)); - emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca)); - emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2))); - } - else - { - rtx tmp_reg_ca = gen_reg_rtx (SImode); - emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca)); - emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2)); - emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca)); - } - } - } - - if (final_label) - emit_label (final_label); - - gcc_assert (bytes == 0); - return true; -} - -/* Generate alignment check and branch code to set up for - strncmp when we don't have DI alignment. - STRNCMP_LABEL is the label to branch if there is a page crossing. - SRC is the string pointer to be examined. - BYTES is the max number of bytes to compare. */ -static void -expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes) -{ - rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label); - rtx src_check = copy_addr_to_reg (XEXP (src, 0)); - if (GET_MODE (src_check) == SImode) - emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff))); - else - emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff))); - rtx cond = gen_reg_rtx (CCmode); - emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check, - GEN_INT (4096 - bytes))); - - rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx); - - rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, - pc_rtx, lab_ref); - rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); - JUMP_LABEL (j) = strncmp_label; - LABEL_NUSES (strncmp_label) += 1; -} - -/* Expand a string compare operation with length, and return - true if successful. Return false if we should let the - compiler generate normal code, probably a strncmp call. - - OPERANDS[0] is the target (result). 
- OPERANDS[1] is the first source. - OPERANDS[2] is the second source. - If NO_LENGTH is zero, then: - OPERANDS[3] is the length. - OPERANDS[4] is the alignment in bytes. - If NO_LENGTH is nonzero, then: - OPERANDS[3] is the alignment in bytes. */ -bool -expand_strn_compare (rtx operands[], int no_length) -{ - rtx target = operands[0]; - rtx orig_src1 = operands[1]; - rtx orig_src2 = operands[2]; - rtx bytes_rtx, align_rtx; - if (no_length) - { - bytes_rtx = NULL; - align_rtx = operands[3]; - } - else - { - bytes_rtx = operands[3]; - align_rtx = operands[4]; - } - unsigned HOST_WIDE_INT cmp_bytes = 0; - rtx src1 = orig_src1; - rtx src2 = orig_src2; - - /* If we have a length, it must be constant. This simplifies things - a bit as we don't have to generate code to check if we've exceeded - the length. Later this could be expanded to handle this case. */ - if (!no_length && !CONST_INT_P (bytes_rtx)) - return false; - - /* This must be a fixed size alignment. */ - if (!CONST_INT_P (align_rtx)) - return false; - - unsigned int base_align = UINTVAL (align_rtx); - int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT; - int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT; - - /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */ - if (SLOW_UNALIGNED_ACCESS (word_mode, align1) - || SLOW_UNALIGNED_ACCESS (word_mode, align2)) - return false; - - gcc_assert (GET_MODE (target) == SImode); - - /* If we have an LE target without ldbrx and word_mode is DImode, - then we must avoid using word_mode. */ - int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX - && word_mode == DImode); - - unsigned int word_mode_size = GET_MODE_SIZE (word_mode); - - unsigned HOST_WIDE_INT offset = 0; - unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */ - unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */ - if (no_length) - /* Use this as a standin to determine the mode to use. */ - bytes = rs6000_string_compare_inline_limit * word_mode_size; - else - bytes = UINTVAL (bytes_rtx); - - machine_mode load_mode = - select_block_compare_mode (offset, bytes, base_align, word_mode_ok); - unsigned int load_mode_size = GET_MODE_SIZE (load_mode); - compare_length = rs6000_string_compare_inline_limit * load_mode_size; - - /* If we have equality at the end of the last compare and we have not - found the end of the string, we need to call strcmp/strncmp to - compare the remainder. */ - bool equality_compare_rest = false; - - if (no_length) - { - bytes = compare_length; - equality_compare_rest = true; - } - else - { - if (bytes <= compare_length) - compare_length = bytes; - else - equality_compare_rest = true; - } - - rtx result_reg = gen_reg_rtx (word_mode); - rtx final_move_label = gen_label_rtx (); - rtx final_label = gen_label_rtx (); - rtx begin_compare_label = NULL; - - if (base_align < 8) - { - /* Generate code that checks distance to 4k boundary for this case. */ - begin_compare_label = gen_label_rtx (); - rtx strncmp_label = gen_label_rtx (); - rtx jmp; - - /* Strncmp for power8 in glibc does this: - rldicl r8,r3,0,52 - cmpldi cr7,r8,4096-16 - bgt cr7,L(pagecross) */ - - /* Make sure that the length we use for the alignment test and - the subsequent code generation are in agreement so we do not - go past the length we tested for a 4k boundary crossing. 
*/ - unsigned HOST_WIDE_INT align_test = compare_length; - if (align_test < 8) - { - align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test); - base_align = align_test; - } - else - { - align_test = ROUND_UP (align_test, 8); - base_align = 8; - } - - if (align1 < 8) - expand_strncmp_align_check (strncmp_label, src1, align_test); - if (align2 < 8) - expand_strncmp_align_check (strncmp_label, src2, align_test); - - /* Now generate the following sequence: - - branch to begin_compare - - strncmp_label - - call to strncmp - - branch to final_label - - begin_compare_label */ - - rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label); - jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref)); - JUMP_LABEL (jmp) = begin_compare_label; - LABEL_NUSES (begin_compare_label) += 1; - emit_barrier (); - - emit_label (strncmp_label); - - if (!REG_P (XEXP (src1, 0))) - { - rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); - src1 = replace_equiv_address (src1, src1_reg); - } - - if (!REG_P (XEXP (src2, 0))) - { - rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); - src2 = replace_equiv_address (src2, src2_reg); - } - - if (no_length) - { - tree fun = builtin_decl_explicit (BUILT_IN_STRCMP); - emit_library_call_value (XEXP (DECL_RTL (fun), 0), - target, LCT_NORMAL, GET_MODE (target), 2, - force_reg (Pmode, XEXP (src1, 0)), Pmode, - force_reg (Pmode, XEXP (src2, 0)), Pmode); - } - else - { - /* -m32 -mpowerpc64 results in word_mode being DImode even - though otherwise it is 32-bit. The length arg to strncmp - is a size_t which will be the same size as pointers. */ - rtx len_rtx; - if (TARGET_64BIT) - len_rtx = gen_reg_rtx (DImode); - else - len_rtx = gen_reg_rtx (SImode); - - emit_move_insn (len_rtx, bytes_rtx); - - tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP); - emit_library_call_value (XEXP (DECL_RTL (fun), 0), - target, LCT_NORMAL, GET_MODE (target), 3, - force_reg (Pmode, XEXP (src1, 0)), Pmode, - force_reg (Pmode, XEXP (src2, 0)), Pmode, - len_rtx, GET_MODE (len_rtx)); - } - - rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); - jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref)); - JUMP_LABEL (jmp) = final_label; - LABEL_NUSES (final_label) += 1; - emit_barrier (); - emit_label (begin_compare_label); - } - - rtx cleanup_label = NULL; - rtx tmp_reg_src1 = gen_reg_rtx (word_mode); - rtx tmp_reg_src2 = gen_reg_rtx (word_mode); - - /* Generate sequence of ld/ldbrx, cmpb to compare out - to the length specified. */ - unsigned HOST_WIDE_INT bytes_to_compare = compare_length; - while (bytes_to_compare > 0) - { - /* Compare sequence: - check each 8B with: ld/ld cmpd bne - If equal, use rldicr/cmpb to check for zero byte. - cleanup code at end: - cmpb get byte that differs - cmpb look for zero byte - orc combine - cntlzd get bit of first zero/diff byte - subfic convert for rldcl use - rldcl rldcl extract diff/zero byte - subf subtract for final result - - The last compare can branch around the cleanup code if the - result is zero because the strings are exactly equal. */ - unsigned int align = compute_current_alignment (base_align, offset); - if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) - load_mode = select_block_compare_mode (offset, bytes_to_compare, align, - word_mode_ok); - else - load_mode = select_block_compare_mode (0, bytes_to_compare, align, - word_mode_ok); - load_mode_size = GET_MODE_SIZE (load_mode); - if (bytes_to_compare >= load_mode_size) - cmp_bytes = load_mode_size; - else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) - { - /* Move this load back so it doesn't go past the end. 
- P8/P9 can do this efficiently. */ - unsigned int extra_bytes = load_mode_size - bytes_to_compare; - cmp_bytes = bytes_to_compare; - if (extra_bytes < offset) - { - offset -= extra_bytes; - cmp_bytes = load_mode_size; - bytes_to_compare = cmp_bytes; - } - } - else - /* P7 and earlier can't do the overlapping load trick fast, - so this forces a non-overlapping load and a shift to get - rid of the extra bytes. */ - cmp_bytes = bytes_to_compare; - - src1 = adjust_address (orig_src1, load_mode, offset); - src2 = adjust_address (orig_src2, load_mode, offset); - - if (!REG_P (XEXP (src1, 0))) - { - rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); - src1 = replace_equiv_address (src1, src1_reg); - } - set_mem_size (src1, cmp_bytes); - - if (!REG_P (XEXP (src2, 0))) - { - rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); - src2 = replace_equiv_address (src2, src2_reg); - } - set_mem_size (src2, cmp_bytes); - - do_load_for_compare (tmp_reg_src1, src1, load_mode); - do_load_for_compare (tmp_reg_src2, src2, load_mode); - - /* We must always left-align the data we read, and - clear any bytes to the right that are beyond the string. - Otherwise the cmpb sequence won't produce the correct - results. The beginning of the compare will be done - with word_mode so will not have any extra shifts or - clear rights. */ - - if (load_mode_size < word_mode_size) - { - /* Rotate left first. */ - rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size)); - if (word_mode == DImode) - { - emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh)); - emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh)); - } - else - { - emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh)); - emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh)); - } - } - - if (cmp_bytes < word_mode_size) - { - /* Now clear right. This plus the rotate can be - turned into a rldicr instruction. */ - HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes); - rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb); - if (word_mode == DImode) - { - emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask)); - emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask)); - } - else - { - emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask)); - emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask)); - } - } - - /* Cases to handle. A and B are chunks of the two strings. - 1: Not end of comparison: - A != B: branch to cleanup code to compute result. - A == B: check for 0 byte, next block if not found. - 2: End of the inline comparison: - A != B: branch to cleanup code to compute result. - A == B: check for 0 byte, call strcmp/strncmp - 3: compared requested N bytes: - A == B: branch to result 0. - A != B: cleanup code to compute result. */ - - unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes; - - rtx dst_label; - if (remain > 0 || equality_compare_rest) - { - /* Branch to cleanup code, otherwise fall through to do - more compares. */ - if (!cleanup_label) - cleanup_label = gen_label_rtx (); - dst_label = cleanup_label; - } - else - /* Branch to end and produce result of 0. */ - dst_label = final_move_label; - - rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label); - rtx cond = gen_reg_rtx (CCmode); - - /* Always produce the 0 result, it is needed if - cmpb finds a 0 byte in this chunk. 
*/ - rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2); - rs6000_emit_dot_insn (result_reg, tmp, 1, cond); - - rtx cmp_rtx; - if (remain == 0 && !equality_compare_rest) - cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx); - else - cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx); - - rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, - lab_ref, pc_rtx); - rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); - JUMP_LABEL (j) = dst_label; - LABEL_NUSES (dst_label) += 1; - - if (remain > 0 || equality_compare_rest) - { - /* Generate a cmpb to test for a 0 byte and branch - to final result if found. */ - rtx cmpb_zero = gen_reg_rtx (word_mode); - rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label); - rtx condz = gen_reg_rtx (CCmode); - rtx zero_reg = gen_reg_rtx (word_mode); - if (word_mode == SImode) - { - emit_insn (gen_movsi (zero_reg, GEN_INT (0))); - emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg)); - if (cmp_bytes < word_mode_size) - { - /* Don't want to look at zero bytes past end. */ - HOST_WIDE_INT mb = - BITS_PER_UNIT * (word_mode_size - cmp_bytes); - rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb); - emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask)); - } - } - else - { - emit_insn (gen_movdi (zero_reg, GEN_INT (0))); - emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg)); - if (cmp_bytes < word_mode_size) - { - /* Don't want to look at zero bytes past end. */ - HOST_WIDE_INT mb = - BITS_PER_UNIT * (word_mode_size - cmp_bytes); - rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb); - emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask)); - } - } - - emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg)); - rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx); - rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx, - lab_ref_fin, pc_rtx); - rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); - JUMP_LABEL (j2) = final_move_label; - LABEL_NUSES (final_move_label) += 1; - - } - - offset += cmp_bytes; - bytes_to_compare -= cmp_bytes; - } - - if (equality_compare_rest) - { - /* Update pointers past what has been compared already. */ - src1 = adjust_address (orig_src1, load_mode, offset); - src2 = adjust_address (orig_src2, load_mode, offset); - - if (!REG_P (XEXP (src1, 0))) - { - rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); - src1 = replace_equiv_address (src1, src1_reg); - } - set_mem_size (src1, cmp_bytes); - - if (!REG_P (XEXP (src2, 0))) - { - rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); - src2 = replace_equiv_address (src2, src2_reg); - } - set_mem_size (src2, cmp_bytes); - - /* Construct call to strcmp/strncmp to compare the rest of the string. 
*/ - if (no_length) - { - tree fun = builtin_decl_explicit (BUILT_IN_STRCMP); - emit_library_call_value (XEXP (DECL_RTL (fun), 0), - target, LCT_NORMAL, GET_MODE (target), 2, - force_reg (Pmode, XEXP (src1, 0)), Pmode, - force_reg (Pmode, XEXP (src2, 0)), Pmode); - } - else - { - rtx len_rtx; - if (TARGET_64BIT) - len_rtx = gen_reg_rtx (DImode); - else - len_rtx = gen_reg_rtx (SImode); - - emit_move_insn (len_rtx, GEN_INT (bytes - compare_length)); - tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP); - emit_library_call_value (XEXP (DECL_RTL (fun), 0), - target, LCT_NORMAL, GET_MODE (target), 3, - force_reg (Pmode, XEXP (src1, 0)), Pmode, - force_reg (Pmode, XEXP (src2, 0)), Pmode, - len_rtx, GET_MODE (len_rtx)); - } - - rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); - rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref)); - JUMP_LABEL (jmp) = final_label; - LABEL_NUSES (final_label) += 1; - emit_barrier (); - } - - if (cleanup_label) - emit_label (cleanup_label); - - /* Generate the final sequence that identifies the differing - byte and generates the final result, taking into account - zero bytes: - - cmpb cmpb_result1, src1, src2 - cmpb cmpb_result2, src1, zero - orc cmpb_result1, cmp_result1, cmpb_result2 - cntlzd get bit of first zero/diff byte - addi convert for rldcl use - rldcl rldcl extract diff/zero byte - subf subtract for final result - */ - - rtx cmpb_diff = gen_reg_rtx (word_mode); - rtx cmpb_zero = gen_reg_rtx (word_mode); - rtx rot_amt = gen_reg_rtx (word_mode); - rtx zero_reg = gen_reg_rtx (word_mode); - - rtx rot1_1 = gen_reg_rtx (word_mode); - rtx rot1_2 = gen_reg_rtx (word_mode); - rtx rot2_1 = gen_reg_rtx (word_mode); - rtx rot2_2 = gen_reg_rtx (word_mode); - - if (word_mode == SImode) - { - emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2)); - emit_insn (gen_movsi (zero_reg, GEN_INT (0))); - emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg)); - emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff)); - emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero)); - emit_insn (gen_clzsi2 (rot_amt, cmpb_diff)); - emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8))); - emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1, - gen_lowpart (SImode, rot_amt))); - emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff))); - emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2, - gen_lowpart (SImode, rot_amt))); - emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff))); - emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2)); - } - else - { - emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2)); - emit_insn (gen_movdi (zero_reg, GEN_INT (0))); - emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg)); - emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff)); - emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero)); - emit_insn (gen_clzdi2 (rot_amt, cmpb_diff)); - emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8))); - emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1, - gen_lowpart (SImode, rot_amt))); - emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff))); - emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2, - gen_lowpart (SImode, rot_amt))); - emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff))); - emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2)); - } - - emit_label (final_move_label); - emit_insn (gen_movsi (target, - gen_lowpart (SImode, result_reg))); - emit_label (final_label); - return true; -} - -/* Expand a block move operation, and return 1 if successful. Return 0 - if we should let the compiler generate normal code. 
- - operands[0] is the destination - operands[1] is the source - operands[2] is the length - operands[3] is the alignment */ - -#define MAX_MOVE_REG 4 - -int -expand_block_move (rtx operands[]) -{ - rtx orig_dest = operands[0]; - rtx orig_src = operands[1]; - rtx bytes_rtx = operands[2]; - rtx align_rtx = operands[3]; - int constp = (GET_CODE (bytes_rtx) == CONST_INT); - int align; - int bytes; - int offset; - int move_bytes; - rtx stores[MAX_MOVE_REG]; - int num_reg = 0; - - /* If this is not a fixed size move, just call memcpy */ - if (! constp) - return 0; - - /* This must be a fixed size alignment */ - gcc_assert (GET_CODE (align_rtx) == CONST_INT); - align = INTVAL (align_rtx) * BITS_PER_UNIT; - - /* Anything to move? */ - bytes = INTVAL (bytes_rtx); - if (bytes <= 0) - return 1; - - if (bytes > rs6000_block_move_inline_limit) - return 0; - - for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes) - { - union { - rtx (*movmemsi) (rtx, rtx, rtx, rtx); - rtx (*mov) (rtx, rtx); - } gen_func; - machine_mode mode = BLKmode; - rtx src, dest; - - /* Altivec first, since it will be faster than a string move - when it applies, and usually not significantly larger. */ - if (TARGET_ALTIVEC && bytes >= 16 && align >= 128) - { - move_bytes = 16; - mode = V4SImode; - gen_func.mov = gen_movv4si; - } - else if (TARGET_STRING - && bytes > 24 /* move up to 32 bytes at a time */ - && ! fixed_regs[5] - && ! fixed_regs[6] - && ! fixed_regs[7] - && ! fixed_regs[8] - && ! fixed_regs[9] - && ! fixed_regs[10] - && ! fixed_regs[11] - && ! fixed_regs[12]) - { - move_bytes = (bytes > 32) ? 32 : bytes; - gen_func.movmemsi = gen_movmemsi_8reg; - } - else if (TARGET_STRING - && bytes > 16 /* move up to 24 bytes at a time */ - && ! fixed_regs[5] - && ! fixed_regs[6] - && ! fixed_regs[7] - && ! fixed_regs[8] - && ! fixed_regs[9] - && ! fixed_regs[10]) - { - move_bytes = (bytes > 24) ? 24 : bytes; - gen_func.movmemsi = gen_movmemsi_6reg; - } - else if (TARGET_STRING - && bytes > 8 /* move up to 16 bytes at a time */ - && ! fixed_regs[5] - && ! fixed_regs[6] - && ! fixed_regs[7] - && ! fixed_regs[8]) - { - move_bytes = (bytes > 16) ? 16 : bytes; - gen_func.movmemsi = gen_movmemsi_4reg; - } - else if (bytes >= 8 && TARGET_POWERPC64 - && (align >= 64 || !STRICT_ALIGNMENT)) - { - move_bytes = 8; - mode = DImode; - gen_func.mov = gen_movdi; - if (offset == 0 && align < 64) - { - rtx addr; - - /* If the address form is reg+offset with offset not a - multiple of four, reload into reg indirect form here - rather than waiting for reload. This way we get one - reload, not one per load and/or store. */ - addr = XEXP (orig_dest, 0); - if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) - && GET_CODE (XEXP (addr, 1)) == CONST_INT - && (INTVAL (XEXP (addr, 1)) & 3) != 0) - { - addr = copy_addr_to_reg (addr); - orig_dest = replace_equiv_address (orig_dest, addr); - } - addr = XEXP (orig_src, 0); - if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) - && GET_CODE (XEXP (addr, 1)) == CONST_INT - && (INTVAL (XEXP (addr, 1)) & 3) != 0) - { - addr = copy_addr_to_reg (addr); - orig_src = replace_equiv_address (orig_src, addr); - } - } - } - else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64) - { /* move up to 8 bytes at a time */ - move_bytes = (bytes > 8) ? 
8 : bytes; - gen_func.movmemsi = gen_movmemsi_2reg; - } - else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT)) - { /* move 4 bytes */ - move_bytes = 4; - mode = SImode; - gen_func.mov = gen_movsi; - } - else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT)) - { /* move 2 bytes */ - move_bytes = 2; - mode = HImode; - gen_func.mov = gen_movhi; - } - else if (TARGET_STRING && bytes > 1) - { /* move up to 4 bytes at a time */ - move_bytes = (bytes > 4) ? 4 : bytes; - gen_func.movmemsi = gen_movmemsi_1reg; - } - else /* move 1 byte at a time */ - { - move_bytes = 1; - mode = QImode; - gen_func.mov = gen_movqi; - } - - src = adjust_address (orig_src, mode, offset); - dest = adjust_address (orig_dest, mode, offset); - - if (mode != BLKmode) - { - rtx tmp_reg = gen_reg_rtx (mode); - - emit_insn ((*gen_func.mov) (tmp_reg, src)); - stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg); - } - - if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes) - { - int i; - for (i = 0; i < num_reg; i++) - emit_insn (stores[i]); - num_reg = 0; - } - - if (mode == BLKmode) - { - /* Move the address into scratch registers. The movmemsi - patterns require zero offset. */ - if (!REG_P (XEXP (src, 0))) - { - rtx src_reg = copy_addr_to_reg (XEXP (src, 0)); - src = replace_equiv_address (src, src_reg); - } - set_mem_size (src, move_bytes); - - if (!REG_P (XEXP (dest, 0))) - { - rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0)); - dest = replace_equiv_address (dest, dest_reg); - } - set_mem_size (dest, move_bytes); - - emit_insn ((*gen_func.movmemsi) (dest, src, - GEN_INT (move_bytes & 31), - align_rtx)); - } - } - - return 1; -} - - -/* Return a string to perform a load_multiple operation. - operands[0] is the vector. - operands[1] is the source address. - operands[2] is the first destination register. */ - -const char * -rs6000_output_load_multiple (rtx operands[3]) -{ - /* We have to handle the case where the pseudo used to contain the address - is assigned to one of the output registers. */ - int i, j; - int words = XVECLEN (operands[0], 0); - rtx xop[10]; - - if (XVECLEN (operands[0], 0) == 1) - return "lwz %2,0(%1)"; - - for (i = 0; i < words; i++) - if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1])) - { - if (i == words-1) - { - xop[0] = GEN_INT (4 * (words-1)); - xop[1] = operands[1]; - xop[2] = operands[2]; - output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop); - return ""; - } - else if (i == 0) - { - xop[0] = GEN_INT (4 * (words-1)); - xop[1] = operands[1]; - xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); - output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop); - return ""; - } - else - { - for (j = 0; j < words; j++) - if (j != i) - { - xop[0] = GEN_INT (j * 4); - xop[1] = operands[1]; - xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j); - output_asm_insn ("lwz %2,%0(%1)", xop); - } - xop[0] = GEN_INT (i * 4); - xop[1] = operands[1]; - output_asm_insn ("lwz %1,%0(%1)", xop); - return ""; - } - } - - return "lswi %2,%1,%N0"; -} - /* A validation routine: say whether CODE, a condition code, and MODE match. The other alternatives either don't make sense or should @@ -23123,7 +21753,7 @@ print_operand (FILE *file, rtx x, int code) } return; - /* Print AltiVec or SPE memory operand. */ + /* Print AltiVec memory operand. 
*/ case 'y': { rtx tmp; @@ -26169,10 +24799,6 @@ rs6000_savres_strategy (rs6000_stack_t *info, +---------------------------------------+ | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y +---------------------------------------+ - | SPE: area for 64-bit GP registers | - +---------------------------------------+ - | SPE alignment padding | - +---------------------------------------+ | saved CR (C) | 8+P+A+V+L+X+W+Y+Z +---------------------------------------+ | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C @@ -29970,7 +28596,6 @@ rs6000_emit_epilogue (int sibcall) if (regno == INVALID_REGNUM) break; - /* Note: possible use of r0 here to address SPE regs. */ mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx, info->ehrd_offset + frame_off + reg_size * (int) i); @@ -36817,7 +35442,7 @@ altivec_expand_vec_perm_const (rtx operands[4]) (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct : CODE_FOR_altivec_vmrghw_direct), { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, - { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew, + { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew_v4si, { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } }, { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow, { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } } @@ -37000,7 +35625,7 @@ altivec_expand_vec_perm_const (rtx operands[4]) return false; } -/* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation. +/* Expand a Paired Single or VSX Permute Doubleword constant permutation. Return true if we match an efficient implementation. */ static bool @@ -37227,10 +35852,8 @@ rs6000_parallel_return (machine_mode mode, /* Target hook for TARGET_FUNCTION_VALUE. - On the SPE, both FPs and vectors are returned in r3. - - On RS/6000 an integer value is in r3 and a floating-point value is in - fp1, unless -msoft-float. */ + An integer value is in r3 and a floating-point value is in fp1, + unless -msoft-float. */ static rtx rs6000_function_value (const_tree valtype, @@ -37442,7 +36065,7 @@ rs6000_initial_elimination_offset (int from, int to) return offset; } -/* Fill in sizes for SPE register high parts in table used by unwinder. */ +/* Fill in sizes of registers used by unwinder. */ static void rs6000_init_dwarf_reg_sizes_extra (tree address) @@ -42408,6 +41031,49 @@ rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf); } +void +rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2) +{ + rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3; + + rtx_tmp0 = gen_reg_rtx (V2DImode); + rtx_tmp1 = gen_reg_rtx (V2DImode); + + /* The destination of the vmrgew instruction layout is: + rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0]. + Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the + vmrgew instruction will be correct. 
*/ + if (VECTOR_ELT_ORDER_BIG) + { + emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0))); + emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3))); + } + else + { + emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3))); + emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0))); + } + + rtx_tmp2 = gen_reg_rtx (V4SFmode); + rtx_tmp3 = gen_reg_rtx (V4SFmode); + + if (signed_convert) + { + emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0)); + emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1)); + } + else + { + emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0)); + emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1)); + } + + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3)); + else + emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2)); +} + /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ static bool |
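The new rs6000_gimple_fold_builtin cases in this commit lower the AltiVec shift and rotate intrinsics to generic GIMPLE tree codes instead of leaving them as opaque builtin calls, so later GIMPLE passes can treat them like ordinary shifts and rotates. A minimal sketch of source-level code that should exercise each new folding path follows; the function names are illustrative only (not part of the patch), and the builtin mappings shown in the comments are the standard altivec.h ones. Compile with -maltivec (or a -mcpu that implies it).

#include <altivec.h>

/* vec_rl selects ALTIVEC_BUILTIN_VRLW; now folded to LROTATE_EXPR.  */
vector unsigned int
rotate_each (vector unsigned int x, vector unsigned int n)
{
  return vec_rl (x, n);
}

/* vec_sra selects ALTIVEC_BUILTIN_VSRAW; now folded to RSHIFT_EXPR,
   which is an arithmetic shift because the element type is signed.  */
vector signed int
shift_right_alg (vector signed int x, vector unsigned int n)
{
  return vec_sra (x, n);
}

/* vec_sl selects ALTIVEC_BUILTIN_VSLW; the fold to LSHIFT_EXPR only
   happens when the element type wraps on overflow (as for the
   unsigned elements here), since the new code bails out for element
   types where shift overflow would be undefined.  */
vector unsigned int
shift_left (vector unsigned int x, vector unsigned int n)
{
  return vec_sl (x, n);
}

/* vec_sr selects ALTIVEC_BUILTIN_VSRW; the fold view-converts the
   input to an unsigned vector so the generic RSHIFT_EXPR is a
   logical shift, then converts the result back to the signed
   lhs type.  */
vector signed int
shift_right_log (vector signed int x, vector unsigned int n)
{
  return vec_sr (x, n);
}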