-rw-r--r--  gcc/ChangeLog                  |  22
-rw-r--r--  gcc/config/i386/darwin.h       |   3
-rw-r--r--  gcc/config/i386/i386-protos.h  |   5
-rw-r--r--  gcc/config/i386/i386.c         | 270
-rw-r--r--  gcc/config/i386/i386.h         |   5
-rw-r--r--  gcc/config/i386/i386.md        |  75
-rw-r--r--  gcc/config/i386/sse.md         |  43
7 files changed, 387 insertions, 36 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 380cdb84162..05fac8a48a2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2007-02-09  Stuart Hastings  <stuart@apple.com>
+	    Richard Henderson  <rth@redhat.com>
+
+	* gcc/config/i386/i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
+	* gcc/config/i386/darwin.h: (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
+	* gcc/config/i386/i386.md (fixuns_trunc<mode>si2, fixuns_truncsfhi2,
+	fixuns_truncdfhi2): New.
+	(fix_truncsfdi_sse): Call ix86_expand_convert_sign_didf_sse.
+	(floatunsdidf2): Call ix86_expand_convert_uns_didf_sse.
+	(floatunssisf2): Add call to ix86_expand_convert_uns_sisf_sse.
+	(floatunssidf2): Allow nonimmediate source.
+	* gcc/config/i386/sse.md (movdi_to_sse): New.  (vec_concatv2di): Drop '*'.
+	* gcc/config/i386/i386-protos.h (ix86_expand_convert_uns_si_sse,
+	ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
+	ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse): New.
+	* gcc/config/i386/i386.c (ix86_expand_convert_uns_si_sse,
+	ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
+	ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse,
+	ix86_build_const_vector, ix86_expand_vector_init_one_nonzero): New.
+	(ix86_build_signbit_mask): Fix decl of v, refactor to call ix86_build_const_vector.
+	(x86_emit_floatuns): Rewrite.
+
 2007-02-10  Manuel Lopez-Ibanez <manu@gcc.gnu.org>
 
 	* genautomata.c (longest_path_length): Delete unused function.
diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h
index 6b509c7cb8d..109fe79ae67 100644
--- a/gcc/config/i386/darwin.h
+++ b/gcc/config/i386/darwin.h
@@ -66,6 +66,9 @@ Boston, MA 02110-1301, USA.  */
 #undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
 #define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0)
 
+#undef TARGET_KEEPS_VECTOR_ALIGNED_STACK
+#define TARGET_KEEPS_VECTOR_ALIGNED_STACK 1
+
 /* We want -fPIC by default, unless we're using -static to compile for
    the kernel or some such.  */
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 184b5b21d3e..48af4d5507c 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -89,6 +89,11 @@ extern void ix86_expand_binary_operator (enum rtx_code,
 extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
 extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
 					rtx[]);
+extern void ix86_expand_convert_uns_si_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
+extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
 extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
 extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
 					    rtx[]);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e5ff76e2d22..2dbfe47af2c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1518,6 +1518,9 @@ static const char *ix86_mangle_fundamental_type (tree);
 static tree ix86_stack_protect_fail (void);
 static rtx ix86_internal_arg_pointer (void);
 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
+static rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
+static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
+						 rtx, rtx, int);
 
 /* This function is only used on Solaris.  */
 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
@@ -9858,6 +9861,233 @@ ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
   return TRUE;
 }
 
+/* Convert an SF or DFmode value in an SSE register into an unsigned SImode.
+   When -fpmath=387, this is done with an x87 st(0)_FP->signed-int-64
+   conversion, and ignoring the upper 32 bits of the result.  On x86_64,
+   there is an equivalent SSE %xmm->signed-int-64 conversion.
+
+   On x86_32, we don't have the instruction, nor the 64-bit destination
+   register it requires.  Do the conversion inline in the SSE registers.
+   Requires SSE2.  For x86_32, -mfpmath=sse, !optimize_size only.  */
+
+void
+ix86_expand_convert_uns_si_sse (rtx target, rtx input)
+{
+  REAL_VALUE_TYPE TWO31r;
+  enum machine_mode mode, vecmode;
+  rtx two31, value, large, sign, result_vec, zero_or_two31, x;
+
+  mode = GET_MODE (input);
+  vecmode = mode == SFmode ? V4SFmode : V2DFmode;
+
+  real_ldexp (&TWO31r, &dconst1, 31);
+  two31 = const_double_from_real_value (TWO31r, mode);
+  two31 = ix86_build_const_vector (mode, true, two31);
+  two31 = force_reg (vecmode, two31);
+
+  value = gen_reg_rtx (vecmode);
+  ix86_expand_vector_init_one_nonzero (false, vecmode, value, input, 0);
+
+  large = gen_reg_rtx (vecmode);
+  x = gen_rtx_fmt_ee (LE, vecmode, two31, value);
+  emit_insn (gen_rtx_SET (VOIDmode, large, x));
+
+  zero_or_two31 = gen_reg_rtx (vecmode);
+  x = gen_rtx_AND (vecmode, large, two31);
+  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
+
+  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
+  emit_insn (gen_rtx_SET (VOIDmode, value, x));
+
+  result_vec = gen_reg_rtx (V4SImode);
+  if (mode == SFmode)
+    x = gen_sse2_cvttps2dq (result_vec, value);
+  else
+    x = gen_sse2_cvttpd2dq (result_vec, value);
+  emit_insn (x);
+
+  sign = gen_reg_rtx (V4SImode);
+  emit_insn (gen_ashlv4si3 (sign, gen_lowpart (V4SImode, large),
+			    GEN_INT (31)));
+
+  emit_insn (gen_xorv4si3 (result_vec, result_vec, sign));
+
+  ix86_expand_vector_extract (false, target, result_vec, 0);
+}
+
+/* Convert an unsigned DImode value into a DFmode, using only SSE.
+   Expects the 64-bit DImode to be supplied in a pair of integral
+   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
+   -mfpmath=sse, !optimize_size only.  */
+
+void
+ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
+{
+  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
+  rtx int_xmm, fp_xmm;
+  rtx biases, exponents;
+  rtx x;
+
+  int_xmm = gen_reg_rtx (V4SImode);
+  if (TARGET_INTER_UNIT_MOVES)
+    emit_insn (gen_movdi_to_sse (int_xmm, input));
+  else if (TARGET_SSE_SPLIT_REGS)
+    {
+      emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
+      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
+    }
+  else
+    {
+      x = gen_reg_rtx (V2DImode);
+      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
+      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
+    }
+
+  x = gen_rtx_CONST_VECTOR (V4SImode,
+			    gen_rtvec (4, GEN_INT (0x43300000UL),
+				       GEN_INT (0x45300000UL),
+				       const0_rtx, const0_rtx));
+  exponents = validize_mem (force_const_mem (V4SImode, x));
+
+  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
+  emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
+
+  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
+     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
+     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
+     (0x1.0p84 + double(fp_value_hi_xmm)).
+     Note these exponents differ by 32.  */
+
+  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
+
+  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
+     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
+  real_ldexp (&bias_lo_rvt, &dconst1, 52);
+  real_ldexp (&bias_hi_rvt, &dconst1, 84);
+  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
+  x = const_double_from_real_value (bias_hi_rvt, DFmode);
+  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
+  biases = validize_mem (force_const_mem (V2DFmode, biases));
+  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
+
+  /* Add the upper and lower DFmode values together.  */
+  if (TARGET_SSE3)
+    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
+  else
+    {
+      x = copy_to_mode_reg (V2DFmode, fp_xmm);
+      emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
+      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
+    }
+
+  ix86_expand_vector_extract (false, target, fp_xmm, 0);
+}
+
+/* Convert an unsigned SImode value into a DFmode.  Only currently used
+   for SSE, but applicable anywhere.  */
+
+void
+ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
+{
+  REAL_VALUE_TYPE TWO31r;
+  rtx x, fp;
+
+  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
+			   NULL, 1, OPTAB_DIRECT);
+
+  fp = gen_reg_rtx (DFmode);
+  emit_insn (gen_floatsidf2 (fp, x));
+
+  real_ldexp (&TWO31r, &dconst1, 31);
+  x = const_double_from_real_value (TWO31r, DFmode);
+
+  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
+  if (x != target)
+    emit_move_insn (target, x);
+}
+
+/* Convert a signed DImode value into a DFmode.  Only used for SSE in
+   32-bit mode; otherwise we have a direct convert instruction.  */
+
+void
+ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
+{
+  REAL_VALUE_TYPE TWO32r;
+  rtx fp_lo, fp_hi, x;
+
+  fp_lo = gen_reg_rtx (DFmode);
+  fp_hi = gen_reg_rtx (DFmode);
+
+  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
+
+  real_ldexp (&TWO32r, &dconst1, 32);
+  x = const_double_from_real_value (TWO32r, DFmode);
+  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
+
+  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
+
+  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
+			   0, OPTAB_DIRECT);
+  if (x != target)
+    emit_move_insn (target, x);
+}
+
+/* Convert an unsigned SImode value into a SFmode, using only SSE.
+   For x86_32, -mfpmath=sse, !optimize_size only.  */
+void
+ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
+{
+  REAL_VALUE_TYPE ONE16r;
+  rtx fp_hi, fp_lo, int_hi, int_lo, x;
+
+  real_ldexp (&ONE16r, &dconst1, 16);
+  x = const_double_from_real_value (ONE16r, SFmode);
+  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
+				NULL, 0, OPTAB_DIRECT);
+  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
+				NULL, 0, OPTAB_DIRECT);
+  fp_hi = gen_reg_rtx (SFmode);
+  fp_lo = gen_reg_rtx (SFmode);
+  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
+  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
+  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
+			       0, OPTAB_DIRECT);
+  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
+			       0, OPTAB_DIRECT);
+  if (!rtx_equal_p (target, fp_hi))
+    emit_move_insn (target, fp_hi);
+}
+
+/* A subroutine of ix86_build_signbit_mask_vector.  If VECT is true,
+   then replicate the value for all elements of the vector
+   register.  */
+
+static rtx
+ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
+{
+  rtvec v;
+  switch (mode)
+    {
+    case SFmode:
+      if (vect)
+	v = gen_rtvec (4, value, value, value, value);
+      else
+	v = gen_rtvec (4, value, CONST0_RTX (SFmode),
+		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+      return gen_rtx_CONST_VECTOR (V4SFmode, v);
+
+    case DFmode:
+      if (vect)
+	v = gen_rtvec (2, value, value);
+      else
+	v = gen_rtvec (2, value, CONST0_RTX (DFmode));
+      return gen_rtx_CONST_VECTOR (V2DFmode, v);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
    Create a mask for the sign bit in MODE for an SSE register.  If VECT is
    true, then replicate the mask for all elements of the vector register.
@@ -9869,7 +10099,7 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
   enum machine_mode vec_mode;
   HOST_WIDE_INT hi, lo;
   int shift = 63;
-  rtvec v;
+  rtx v;
   rtx mask;
 
   /* Find the sign bit, sign extended to 2*HWI.  */
@@ -9887,25 +10117,9 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
   mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
   mask = gen_lowpart (mode, mask);
 
-  if (mode == SFmode)
-    {
-      if (vect)
-	v = gen_rtvec (4, mask, mask, mask, mask);
-      else
-	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
-		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
-      vec_mode = V4SFmode;
-    }
-  else
-    {
-      if (vect)
-	v = gen_rtvec (2, mask, mask);
-      else
-	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
-      vec_mode = V2DFmode;
-    }
-
-  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
+  v = ix86_build_const_vector (mode, vect, mask);
+  vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
+  return force_reg (vec_mode, v);
 }
 
 /* Generate code for floating point ABS or NEG.  */
@@ -19573,21 +19787,25 @@ x86_emit_floatuns (rtx operands[2])
   mode = GET_MODE (out);
   neglab = gen_label_rtx ();
   donelab = gen_label_rtx ();
-  i1 = gen_reg_rtx (Pmode);
   f0 = gen_reg_rtx (mode);
 
-  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
+  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
+
+  expand_float (out, in, 0);
 
-  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
   emit_jump_insn (gen_jump (donelab));
   emit_barrier ();
 
   emit_label (neglab);
 
-  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
-  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
-  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
+  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
+			    1, OPTAB_DIRECT);
+  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
+			    1, OPTAB_DIRECT);
+  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
+
   expand_float (f0, i0, 0);
+
   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
 
   emit_label (donelab);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index c2ebe93f5e9..72dec0a441f 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -658,6 +658,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 #define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \
   (ix86_preferred_stack_boundary > STACK_BOUNDARY && !TARGET_64BIT)
 
+/* Target OS keeps a vector-aligned (128-bit, 16-byte) stack.  This is
+   mandatory for the 64-bit ABI, and may or may not be true for other
+   operating systems.  */
+#define TARGET_KEEPS_VECTOR_ALIGNED_STACK TARGET_64BIT
+
 /* Minimum allocation boundary for the code of a function.  */
 #define FUNCTION_BOUNDARY 8
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4f194d4b216..19cd0913ffc 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4331,6 +4331,38 @@
     }
 })
 
+;; Unsigned conversion to SImode.
+
+(define_expand "fixuns_trunc<mode>si2"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(fix:SI (match_operand:SSEMODEF 1 "register_operand" "")))]
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_KEEPS_VECTOR_ALIGNED_STACK && !optimize_size"
+{
+  ix86_expand_convert_uns_si_sse (operands[0], operands[1]);
+  DONE;
+})
+
+;; Unsigned conversion to HImode.
+;; Without these patterns, we'll try the unsigned SI conversion which
+;; is complex for SSE, rather than the signed SI conversion, which isn't.
+
+(define_expand "fixuns_truncsfhi2"
+  [(set (match_dup 2)
+	(fix:SI (match_operand:SF 1 "nonimmediate_operand" "")))
+   (set (match_operand:HI 0 "nonimmediate_operand" "")
+	(subreg:HI (match_dup 2) 0))]
+  "TARGET_SSE_MATH"
+  "operands[2] = gen_reg_rtx (SImode);")
+
+(define_expand "fixuns_truncdfhi2"
+  [(set (match_dup 2)
+	(fix:SI (match_operand:DF 1 "nonimmediate_operand" "")))
+   (set (match_operand:HI 0 "nonimmediate_operand" "")
+	(subreg:HI (match_dup 2) 0))]
+  "TARGET_SSE_MATH"
+  "operands[2] = gen_reg_rtx (SImode);")
+
 ;; When SSE is available, it is always faster to use it!
 (define_insn "fix_truncsfdi_sse"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -4848,8 +4880,14 @@
 (define_expand "floatdidf2"
   [(set (match_operand:DF 0 "register_operand" "")
	(float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)"
-  "")
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+  if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)
+    {
+      ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
+      DONE;
+    }
+})
 
 (define_insn "*floatdidf2_mixed"
   [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
@@ -4944,21 +4982,40 @@
 (define_expand "floatunssisf2"
   [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SI 1 "register_operand" ""))]
-  "!TARGET_64BIT && TARGET_SSE_MATH"
-  "x86_emit_floatuns (operands); DONE;")
+   (use (match_operand:SI 1 "nonimmediate_operand" ""))]
+  "!TARGET_64BIT"
+{
+  if (TARGET_SSE_MATH && TARGET_SSE2)
+    ix86_expand_convert_uns_sisf_sse (operands[0], operands[1]);
+  else
+    x86_emit_floatuns (operands);
+  DONE;
+})
+
+(define_expand "floatunssidf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:SI 1 "nonimmediate_operand" ""))]
+  "!TARGET_64BIT && TARGET_SSE_MATH && TARGET_SSE2"
+  "ix86_expand_convert_uns_sidf_sse (operands[0], operands[1]); DONE;")
 
 (define_expand "floatunsdisf2"
   [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:DI 1 "register_operand" ""))]
+   (use (match_operand:DI 1 "nonimmediate_operand" ""))]
   "TARGET_64BIT && TARGET_SSE_MATH"
   "x86_emit_floatuns (operands); DONE;")
 
 (define_expand "floatunsdidf2"
   [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DI 1 "register_operand" ""))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
-  "x86_emit_floatuns (operands); DONE;")
+   (use (match_operand:DI 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE_MATH && TARGET_SSE2
+   && (TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)"
+{
+  if (TARGET_64BIT)
+    x86_emit_floatuns (operands);
+  else
+    ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
+  DONE;
+})
 
 ;; SSE extract/set expanders
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ed1de1946f4..9a1e24bbd9c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -87,6 +87,47 @@
 	      (const_string "V4SF")
 	      (const_string "TI")))])
 
+;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
+;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
+;; from memory, we'd prefer to load the memory directly into the %xmm
+;; register.  To facilitate this happy circumstance, this pattern won't
+;; split until after register allocation.  If the 64-bit value didn't
+;; come from memory, this is the best we can do.  This is much better
+;; than storing %edx:%eax into a stack temporary and loading an %xmm
+;; from there.
+
+(define_insn_and_split "movdi_to_sse"
+  [(parallel
+    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
+	  (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
+     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  switch (which_alternative)
+    {
+    case 0:
+      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+	 Assemble the 64-bit DImode value in an xmm register.  */
+      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
+				  gen_rtx_SUBREG (SImode, operands[1], 0)));
+      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
+				  gen_rtx_SUBREG (SImode, operands[1], 4)));
+      emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
+      break;
+
+    case 1:
+      emit_insn (gen_vec_concatv2di (operands[0], operands[1], const0_rtx));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+  DONE;
+})
+
 (define_expand "movv4sf"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
	(match_operand:V4SF 1 "nonimmediate_operand" ""))]
@@ -4118,7 +4159,7 @@
   [(set_attr "type" "sselog,ssemov,ssemov")
    (set_attr "mode" "TI,V4SF,V2SF")])
 
-(define_insn "*vec_concatv2di"
+(define_insn "vec_concatv2di"
   [(set (match_operand:V2DI 0 "register_operand"     "=Y2,?Y2,Y2,x,x,x")
	(vec_concat:V2DI
	  (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
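
A few scalar sketches may help when reading the new expanders; none of this code is part of the patch, and the helper names below are made up for illustration only.  ix86_expand_convert_uns_si_sse turns an SSE float or double into an unsigned 32-bit integer by conditionally subtracting 2^31 before the truncating signed convert and XOR-ing the sign bit back into the integer result, roughly:

#include <stdint.h>

/* Scalar sketch of ix86_expand_convert_uns_si_sse: values >= 2^31 are
   reduced by 2^31 before the truncating signed conversion, and the
   missing 2^31 is restored by XOR-ing the sign bit into the integer
   result, mirroring the vector compare/and/subtract/xor sequence.  */
static uint32_t
double_to_uns32_sketch (double x)
{
  const double two31 = 0x1.0p31;
  uint32_t large = (x >= two31) ? 0x80000000u : 0u;

  if (large)
    x -= two31;   /* now x fits in a signed 32-bit integer */

  return (uint32_t) (int32_t) x ^ large;
}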
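ix86_expand_convert_uns_didf_sse relies on the exponent-splicing trick described in its comment: placing 0x43300000 or 0x45300000 above a 32-bit half produces a double equal to 0x1.0p52 + lo or 0x1.0p84 + hi * 2^32, so subtracting the two biases and adding the halves yields the full unsigned 64-bit value.  A scalar C sketch, assuming IEEE-754 doubles and a little-endian host (the function name is illustrative):

#include <stdint.h>
#include <string.h>

/* Scalar sketch of the exponent-splicing trick in
   ix86_expand_convert_uns_didf_sse.  {lo, 0x43300000} read as a double
   is 0x1.0p52 + lo; {hi, 0x45300000} is 0x1.0p84 + hi * 2^32.  Subtract
   the biases and add the halves.  */
static double
uns64_to_double_sketch (uint64_t x)
{
  uint64_t lo_bits = ((uint64_t) 0x43300000UL << 32) | (uint32_t) x;
  uint64_t hi_bits = ((uint64_t) 0x45300000UL << 32) | (uint32_t) (x >> 32);
  double d_lo, d_hi;

  memcpy (&d_lo, &lo_bits, sizeof d_lo);
  memcpy (&d_hi, &hi_bits, sizeof d_hi);

  return (d_hi - 0x1.0p84) + (d_lo - 0x1.0p52);
}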
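ix86_expand_convert_uns_sisf_sse sidesteps the unsigned case entirely: both 16-bit halves of the input are non-negative, so the ordinary signed int-to-float conversion handles each half, and the result is recombined as hi * 2^16 + lo.  As a scalar sketch (illustrative name):

#include <stdint.h>

/* Scalar sketch of ix86_expand_convert_uns_sisf_sse: convert the two
   non-negative 16-bit halves with the signed conversion, then
   recombine as hi * 2^16 + lo.  */
static float
uns32_to_float_sketch (uint32_t x)
{
  int32_t lo = (int32_t) (x & 0xffff);
  int32_t hi = (int32_t) (x >> 16);

  return (float) hi * 0x1.0p16f + (float) lo;
}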
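The rewritten x86_emit_floatuns keeps the generic fallback: when the sign bit of the input is clear, a plain signed conversion is already correct; otherwise the value is halved, with the discarded low bit folded back in so the conversion's single rounding still accounts for it, converted, and doubled.  A scalar sketch of that logic (illustrative name; the real routine emits RTL, labels, and a branch rather than C):

#include <stdint.h>

/* Scalar sketch of the two paths emitted by x86_emit_floatuns: a plain
   signed convert when the value looks non-negative, otherwise halve
   (keeping the low bit as a sticky bit), convert, and double.  */
static double
uns64_to_double_by_halving (uint64_t x)
{
  if ((int64_t) x >= 0)
    return (double) (int64_t) x;

  uint64_t half = (x >> 1) | (x & 1);
  double d = (double) (int64_t) half;
  return d + d;
}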