summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> 2012-03-28 21:28:15 +0000
committer: uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> 2012-03-28 21:28:15 +0000
commit: df6b300635523d43f009f0b8f4432ad0a91970e5 (patch)
tree: 95a7d1277073a8fda275fef4a4ad7886b55a8907
parent: 0c77715ba591f1ea7596395122e5240130bdd178 (diff)
download: gcc-df6b300635523d43f009f0b8f4432ad0a91970e5.tar.gz
* config/i386/i386.c (ix86_modes_tieable_p): Handle 32bit AVX modes.
(ix86_expand_vector_move_misalign): Remove un-needed gen_lowpart calls.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@185927 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 18
-rw-r--r-- gcc/config/i386/i386.c 38
2 files changed, 26 insertions, 30 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6a7605ef18a..f84e25e2686 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2012-03-28 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.c (ix86_modes_tieable_p): Handle 32bit AVX modes.
+ (ix86_expand_vector_move_misalign): Remove un-needed gen_lowpart calls.
+
2012-03-28 Jakub Jelinek <jakub@redhat.com>
PR middle-end/52691
@@ -16,22 +21,20 @@
(loop_optimizer_finalize): If loops are to be preserved only
clean up optional loop features.
(rtl_loop_done): Forcefully free loops here.
- * cgraph.c (cgraph_release_function_body): Forcefully free
- loops.
+ * cgraph.c (cgraph_release_function_body): Forcefully free loops.
* cfgexpand.c (expand_gimple_cond): Properly add new basic-blocks
to existing loops.
(construct_init_block): Likewise.
(construct_exit_block): Likewise.
(gimple_expand_cfg): Clear LOOP_CLOSED_SSA loop state. Cleanup
the CFG after expanding.
- * cfgloop.c (verify_loop_structure): Calculate or verify
- dominators. If we needed to calculate them, free them afterwards.
+ * cfgloop.c (verify_loop_structure): Calculate or verify dominators.
+ If we needed to calculate them, free them afterwards.
* tree-pass.h (PROP_loops): New define.
* tree-ssa-loop.c (pass_tree_loop_init): Provide PROP_loops.
* basic-block.h (CLEANUP_CFG_CHANGED): New.
* cfgcleanup.c (merge_blocks_move): Protect loop latches.
- (cleanup_cfg): If we did something and have loops around, fix
- them up.
+ (cleanup_cfg): If we did something and have loops around, fix them up.
* cse.c (rest_of_handle_cse_after_global_opts): Call cleanup_cfg
with CLEANUP_CFG_CHANGED.
* cfghooks.c (merge_blocks): If we merge a loop header into
@@ -84,8 +87,7 @@
PR target/52737
* config.gcc (tm_file): Remove avr/multilib.h.
- * doc/invoke.texi (AVR Options): Adjust
- documentation of -mtiny-stack.
+ * doc/invoke.texi (AVR Options): Adjust documentation of -mtiny-stack.
* config/avr/genmultilib.awk: Remove code to generate multilib.h.
(BEGIN): Use -msp8 as multilib option instead of -mtiny-stack.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 18172a10825..5029be6c085 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -15831,17 +15831,18 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
switch (GET_MODE_SIZE (mode))
{
case 16:
- /* If we're optimizing for size, movups is the smallest. */
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
emit_insn (gen_sse_movups (op0, op1));
- return;
}
- op0 = gen_lowpart (V16QImode, op0);
- op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
+ else
+ {
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_sse2_movdqu (op0, op1));
+ }
break;
case 32:
op0 = gen_lowpart (V32QImode, op0);
@@ -15853,27 +15854,22 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
}
break;
case MODE_VECTOR_FLOAT:
- op0 = gen_lowpart (mode, op0);
- op1 = gen_lowpart (mode, op1);
-
switch (mode)
{
case V4SFmode:
emit_insn (gen_sse_movups (op0, op1));
break;
- case V8SFmode:
- ix86_avx256_split_vector_move_misalign (op0, op1);
- break;
case V2DFmode:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
emit_insn (gen_sse_movups (op0, op1));
- return;
}
- emit_insn (gen_sse2_movupd (op0, op1));
+ else
+ emit_insn (gen_sse2_movupd (op0, op1));
break;
+ case V8SFmode:
case V4DFmode:
ix86_avx256_split_vector_move_misalign (op0, op1);
break;
@@ -15918,8 +15914,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
{
- op0 = gen_lowpart (V2DFmode, op0);
- op1 = gen_lowpart (V2DFmode, op1);
emit_insn (gen_sse2_movupd (op0, op1));
return;
}
@@ -15984,8 +15978,8 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
return;
}
- /* ??? Similar to above, only less clear because of quote
- typeless stores unquote. */
+ /* ??? Similar to above, only less clear
+ because of typeless stores. */
if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
{
@@ -15998,11 +15992,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
if (TARGET_SSE2 && mode == V2DFmode)
{
if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
- {
- op0 = gen_lowpart (V2DFmode, op0);
- op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
- }
+ emit_insn (gen_sse2_movupd (op0, op1));
else
{
m = adjust_address (op0, DFmode, 0);
@@ -31399,6 +31389,10 @@ ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
/* If MODE2 is only appropriate for an SSE register, then tie with
any other mode acceptable to SSE registers. */
+ if (GET_MODE_SIZE (mode2) == 32
+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
+ return (GET_MODE_SIZE (mode1) == 32
+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
if (GET_MODE_SIZE (mode2) == 16
&& ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
return (GET_MODE_SIZE (mode1) == 16