author     hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>   2010-04-18 10:52:26 +0000
committer  hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>   2010-04-18 10:52:26 +0000
commit     7203f8f0f9065b1201478f93bfa3557d24f59859 (patch)
tree       04b7c8385388a495de58e395735aa2986c5d78f9
parent     da203f51d052f6346874885f8ba87611e8f27022 (diff)
* i386.md (UNSPEC_SSE_PROLOGUE_SAVE_LOW): New.
(sse_prologue_save_insn expander): Use new pattern.
(sse_prologue_save_insn1): New pattern and splitter.
(sse_prologue_save_insn): Update to deal also with 64bit aligned
blocks.
* i386.c (setup_incoming_varargs_64): Do not compute jump destination
here.
(ix86_gimplify_va_arg): Update alignment needed.
(ix86_local_alignment): Do not align all local arrays to 128bit.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@158483 138bc75d-0d04-0410-961f-82ee72b054a4
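
Background for the diff below: the expander used to compute the jump-table
dispatch address at expansion time, hard-coding the per-save instruction
size; the patch moves that computation to a post-reload splitter, where the
size (4 or 5 bytes) is finally known. A minimal sketch of the dispatch
arithmetic, with illustrative names (dispatch_target, named_sse_args and
label_addr are not GCC identifiers):

    /* The prologue emits one size-byte save instruction per XMM register,
       with the label placed after the last one.  %al holds the number of
       vector registers the caller actually used, so the jump lands
       (al - named_sse_args) instructions before the label and exactly the
       saves for the unnamed vararg registers execute.  */
    static long
    dispatch_target (long label_addr, long named_sse_args, long al, long size)
    {
      return label_addr + named_sse_args * size - al * size;
    }
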
-rw-r--r--  gcc/ChangeLog           |  12
-rw-r--r--  gcc/config/i386/i386.c  |  56
-rw-r--r--  gcc/config/i386/i386.md | 122
3 files changed, 148 insertions(+), 42 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index addc0942f8d..912670d50dd 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2010-04-18 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (UNSPEC_SSE_PROLOGUE_SAVE_LOW): New.
+ (sse_prologue_save_insn expander): Use new pattern.
+ (sse_prologue_save_insn1): New pattern and splitter.
+ (sse_prologue_save_insn): Update to deal also with 64bit aligned
+ blocks.
+ * i386.c (setup_incoming_varargs_64): Do not compute jump destination here.
+ (ix86_gimplify_va_arg): Update alignment needed.
+ (ix86_local_alignment): Do not align all local arrays
+ to 128bit.
+
2010-04-17 Jan Hubicka <jh@suse.cz>

* ipa-inline.c (cgraph_early_inlining): Handle flattening too.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b99fe2ae345..7376d1b48e7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6790,7 +6790,6 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
rtx save_area, mem;
rtx label;
- rtx label_ref;
rtx tmp_reg;
rtx nsse_reg;
alias_set_type set;
@@ -6841,35 +6840,9 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
SSE saves. We need some preparation work to get this working. */
label = gen_label_rtx ();
- label_ref = gen_rtx_LABEL_REF (Pmode, label);
- /* Compute address to jump to :
- label - eax*4 + nnamed_sse_arguments*4 Or
- label - eax*5 + nnamed_sse_arguments*5 for AVX. */
- tmp_reg = gen_reg_rtx (Pmode);
nsse_reg = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- gen_rtx_MULT (Pmode, nsse_reg,
- GEN_INT (4))));
-
- /* vmovaps is one byte longer than movaps. */
- if (TARGET_AVX)
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- gen_rtx_PLUS (Pmode, tmp_reg,
- nsse_reg)));
-
- if (cum->sse_regno)
- emit_move_insn
- (nsse_reg,
- gen_rtx_CONST (DImode,
- gen_rtx_PLUS (DImode,
- label_ref,
- GEN_INT (cum->sse_regno
- * (TARGET_AVX ? 5 : 4)))));
- else
- emit_move_insn (nsse_reg, label_ref);
- emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
/* Compute address of memory block we save into. We always use pointer
pointing 127 bytes after first byte to store - this is needed to keep
@@ -6882,11 +6855,12 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
- set_mem_align (mem, BITS_PER_WORD);
+ set_mem_align (mem, 64);
/* And finally do the dirty job! */
emit_insn (gen_sse_prologue_save (mem, nsse_reg,
- GEN_INT (cum->sse_regno), label));
+ GEN_INT (cum->sse_regno), label,
+ gen_reg_rtx (Pmode)));
}
}
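
The 127-byte bias mentioned in the comment above keeps every save within a
signed 8-bit displacement. A sketch of the arithmetic, assuming the 16-byte
XMM save slots used here:

    /* With the base register pointing 127 bytes past the first byte to
       store, slot i sits at displacement -127 + 16*i; for the 8 XMM slots
       (i = 0..7) that is -127 .. -15, all inside the disp8 range
       [-128, 127], so each store gets the short encoding.  */
    int slot_disp (int i) { return -127 + 16 * i; }
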
@@ -7047,7 +7021,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
int indirect_p = 0;
tree ptrtype;
enum machine_mode nat_mode;
- int arg_boundary;
+ unsigned int arg_boundary;
/* Only 64bit target needs something special. */
if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
@@ -7279,6 +7253,8 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
size_int (-align));
t = fold_convert (TREE_TYPE (ovf), t);
+ if (crtl->stack_alignment_needed < arg_boundary)
+ crtl->stack_alignment_needed = arg_boundary;
}
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
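
The new stack_alignment_needed bookkeeping ties va_arg to the prologue
patterns below. A comment-only sketch of the invariant the patch appears to
rely on (an interpretation, not text from the patch):

    /* Any va_arg site that needs 16-byte alignment bumps
       crtl->stack_alignment_needed to at least 128; only then does the
       register-save block keep the aligned %vmovaps form.  Below 128 it
       falls back to movsd, storing just the low 64 bits of each register
       (hence UNSPEC_SSE_PROLOGUE_SAVE_LOW).  */
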
@@ -20099,10 +20075,26 @@ ix86_local_alignment (tree exp, enum machine_mode mode,
}
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
- to 16byte boundary. */
- if (TARGET_64BIT)
+ to 16byte boundary.  The exact wording is:
+
+ An array uses the same alignment as its elements, except that a local or
+ global array variable of length at least 16 bytes or
+ a C99 variable-length array variable always has alignment of at least 16 bytes.
+
+ This was added to allow use of aligned SSE instructions on arrays.  The
+ rule is meant for static storage (where the compiler cannot do the
+ analysis by itself).  We follow it for automatic variables only when
+ convenient.  We fully control everything in the function being compiled,
+ and functions from other units cannot rely on the alignment.
+
+ Exclude the va_list type.  It is the common case of a local array where
+ we cannot benefit from the alignment.  */
+ if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
+ && TARGET_SSE)
{
if (AGGREGATE_TYPE_P (type)
+ && (TYPE_MAIN_VARIANT (type)
+ != TYPE_MAIN_VARIANT (va_list_type_node))
&& TYPE_SIZE (type)
&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
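
An illustrative example (hypothetical code, not from the patch) of the
relaxed ix86_local_alignment rule:

    #include <stdarg.h>

    void
    f (void)
    {
      char buf[64]; /* local array of >= 16 bytes: still bumped to 128-bit
                       alignment, but now only on 64-bit SSE targets when
                       optimizing for speed */
      va_list ap;   /* an array type on x86-64, but explicitly excluded:
                       keeps the alignment of its element type */
      (void) buf;
      (void) ap;
    }
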
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d08a7ea1081..fbc15522673 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -85,6 +85,7 @@
(UNSPEC_SET_RIP 16)
(UNSPEC_SET_GOT_OFFSET 17)
(UNSPEC_MEMORY_BLOCKAGE 18)
+ (UNSPEC_SSE_PROLOGUE_SAVE_LOW 19)
; TLS support
(UNSPEC_TP 20)
@@ -18441,15 +18442,24 @@
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
- (use (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_operand:DI 1 "register_operand" ""))
(use (match_operand:DI 2 "immediate_operand" ""))
- (use (label_ref:DI (match_operand 3 "" "")))])]
+ (use (label_ref:DI (match_operand 3 "" "")))
+ (clobber (match_operand:DI 4 "register_operand" ""))
+ (use (match_dup 1))])]
"TARGET_64BIT"
"")
-(define_insn "*sse_prologue_save_insn"
+;; Pre-reload version of the prologue save.  Until after prologue generation
+;; we don't know what the size of each save instruction will be.
+;; Operand 0 + operand 6 is the memory save area.
+;; Operand 1 is the number of registers to save (clobbered; operand 5 matches it).
+;; Operand 2 is the number of non-varargs SSE arguments.
+;; Operand 3 is the label starting the save block.
+;; Operand 4 is used for temporary computation of the jump address.
+(define_insn "*sse_prologue_save_insn1"
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
- (match_operand:DI 4 "const_int_operand" "n")))
+ (match_operand:DI 6 "const_int_operand" "n")))
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
@@ -18458,9 +18468,98 @@
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (clobber (match_operand:DI 1 "register_operand" "=r"))
+ (use (match_operand:DI 2 "const_int_operand" "i"))
+ (use (label_ref:DI (match_operand 3 "" "X")))
+ (clobber (match_operand:DI 4 "register_operand" "=&r"))
+ (use (match_operand:DI 5 "register_operand" "1"))]
+ "TARGET_64BIT
+ && INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
+ && INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128"
+ "#"
+ [(set_attr "type" "other")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
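
The insn condition above bounds every save's displacement to a signed byte.
A worked check, assuming the usual -127 offset from setup_incoming_varargs_64
and X86_64_SSE_REGPARM_MAX == 8:

    /* highest slot:  -127 + 8*16 - 16 = -15,  which is < 128;
       lowest  slot:  -127 + named*16 >= -128  for any named >= 0.  */
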
+;; Now the size of each save instruction is known; expand the computation
+;; of the jump address into the jump table.
+(define_split
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(reg:DI XMM0_REG)
+ (reg:DI XMM1_REG)
+ (reg:DI XMM2_REG)
+ (reg:DI XMM3_REG)
+ (reg:DI XMM4_REG)
+ (reg:DI XMM5_REG)
+ (reg:DI XMM6_REG)
+ (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (clobber (match_operand:DI 1 "register_operand" ""))
+ (use (match_operand:DI 2 "const_int_operand" ""))
+ (use (match_operand 3 "" ""))
+ (clobber (match_operand:DI 4 "register_operand" ""))
+ (use (match_operand:DI 5 "register_operand" ""))])]
+ "reload_completed"
+ [(parallel [(set (match_dup 0)
+ (unspec:BLK [(reg:DI XMM0_REG)
+ (reg:DI XMM1_REG)
+ (reg:DI XMM2_REG)
+ (reg:DI XMM3_REG)
+ (reg:DI XMM4_REG)
+ (reg:DI XMM5_REG)
+ (reg:DI XMM6_REG)
+ (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
+ (use (match_dup 1))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (use (match_dup 5))])]
+{
+ /* movaps is 4 bytes; vmovaps (AVX) and movsd are 5 bytes.  */
+ int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128);
+
+ /* Compute address to jump to:
+ label - eax*size + nnamed_sse_arguments*size. */
+ if (size == 5)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4],
+ gen_rtx_PLUS
+ (Pmode,
+ gen_rtx_MULT (Pmode, operands[1],
+ GEN_INT (4)),
+ operands[1])));
+ else if (size == 4)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4],
+ gen_rtx_MULT (Pmode, operands[1],
+ GEN_INT (4))));
+ else
+ gcc_unreachable ();
+ if (INTVAL (operands[2]))
+ emit_move_insn
+ (operands[1],
+ gen_rtx_CONST (DImode,
+ gen_rtx_PLUS (DImode,
+ operands[3],
+ GEN_INT (INTVAL (operands[2])
+ * size))));
+ else
+ emit_move_insn (operands[1], operands[3]);
+ emit_insn (gen_subdi3 (operands[1], operands[1], operands[4]));
+ operands[5] = GEN_INT (size);
+})
+
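
Byte counts behind the splitter's size computation, spelled out (the
encodings assume a disp8 memory operand and no REX prefix; they are stated
here for illustration, not taken from the patch):

    /* movaps  %xmm, d8(%rax)   0F 29 /r + disp8     = 4 bytes
       vmovaps %xmm, d8(%rax)   C5 F8 29 /r + disp8  = 5 bytes
       movsd   %xmm, d8(%rax)   F2 0F 11 /r + disp8  = 5 bytes
       hence size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128).  */
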
+(define_insn "sse_prologue_save_insn"
+ [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
+ (match_operand:DI 4 "const_int_operand" "n")))
+ (unspec:BLK [(reg:DI XMM0_REG)
+ (reg:DI XMM1_REG)
+ (reg:DI XMM2_REG)
+ (reg:DI XMM3_REG)
+ (reg:DI XMM4_REG)
+ (reg:DI XMM5_REG)
+ (reg:DI XMM6_REG)
+ (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
(use (match_operand:DI 1 "register_operand" "r"))
(use (match_operand:DI 2 "const_int_operand" "i"))
- (use (label_ref:DI (match_operand 3 "" "X")))]
+ (use (label_ref:DI (match_operand 3 "" "X")))
+ (use (match_operand:DI 5 "const_int_operand" "i"))]
"TARGET_64BIT
&& INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
&& INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
@@ -18480,7 +18579,10 @@
PUT_MODE (operands[4], TImode);
if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
output_asm_insn ("rex", operands);
- output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
+ if (crtl->stack_alignment_needed < 128)
+ output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands);
+ else
+ output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
}
(*targetm.asm_out.internal_label) (asm_out_file, "L",
CODE_LABEL_NUMBER (operands[3]));
@@ -18489,11 +18591,11 @@
[(set_attr "type" "other")
(set_attr "length_immediate" "0")
(set_attr "length_address" "0")
+ ;; 2 bytes for the jump and operands[5] bytes for each save.
(set (attr "length")
- (if_then_else
- (eq (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "34")
- (const_string "42")))
+ (plus (const_int 2)
+ (mult (symbol_ref ("INTVAL (operands[5])"))
+ (symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])")))))
(set_attr "memory" "store")
(set_attr "modrm" "0")
(set_attr "prefix" "maybe_vex")