summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUros Bizjak <ubizjak@gmail.com>2015-10-06 04:47:06 -0700
committerH.J. Lu <hjl.tools@gmail.com>2015-10-06 09:18:29 -0700
commit297bb79fc9bb4d365ea4c01f3ed701fd6e1a6483 (patch)
tree5fb60e16a88e103e245e776b4f8fef40c72dc78f
parent6e33678dd5f2f586fb64de2e918e4cca2251160e (diff)
downloadgcc-hjl/unaligned.tar.gz
Support unaligned SSE moves outside realigned areahjl/unaligned
gcc/ PR target/66697 * config/i386/i386.c (ix86_option_override_internal): Use 8-byte minimum stack boundary in 64-bit mode. (ix86_compute_frame_layout): Remove assert on INCOMING_STACK_BOUNDARY. (ix86_emit_save_reg_using_mov): Support unaligned SSE store. Add a REG_CFA_EXPRESSION note if needed. (ix86_emit_restore_sse_regs_using_mov): Support unaligned SSE load. (ix86_handle_force_align_arg_pointer_attribute): New. (ix86_minimum_incoming_stack_boundary): Remove TARGET_64BIT check. (ix86_attribute_table): Set ix86_force_align_arg_pointer_string with ix86_handle_force_align_arg_pointer_attribute. * config/i386/i386.h (MIN_STACK_BOUNDARY): Set to BITS_PER_WORD. gcc/testsuite/ PR target/66697 * gcc.target/i386/20060512-1.c: Remove ia32 requirement. (PUSH): New. (POP): Likewise. (vector_using_function): Use PUSH and POP. * cc.target/i386/20060512-2.c: Remove ia32 requirement.
-rw-r--r--gcc/config/i386/i386.c65
-rw-r--r--gcc/config/i386/i386.h2
-rw-r--r--gcc/testsuite/gcc.target/i386/20060512-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/20060512-2.c1
4 files changed, 66 insertions, 15 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 38953ddc4ae..0b59bfc1931 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5209,8 +5209,7 @@ ix86_option_override_internal (bool main_args_p,
ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
if (opts_set->x_ix86_incoming_stack_boundary_arg)
{
- int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
- ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
+ int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
if (opts->x_ix86_incoming_stack_boundary_arg < min
|| opts->x_ix86_incoming_stack_boundary_arg > 12)
@@ -11391,7 +11390,6 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
/* The only ABI that has saved SSE registers (Win64) also has a
16-byte aligned default stack, and thus we don't need to be
within the re-aligned local stack frame to save them. */
- gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
offset = ROUND_UP (offset, 16);
offset += frame->nsseregs * 16;
}
@@ -11616,14 +11614,26 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
struct machine_function *m = cfun->machine;
rtx reg = gen_rtx_REG (mode, regno);
rtx mem, addr, base, insn;
+ unsigned int align;
addr = choose_baseaddr (cfa_offset);
mem = gen_frame_mem (mode, addr);
- /* For SSE saves, we need to indicate the 128-bit alignment. */
- set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
+ /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
+ align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
+ set_mem_align (mem, align);
+
+ /* SSE saves are not within re-aligned local stack frame.
+ In case INCOMING_STACK_BOUNDARY is misaligned, we have
+ to emit unaligned store. */
+ if (mode == V4SFmode && align < 128)
+ {
+ rtx unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
+ insn = emit_insn (gen_rtx_SET (mem, unspec));
+ }
+ else
+ insn = emit_insn (gen_rtx_SET (mem, reg));
- insn = emit_move_insn (mem, reg);
RTX_FRAME_RELATED_P (insn) = 1;
base = addr;
@@ -11670,6 +11680,8 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
mem = gen_rtx_MEM (mode, addr);
add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
}
+ else
+ add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
}
/* Emit code to save registers using MOV insns.
@@ -11886,6 +11898,25 @@ find_drap_reg (void)
}
}
+/* Handle a "force_align_arg_pointer" attribute. */
+
+static tree
+ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
+ tree, int, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
/* Return minimum incoming stack alignment. */
static unsigned int
@@ -11900,7 +11931,6 @@ ix86_minimum_incoming_stack_boundary (bool sibcall)
if -mstackrealign is used, it isn't used for sibcall check and
estimated stack alignment is 128bit. */
else if (!sibcall
- && !TARGET_64BIT
&& ix86_force_align_arg_pointer
&& crtl->stack_alignment_estimated == 128)
incoming_stack_boundary = MIN_STACK_BOUNDARY;
@@ -13184,11 +13214,26 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
{
rtx reg = gen_rtx_REG (V4SFmode, regno);
rtx mem;
+ unsigned int align;
mem = choose_baseaddr (cfa_offset);
mem = gen_rtx_MEM (V4SFmode, mem);
- set_mem_align (mem, 128);
- emit_move_insn (reg, mem);
+
+ /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
+ align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
+ set_mem_align (mem, align);
+
+ /* SSE saves are not within re-aligned local stack frame.
+ In case INCOMING_STACK_BOUNDARY is misaligned, we have
+ to emit unaligned load. */
+ if (align < 128)
+ {
+ rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
+ UNSPEC_LOADU);
+ emit_insn (gen_rtx_SET (reg, unspec));
+ }
+ else
+ emit_insn (gen_rtx_SET (reg, mem));
ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
@@ -48159,7 +48204,7 @@ static const struct attribute_spec ix86_attribute_table[] =
true },
/* force_align_arg_pointer says this function realigns the stack at entry. */
{ (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
- false, true, true, ix86_handle_cconv_attribute, false },
+ false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
{ "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
{ "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index a24dea544c6..2edc22b5677 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -755,7 +755,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
/* Minimum stack boundary. */
-#define MIN_STACK_BOUNDARY (TARGET_64BIT ? (TARGET_SSE ? 128 : 64) : 32)
+#define MIN_STACK_BOUNDARY BITS_PER_WORD
/* Boundary (in *bits*) on which the stack pointer prefers to be
aligned; the compiler cannot rely on having this alignment. */
diff --git a/gcc/testsuite/gcc.target/i386/20060512-1.c b/gcc/testsuite/gcc.target/i386/20060512-1.c
index 374d18aea57..ec163a9bc51 100644
--- a/gcc/testsuite/gcc.target/i386/20060512-1.c
+++ b/gcc/testsuite/gcc.target/i386/20060512-1.c
@@ -1,5 +1,4 @@
/* { dg-do run } */
-/* { dg-require-effective-target ia32 } */
/* { dg-options "-std=gnu99 -msse2 -mpreferred-stack-boundary=4" } */
/* { dg-require-effective-target sse2 } */
@@ -7,6 +6,14 @@
#include <emmintrin.h>
+#ifdef __x86_64__
+# define PUSH "pushq %rsi"
+# define POP "popq %rsi"
+#else
+# define PUSH "pushl %esi"
+# define POP "popl %esi"
+#endif
+
__m128i __attribute__ ((__noinline__))
vector_using_function ()
{
@@ -27,9 +34,9 @@ static void
sse2_test (void)
{
int result;
- asm ("pushl %esi"); /* Disalign runtime stack. */
+ asm (PUSH); /* Misalign runtime stack. */
result = self_aligning_function (g_1, g_2);
if (result != 42)
abort ();
- asm ("popl %esi");
+ asm (POP);
}
diff --git a/gcc/testsuite/gcc.target/i386/20060512-2.c b/gcc/testsuite/gcc.target/i386/20060512-2.c
index d3a779cb4e2..8ce4bd7f8be 100644
--- a/gcc/testsuite/gcc.target/i386/20060512-2.c
+++ b/gcc/testsuite/gcc.target/i386/20060512-2.c
@@ -1,5 +1,4 @@
/* { dg-do compile } */
-/* { dg-require-effective-target ia32 } */
/* { dg-options "-std=gnu99 -mpreferred-stack-boundary=4" } */
int
outer_function (int x, int y)