diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 60 | ||||
-rw-r--r-- | gcc/c-family/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/c-family/c-common.c | 29 | ||||
-rw-r--r-- | gcc/common.opt | 4 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 396 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 7 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 59 | ||||
-rw-r--r-- | gcc/config/i386/linux.h | 4 | ||||
-rw-r--r-- | gcc/config/i386/linux64.h | 4 | ||||
-rw-r--r-- | gcc/doc/extend.texi | 11 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 21 | ||||
-rw-r--r-- | gcc/doc/libgcc.texi | 40 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 4 | ||||
-rw-r--r-- | gcc/doc/tm.texi.in | 2 | ||||
-rw-r--r-- | gcc/explow.c | 74 | ||||
-rw-r--r-- | gcc/function.c | 58 | ||||
-rw-r--r-- | gcc/gcc.c | 13 | ||||
-rw-r--r-- | gcc/libgcc-std.ver | 4 | ||||
-rw-r--r-- | gcc/opts.c | 14 | ||||
-rw-r--r-- | gcc/output.h | 1 | ||||
-rw-r--r-- | gcc/target.def | 10 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/split-1.c | 49 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/split-2.c | 55 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/split-3.c | 64 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/split-4.c | 68 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 9 | ||||
-rw-r--r-- | gcc/varasm.c | 29 |
29 files changed, 1074 insertions, 32 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 68dd7e9f8b4..1970bd74cdf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,63 @@ +2010-09-27 Ian Lance Taylor <iant@google.com> + + * common.opt (fsplit-stack): New option. + * opts.c (decode_options): Set flag_split_stack to final value. + * target.def (supports_split_stack): New hook. + * gcc.c (STACK_SPLIT_SPEC): Define. + (LINK_COMMAND_SPEC): Use STACK_SPLIT_SPEC. + * doc/invoke.texi (Option Summary): Mention -fsplit-stack. + (Code Gen Options): Document -fsplit-stack. + * doc/extend.texi (Function Attributes): Mention no_split_stack. + (Function Attributes): Document no_split_stack. + * doc/tm.texi.in (Stack Smashing Protection): Add @hook + TARGET_SUPPORTS_SPLIT_STACK. + * doc/tm.texi: Rebuild. + * function.c (thread_prologue_and_epilogue_insns): If + flag_split_stack, add split stack prologue. + * explow.c (allocate_dynamic_stack_space): Support -fsplit-stack. + * varasm.c (saw_no_split_stack): New static variable. + (assemble_start_function): Set saw_no_split_stack if the function + has the no_split_stack attribute. + (file_end_indicate_split_stack): New function. + * output.h (file_end_indicate_split_stack): Declare. + * libgcc-std.ver (GCC_4.6.0): Add -fsplit-stack support variables + and function. + * doc/libgcc.texi (Miscellaneous routines): Document -fsplit-stack + routines. + * config/i386/i386.c (ix86_option_override_internal): Don't set + expand_builtin_va_start to NULL if -fsplit-stack. + (ix86_function_regparm): Reduce local regparm by 1 for 32-bit + -fsplit-stack. + (ix86_va_start): If -fsplit-stack, get overflow pointer from + scratch register set by prologue. + (ix86_code_end): If -fsplit-stack, call + file_end_indicate_split_stack. + (ix86_supports_split_stack): New static function. + (SPLIT_STACK_AVAILABLE): Define. + (split_stack_prologue_scratch_regno): New static function. + (split_stack_fn): New static variable. + (ix86_expand_split_stack_prologue): New function. 
+ (ix86_live_on_entry): New static function. + (ix86_legitimate_address_p): Handle UNSPEC_STACK_CHECK. + (output_pic_addr_const): Likewise. + (i386_asm_output_addr_const_extra): Likewise. + (ix86_expand_call): Change return type to rtx. Return the new + call instruction. + (TARGET_SUPPORTS_SPLIT_STACK): Define. + (TARGET_EXTRA_LIVE_ON_ENTRY): Define. + * config/i386/i386.md (UNSPEC_STACK_CHECK): Define. + (split_stack_prologue, split_stack_return): New insns. + (split_stack_space_check): New insn. + * config/i386/i386.h (struct machine_function): Add + split_stack_varargs_pointer field. + * config/i386/linux.h (TARGET_CAN_SPLIT_STACK): Define. + (TARGET_THREAD_SPLIT_STACK_OFFSET): Define. + * config/i386/linux64.h (TARGET_CAN_SPLIT_STACK): Define. + (TARGET_THREAD_SPLIT_STACK_OFFSET): Define. + * config/i386/i386-protos.h (ix86_expand_split_stack_prologue): + Declare. + (ix86_expand_call): Update declaration. + 2010-09-27 Nicola Pero <nicola.pero@meta-innovation.com> * doc/objc.texi (Type encoding): Added the new 'long double' (D) diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 25603f90e25..e16c8718b88 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,8 @@ +2010-09-27 Ian Lance Taylor <iant@google.com> + + * c-common.c (c_common_attribute_table): Add no_split_stack. + (handle_no_split_stack_attribute): New static function. + 2010-09-27 Nicola Pero <nicola.pero@meta-innovation.com> Merge from 'apple/trunk' branch on FSF servers. 
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index 6ef3bf99542..63e2d70fa9d 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -358,6 +358,7 @@ static tree handle_type_generic_attribute (tree *, tree, tree, int, bool *); static tree handle_alloc_size_attribute (tree *, tree, tree, int, bool *); static tree handle_target_attribute (tree *, tree, tree, int, bool *); static tree handle_optimize_attribute (tree *, tree, tree, int, bool *); +static tree handle_no_split_stack_attribute (tree *, tree, tree, int, bool *); static tree handle_fnspec_attribute (tree *, tree, tree, int, bool *); static void check_function_nonnull (tree, int, tree *); @@ -661,6 +662,8 @@ const struct attribute_spec c_common_attribute_table[] = handle_target_attribute }, { "optimize", 1, -1, true, false, false, handle_optimize_attribute }, + { "no_split_stack", 0, 0, true, false, false, + handle_no_split_stack_attribute }, /* For internal use (marking of builtins and runtime functions) only. The name contains space to prevent its usage in source code. */ { "fn spec", 1, 1, false, true, true, @@ -7841,6 +7844,32 @@ handle_optimize_attribute (tree *node, tree name, tree args, return NULL_TREE; } + +/* Handle a "no_split_stack" attribute. */ + +static tree +handle_no_split_stack_attribute (tree *node, tree name, + tree ARG_UNUSED (args), + int ARG_UNUSED (flags), + bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) != FUNCTION_DECL) + { + error_at (DECL_SOURCE_LOCATION (decl), + "%qE attribute applies only to functions", name); + *no_add_attrs = true; + } + else if (DECL_INITIAL (decl)) + { + error_at (DECL_SOURCE_LOCATION (decl), + "can%'t set %qE attribute after definition", name); + *no_add_attrs = true; + } + + return NULL_TREE; +} /* Check for valid arguments being passed to a function. ATTRS is a list of attributes. 
There are NARGS arguments in the array diff --git a/gcc/common.opt b/gcc/common.opt index 6b89c3294ed..aa17ae80b3f 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1426,6 +1426,10 @@ fsplit-ivs-in-unroller Common Report Var(flag_split_ivs_in_unroller) Init(1) Optimization Split lifetimes of induction variables when loops are unrolled +fsplit-stack +Common Report Var(flag_split_stack) Init(-1) +Generate discontiguous stack frames + fsplit-wide-types Common Report Var(flag_split_wide_types) Optimization Split wide types into independent registers diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index fd31e9917f5..700dec1e520 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -29,6 +29,7 @@ extern void ix86_setup_frame_addresses (void); extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int); extern void ix86_expand_prologue (void); extern void ix86_expand_epilogue (int); +extern void ix86_expand_split_stack_prologue (void); extern void ix86_output_addr_vec_elt (FILE *, int); extern void ix86_output_addr_diff_elt (FILE *, int, int); @@ -117,7 +118,7 @@ extern bool ix86_expand_int_vcond (rtx[]); extern void ix86_expand_sse_unpack (rtx[], bool, bool); extern void ix86_expand_sse4_unpack (rtx[], bool, bool); extern bool ix86_expand_int_addcc (rtx[]); -extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); +extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); extern void x86_initialize_trampoline (rtx, rtx, rtx); extern rtx ix86_zero_extend_to_Pmode (rtx); extern void ix86_split_long_move (rtx[]); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 627d8d20ea0..7d4126beece 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1989,6 +1989,8 @@ static void ix86_add_new_builtins (int); static rtx ix86_expand_vec_perm_builtin (tree); static tree ix86_canonical_va_list_type (tree); static void predict_jump (int); +static unsigned int 
split_stack_prologue_scratch_regno (void); +static bool i386_asm_output_addr_const_extra (FILE *, rtx); enum ix86_function_specific_strings { @@ -3649,7 +3651,7 @@ ix86_option_override_internal (bool main_args_p) /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) can be optimized to ap = __builtin_next_arg (0). */ - if (!TARGET_64BIT) + if (!TARGET_64BIT && !flag_split_stack) targetm.expand_builtin_va_start = NULL; if (TARGET_64BIT) @@ -4890,6 +4892,10 @@ ix86_function_regparm (const_tree type, const_tree decl) if (local_regparm == 3 && DECL_STATIC_CHAIN (decl)) local_regparm = 2; + /* In 32-bit mode save a register for the split stack. */ + if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack) + local_regparm = 2; + /* Each fixed register usage increases register pressure, so less registers should be used for argument passing. This functionality can be overriden by an explicit @@ -7248,10 +7254,56 @@ ix86_va_start (tree valist, rtx nextarg) tree gpr, fpr, ovf, sav, t; tree type; + rtx ovf_rtx; + + if (flag_split_stack + && cfun->machine->split_stack_varargs_pointer == NULL_RTX) + { + unsigned int scratch_regno; + + /* When we are splitting the stack, we can't refer to the stack + arguments using internal_arg_pointer, because they may be on + the old stack. The split stack prologue will arrange to + leave a pointer to the old stack arguments in a scratch + register, which we here copy to a pseudo-register. The split + stack prologue can't set the pseudo-register directly because + it (the prologue) runs before any registers have been saved. 
*/ + + scratch_regno = split_stack_prologue_scratch_regno (); + if (scratch_regno != INVALID_REGNUM) + { + rtx reg, seq; + + reg = gen_reg_rtx (Pmode); + cfun->machine->split_stack_varargs_pointer = reg; + + start_sequence (); + emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); + seq = get_insns (); + end_sequence (); + + push_topmost_sequence (); + emit_insn_after (seq, entry_of_function ()); + pop_topmost_sequence (); + } + } + /* Only 64bit target needs something special. */ if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) { - std_expand_builtin_va_start (valist, nextarg); + if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) + std_expand_builtin_va_start (valist, nextarg); + else + { + rtx va_r, next; + + va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE); + next = expand_binop (ptr_mode, add_optab, + cfun->machine->split_stack_varargs_pointer, + crtl->args.arg_offset_rtx, + NULL_RTX, 0, OPTAB_LIB_WIDEN); + convert_move (va_r, next, 0); + } return; } @@ -7297,7 +7349,11 @@ ix86_va_start (tree valist, rtx nextarg) /* Find the overflow area. */ type = TREE_TYPE (ovf); - t = make_tree (type, crtl->args.internal_arg_pointer); + if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) + ovf_rtx = crtl->args.internal_arg_pointer; + else + ovf_rtx = cfun->machine->split_stack_varargs_pointer; + t = make_tree (type, ovf_rtx); if (words != 0) t = build2 (POINTER_PLUS_EXPR, type, t, size_int (words * UNITS_PER_WORD)); @@ -8042,6 +8098,9 @@ ix86_code_end (void) set_cfun (NULL); current_function_decl = NULL; } + + if (flag_split_stack) + file_end_indicate_split_stack (); } /* Emit code for the SET_GOT patterns. */ @@ -8344,6 +8403,29 @@ ix86_builtin_setjmp_frame_value (void) return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; } +/* On the x86 -fsplit-stack and -fstack-protector both use the same + field in the TCB, so they can not be used together. 
*/ + +static bool +ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED) +{ + bool ret = true; + +#ifndef TARGET_THREAD_SPLIT_STACK_OFFSET + if (report) + error ("%<-fsplit-stack%> currently only supported on GNU/Linux"); + ret = false; +#endif + + return ret; +} + +/* When using -fsplit-stack, the allocation routines set a field in + the TCB to the bottom of the stack plus this much space, measured + in bytes. */ + +#define SPLIT_STACK_AVAILABLE 256 + /* Fill structure ix86_frame about frame of currently computed function. */ static void @@ -10334,6 +10416,277 @@ ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, #endif } + +/* Return a scratch register to use in the split stack prologue. The + split stack prologue is used for -fsplit-stack. It is the first + instructions in the function, even before the regular prologue. + The scratch register can be any caller-saved register which is not + used for parameters or for the static chain. */ + +static unsigned int +split_stack_prologue_scratch_regno (void) +{ + if (TARGET_64BIT) + return R11_REG; + else + { + bool is_fastcall; + int regparm; + + is_fastcall = (lookup_attribute ("fastcall", + TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) + != NULL); + regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); + + if (is_fastcall) + { + if (DECL_STATIC_CHAIN (cfun->decl)) + { + sorry ("-fsplit-stack does not support fastcall with " + "nested function"); + return INVALID_REGNUM; + } + return AX_REG; + } + else if (regparm < 3) + { + if (!DECL_STATIC_CHAIN (cfun->decl)) + return CX_REG; + else + { + if (regparm >= 2) + { + sorry ("-fsplit-stack does not support 2 register " + " parameters for a nested function"); + return INVALID_REGNUM; + } + return DX_REG; + } + } + else + { + /* FIXME: We could make this work by pushing a register + around the addition and comparison. 
*/ + sorry ("-fsplit-stack does not support 3 register parameters"); + return INVALID_REGNUM; + } + } +} + +/* A SYMBOL_REF for the function which allocates new stackspace for + -fsplit-stack. */ + +static GTY(()) rtx split_stack_fn; + +/* Handle -fsplit-stack. These are the first instructions in the + function, even before the regular prologue. */ + +void +ix86_expand_split_stack_prologue (void) +{ + struct ix86_frame frame; + HOST_WIDE_INT allocate; + int args_size; + rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage; + rtx scratch_reg = NULL_RTX; + rtx varargs_label = NULL_RTX; + + gcc_assert (flag_split_stack && reload_completed); + + ix86_finalize_stack_realign_flags (); + ix86_compute_frame_layout (&frame); + allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; + + /* This is the label we will branch to if we have enough stack + space. We expect the basic block reordering pass to reverse this + branch if optimizing, so that we branch in the unlikely case. */ + label = gen_label_rtx (); + + /* We need to compare the stack pointer minus the frame size with + the stack boundary in the TCB. The stack boundary always gives + us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we + can compare directly. Otherwise we need to do an addition. */ + + limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_STACK_CHECK); + limit = gen_rtx_CONST (Pmode, limit); + limit = gen_rtx_MEM (Pmode, limit); + if (allocate < SPLIT_STACK_AVAILABLE) + current = stack_pointer_rtx; + else + { + unsigned int scratch_regno; + rtx offset; + + /* We need a scratch register to hold the stack pointer minus + the required frame size. Since this is the very start of the + function, the scratch register can be any caller-saved + register which is not used for parameters. 
*/ + offset = GEN_INT (- allocate); + scratch_regno = split_stack_prologue_scratch_regno (); + if (scratch_regno == INVALID_REGNUM) + return; + scratch_reg = gen_rtx_REG (Pmode, scratch_regno); + if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode)) + { + /* We don't use ix86_gen_add3 in this case because it will + want to split to lea, but when not optimizing the insn + will not be split after this point. */ + emit_insn (gen_rtx_SET (VOIDmode, scratch_reg, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + offset))); + } + else + { + emit_move_insn (scratch_reg, offset); + emit_insn (gen_adddi3 (scratch_reg, scratch_reg, + stack_pointer_rtx)); + } + current = scratch_reg; + } + + ix86_expand_branch (GEU, current, limit, label); + jump_insn = get_last_insn (); + JUMP_LABEL (jump_insn) = label; + + /* Mark the jump as very likely to be taken. */ + add_reg_note (jump_insn, REG_BR_PROB, + GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100)); + + /* Get more stack space. We pass in the desired stack space and the + size of the arguments to copy to the new stack. In 32-bit mode + we push the parameters; __morestack will return on a new stack + anyhow. In 64-bit mode we pass the parameters in r10 and + r11. */ + allocate_rtx = GEN_INT (allocate); + args_size = crtl->args.size >= 0 ? crtl->args.size : 0; + call_fusage = NULL_RTX; + if (TARGET_64BIT) + { + rtx reg; + + reg = gen_rtx_REG (Pmode, R10_REG); + + /* If this function uses a static chain, it will be in %r10. + Preserve it across the call to __morestack. 
*/ + if (DECL_STATIC_CHAIN (cfun->decl)) + { + rtx rax; + + rax = gen_rtx_REG (Pmode, AX_REG); + emit_move_insn (rax, reg); + use_reg (&call_fusage, rax); + } + + emit_move_insn (reg, allocate_rtx); + use_reg (&call_fusage, reg); + reg = gen_rtx_REG (Pmode, R11_REG); + emit_move_insn (reg, GEN_INT (args_size)); + use_reg (&call_fusage, reg); + } + else + { + emit_insn (gen_push (GEN_INT (args_size))); + emit_insn (gen_push (allocate_rtx)); + } + if (split_stack_fn == NULL_RTX) + split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); + call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn), + GEN_INT (UNITS_PER_WORD), constm1_rtx, + NULL_RTX, 0); + add_function_usage_to (call_insn, call_fusage); + + /* In order to make call/return prediction work right, we now need + to execute a return instruction. See + libgcc/config/i386/morestack.S for the details on how this works. + + For flow purposes gcc must not see this as a return + instruction--we need control flow to continue at the subsequent + label. Therefore, we use an unspec. */ + gcc_assert (crtl->args.pops_args < 65536); + emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); + + /* If we are in 64-bit mode and this function uses a static chain, + we saved %r10 in %rax before calling __morestack. */ + if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl)) + emit_move_insn (gen_rtx_REG (Pmode, R10_REG), + gen_rtx_REG (Pmode, AX_REG)); + + /* If this function calls va_start, we need to store a pointer to + the arguments on the old stack, because they may not have been + all copied to the new stack. At this point the old stack can be + found at the frame pointer value used by __morestack, because + __morestack has set that up before calling back to us. Here we + store that pointer in a scratch register, and in + ix86_expand_prologue we store the scratch register in a stack + slot. 
*/ + if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) + { + unsigned int scratch_regno; + rtx frame_reg; + int words; + + scratch_regno = split_stack_prologue_scratch_regno (); + scratch_reg = gen_rtx_REG (Pmode, scratch_regno); + frame_reg = gen_rtx_REG (Pmode, BP_REG); + + /* 64-bit: + fp -> old fp value + return address within this function + return address of caller of this function + stack arguments + So we add three words to get to the stack arguments. + + 32-bit: + fp -> old fp value + return address within this function + first argument to __morestack + second argument to __morestack + return address of caller of this function + stack arguments + So we add five words to get to the stack arguments. + */ + words = TARGET_64BIT ? 3 : 5; + emit_insn (gen_rtx_SET (VOIDmode, scratch_reg, + gen_rtx_PLUS (Pmode, frame_reg, + GEN_INT (words * UNITS_PER_WORD)))); + + varargs_label = gen_label_rtx (); + emit_jump_insn (gen_jump (varargs_label)); + JUMP_LABEL (get_last_insn ()) = varargs_label; + + emit_barrier (); + } + + emit_label (label); + LABEL_NUSES (label) = 1; + + /* If this function calls va_start, we now have to set the scratch + register for the case where we do not call __morestack. In this + case we need to set it based on the stack pointer. */ + if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) + { + emit_insn (gen_rtx_SET (VOIDmode, scratch_reg, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (UNITS_PER_WORD)))); + + emit_label (varargs_label); + LABEL_NUSES (varargs_label) = 1; + } +} + +/* We may have to tell the dataflow pass that the split stack prologue + is initializing a scratch register. */ + +static void +ix86_live_on_entry (bitmap regs) +{ + if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) + { + gcc_assert (flag_split_stack); + bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); + } +} /* Extract the parts of an RTL expression that is a valid memory address for an instruction. 
Return 0 if the structure of the address is @@ -10979,6 +11332,10 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, case UNSPEC_DTPOFF: break; + case UNSPEC_STACK_CHECK: + gcc_assert (flag_split_stack); + break; + default: /* Invalid address unspec. */ return false; @@ -11870,6 +12227,13 @@ output_pic_addr_const (FILE *file, rtx x, int code) break; case UNSPEC: + if (XINT (x, 1) == UNSPEC_STACK_CHECK) + { + bool f = i386_asm_output_addr_const_extra (file, x); + gcc_assert (f); + break; + } + gcc_assert (XVECLEN (x, 0) == 1); output_pic_addr_const (file, XVECEXP (x, 0, 0), code); switch (XINT (x, 1)) @@ -13273,6 +13637,22 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x) break; #endif + case UNSPEC_STACK_CHECK: + { + int offset; + + gcc_assert (flag_split_stack); + +#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET + offset = TARGET_THREAD_SPLIT_STACK_OFFSET; +#else + gcc_unreachable (); +#endif + + fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset); + } + break; + default: return false; } @@ -20359,7 +20739,7 @@ construct_plt_address (rtx symbol) return tmp; } -void +rtx ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2, rtx pop, int sibcall) @@ -20450,6 +20830,8 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, call = emit_call_insn (call); if (use) CALL_INSN_FUNCTION_USAGE (call) = use; + + return call; } @@ -32856,6 +33238,9 @@ ix86_units_per_simd_word (enum machine_mode mode) #undef TARGET_STACK_PROTECT_FAIL #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail +#undef TARGET_SUPPORTS_SPLIT_STACK +#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack + #undef TARGET_FUNCTION_VALUE #define TARGET_FUNCTION_VALUE ix86_function_value @@ -32914,6 +33299,9 @@ ix86_units_per_simd_word (enum machine_mode mode) #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE ix86_can_eliminate +#undef TARGET_EXTRA_LIVE_ON_ENTRY +#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry + #undef TARGET_ASM_CODE_END 
#define TARGET_ASM_CODE_END ix86_code_end diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b3439bc9f0c..e1c13ac3af4 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2277,6 +2277,13 @@ struct GTY(()) machine_function { has been computed for. */ int use_fast_prologue_epilogue_nregs; + /* For -fsplit-stack support: A stack local which holds a pointer to + the stack arguments for a function with a variable number of + arguments. This is set at the start of the function and is used + to initialize the overflow_arg_area field of the va_list + structure. */ + rtx split_stack_varargs_pointer; + /* This value is used for amd64 targets and specifies the current abi to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi. */ ENUM_BITFIELD(calling_abi) call_abi : 8; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c541c1485c4..51e375470e1 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -85,6 +85,7 @@ UNSPEC_SET_RIP UNSPEC_SET_GOT_OFFSET UNSPEC_MEMORY_BLOCKAGE + UNSPEC_STACK_CHECK ;; TLS support UNSPEC_TP @@ -11612,6 +11613,64 @@ "leave" [(set_attr "type" "leave")]) +;; Handle -fsplit-stack. + +(define_expand "split_stack_prologue" + [(const_int 0)] + "" +{ + ix86_expand_split_stack_prologue (); + DONE; +}) + +;; In order to support the call/return predictor, we use a return +;; instruction which the middle-end doesn't see. 
+(define_insn "split_stack_return" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] + UNSPEC_STACK_CHECK)] + "" +{ + if (operands[0] == const0_rtx) + return "ret"; + else + return "ret\t%0"; +} + [(set_attr "atom_unit" "jeu") + (set_attr "modrm" "0") + (set (attr "length") + (if_then_else (match_operand:SI 0 "const0_operand" "") + (const_int 1) + (const_int 3))) + (set (attr "length_immediate") + (if_then_else (match_operand:SI 0 "const0_operand" "") + (const_int 0) + (const_int 2)))]) + +;; If there are operand 0 bytes available on the stack, jump to +;; operand 1. + +(define_expand "split_stack_space_check" + [(set (pc) (if_then_else + (ltu (minus (reg SP_REG) + (match_operand 0 "register_operand" "")) + (unspec [(const_int 0)] UNSPEC_STACK_CHECK)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" +{ + rtx reg, size, limit; + + reg = gen_reg_rtx (Pmode); + size = force_reg (Pmode, operands[0]); + emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, size)); + limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_STACK_CHECK); + limit = gen_rtx_MEM (Pmode, gen_rtx_CONST (Pmode, limit)); + ix86_expand_branch (GEU, reg, limit, operands[1]); + + DONE; +}) + ;; Bit manipulation instructions. (define_expand "ffs<mode>2" diff --git a/gcc/config/i386/linux.h b/gcc/config/i386/linux.h index 81dfd1e2509..7564c70b6d2 100644 --- a/gcc/config/i386/linux.h +++ b/gcc/config/i386/linux.h @@ -218,4 +218,8 @@ along with GCC; see the file COPYING3. If not see #ifdef TARGET_LIBC_PROVIDES_SSP /* i386 glibc provides __stack_chk_guard in %gs:0x14. */ #define TARGET_THREAD_SSP_OFFSET 0x14 + +/* We steal the last transactional memory word. 
*/ +#define TARGET_CAN_SPLIT_STACK +#define TARGET_THREAD_SPLIT_STACK_OFFSET 0x30 #endif diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h index fda73d21107..4a3e366fb4e 100644 --- a/gcc/config/i386/linux64.h +++ b/gcc/config/i386/linux64.h @@ -123,4 +123,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* i386 glibc provides __stack_chk_guard in %gs:0x14, x86_64 glibc provides it in %fs:0x28. */ #define TARGET_THREAD_SSP_OFFSET (TARGET_64BIT ? 0x28 : 0x14) + +/* We steal the last transactional memory word. */ +#define TARGET_CAN_SPLIT_STACK +#define TARGET_THREAD_SPLIT_STACK_OFFSET (TARGET_64BIT ? 0x70 : 0x30) #endif diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 7073c902abd..877dec23959 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -1926,7 +1926,8 @@ attributes are currently defined for functions on all targets: @code{returns_twice}, @code{noinline}, @code{noclone}, @code{always_inline}, @code{flatten}, @code{pure}, @code{const}, @code{nothrow}, @code{sentinel}, @code{format}, @code{format_arg}, -@code{no_instrument_function}, @code{section}, @code{constructor}, +@code{no_instrument_function}, @code{no_split_stack}, +@code{section}, @code{constructor}, @code{destructor}, @code{used}, @code{unused}, @code{deprecated}, @code{weak}, @code{malloc}, @code{alias}, @code{ifunc}, @code{warn_unused_result}, @code{nonnull}, @code{gnu_inline}, @@ -2856,6 +2857,14 @@ If @option{-finstrument-functions} is given, profiling function calls will be generated at entry and exit of most user-compiled functions. Functions with this attribute will not be so instrumented. +@item no_split_stack +@cindex @code{no_split_stack} function attribute +@opindex fsplit-stack +If @option{-fsplit-stack} is given, functions will have a small +prologue which decides whether to split the stack. 
Functions with the +@code{no_split_stack} attribute will not have that prologue, and thus +may run with only a small amount of stack space available. + @item noinline @cindex @code{noinline} function attribute This function attribute prevents a function from being considered for diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index ee62b1b9569..bb17a0e2cac 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -927,7 +927,7 @@ See S/390 and zSeries Options. -fshort-double -fshort-wchar @gol -fverbose-asm -fpack-struct[=@var{n}] -fstack-check @gol -fstack-limit-register=@var{reg} -fstack-limit-symbol=@var{sym} @gol --fno-stack-limit @gol +-fno-stack-limit -fsplit-stack @gol -fleading-underscore -ftls-model=@var{model} @gol -ftrapv -fwrapv -fbounds-check @gol -fvisibility} @@ -17857,6 +17857,25 @@ and grows downwards, you can use the flags @option{-Wl,--defsym,__stack_limit=0x7ffe0000} to enforce a stack limit of 128KB@. Note that this may only work with the GNU linker. +@item -fsplit-stack +@opindex fsplit-stack +Generate code to automatically split the stack before it overflows. +The resulting program has a discontiguous stack which can only +overflow if the program is unable to allocate any more memory. This +is most useful when running threaded programs, as it is no longer +necessary to calculate a good stack size to use for each thread. This +is currently only implemented for the i386 and x86_64 backends running +GNU/Linux. + +When code compiled with @option{-fsplit-stack} calls code compiled +without @option{-fsplit-stack}, there may not be much stack space +available for the latter code to run. If compiling all code, +including library code, with @option{-fsplit-stack} is not an option, +then the linker can fix up these calls so that the code compiled +without @option{-fsplit-stack} always has a large stack. Support for +this is implemented in the gold linker in GNU binutils release 2.21 +and later. 
+ @item -fleading-underscore @opindex fleading-underscore This option and its counterpart, @option{-fno-leading-underscore}, forcibly diff --git a/gcc/doc/libgcc.texi b/gcc/doc/libgcc.texi index 8ead53d90a9..5be6c306729 100644 --- a/gcc/doc/libgcc.texi +++ b/gcc/doc/libgcc.texi @@ -2262,3 +2262,43 @@ document me! @deftypefn {Runtime Function} void __clear_cache (char *@var{beg}, char *@var{end}) This function clears the instruction cache between @var{beg} and @var{end}. @end deftypefn + +@subsection Split stack functions and variables +@deftypefn {Runtime Function} {void *} __splitstack_find (void *@var{segment_arg}, @ +void *@var{sp}, size_t @var{len}, void **@var{next_segment}, @ +void **@var{next_sp}, void **@var{initial_sp}) +When using @option{-fsplit-stack}, this call may be used to iterate +over the stack segments. It may be called like this: +@smallexample + void *next_segment = NULL; + void *next_sp = NULL; + void *initial_sp = NULL; + void *stack; + size_t stack_size; + while ((stack = __splitstack_find (next_segment, next_sp, + &stack_size, &next_segment, + &next_sp, &initial_sp)) + != NULL) + @{ + /* Stack segment starts at stack and is + stack_size bytes long. */ + @} +@end smallexample + +There is no way to iterate over the stack segments of a different +thread. However, what is permitted is for one thread to call this +with the @var{segment_arg} and @var{sp} arguments NULL, to pass +@var{next_segment}, @var{next_sp}, and @var{initial_sp} to a different +thread, and then to suspend one way or another. A different thread +may run the subsequent @code{__splitstack_find} iterations. Of +course, this will only work if the first thread is suspended while the +second thread is calling @code{__splitstack_find}. If not, the second +thread could be looking at the stack while it is changing, and +anything could happen. 
+@end deftypefn + +@defvar __morestack_segments +@defvarx __morestack_current_segment +@defvarx __morestack_initial_sp +Internal variables used by the @option{-fsplit-stack} implementation. +@end defvar diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index a5356ebbcbb..3737314d0c5 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -4960,6 +4960,10 @@ The default version of this hook invokes a function called normally defined in @file{libgcc2.c}. @end deftypefn +@deftypefn {Target Hook} bool TARGET_SUPPORTS_SPLIT_STACK (bool) +Whether this target supports splitting the stack. This is called after options have been parsed, so the target may reject splitting the stack in some configurations. The default version of this hook returns false. If @var{report} is true, this function may issue a warning or error; if @var{report} is false, it must simply return a value +@end deftypefn + @node Varargs @section Implementing the Varargs Macros @cindex varargs implementation diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 2fcd97956dd..ea3087fb29c 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4960,6 +4960,8 @@ The default version of this hook invokes a function called normally defined in @file{libgcc2.c}. @end deftypefn +@hook TARGET_SUPPORTS_SPLIT_STACK + @node Varargs @section Implementing the Varargs Macros @cindex varargs implementation diff --git a/gcc/explow.c b/gcc/explow.c index 2024b5324d8..21af58a7c81 100644 --- a/gcc/explow.c +++ b/gcc/explow.c @@ -1,6 +1,6 @@ /* Subroutines for manipulating rtx's in semantically interesting ways. Copyright (C) 1987, 1991, 1994, 1995, 1996, 1997, 1998, - 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 + 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. This file is part of GCC. 
@@ -1146,6 +1146,7 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align, { HOST_WIDE_INT stack_usage_size = -1; bool known_align_valid = true; + rtx final_label, final_target; /* If we're asking for zero bytes, it doesn't matter what we point to since we can't dereference it. But return a reasonable @@ -1284,6 +1285,14 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align, } } + /* Don't use a TARGET that isn't a pseudo or is the wrong mode. */ + if (target == 0 || !REG_P (target) + || REGNO (target) < FIRST_PSEUDO_REGISTER + || GET_MODE (target) != Pmode) + target = gen_reg_rtx (Pmode); + + mark_reg_pointer (target, known_align); + /* The size is supposed to be fully adjusted at this point so record it if stack usage info is requested. */ if (flag_stack_usage) @@ -1296,6 +1305,52 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align, current_function_has_unbounded_dynamic_stack_size = 1; } + final_label = NULL_RTX; + final_target = NULL_RTX; + + /* If we are splitting the stack, we need to ask the backend whether + there is enough room on the current stack. If there isn't, or if + the backend doesn't know how to tell, then we need to call a + function to allocate memory in some other way. This memory will + be released when we release the current stack segment. The + effect is that stack allocation becomes less efficient, but at + least it doesn't cause a stack overflow. */ + if (flag_split_stack) + { + rtx available_label, space, func; + + available_label = NULL_RTX; + +#ifdef HAVE_split_stack_space_check + if (HAVE_split_stack_space_check) + { + available_label = gen_label_rtx (); + + /* This instruction will branch to AVAILABLE_LABEL if there + are SIZE bytes available on the stack. 
*/ + emit_insn (gen_split_stack_space_check (size, available_label)); + } +#endif + + func = init_one_libfunc ("__morestack_allocate_stack_space"); + + space = emit_library_call_value (func, target, LCT_NORMAL, Pmode, + 1, size, Pmode); + + if (available_label == NULL_RTX) + return space; + + final_target = gen_reg_rtx (Pmode); + mark_reg_pointer (final_target, known_align); + + emit_move_insn (final_target, space); + + final_label = gen_label_rtx (); + emit_jump (final_label); + + emit_label (available_label); + } + do_pending_stack_adjust (); /* We ought to be called always on the toplevel and stack ought to be aligned @@ -1313,14 +1368,6 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align, else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) probe_stack_range (STACK_CHECK_PROTECT, size); - /* Don't use a TARGET that isn't a pseudo or is the wrong mode. */ - if (target == 0 || !REG_P (target) - || REGNO (target) < FIRST_PSEUDO_REGISTER - || GET_MODE (target) != Pmode) - target = gen_reg_rtx (Pmode); - - mark_reg_pointer (target, known_align); - /* Perform the required allocation from the stack. Some systems do this differently than simply incrementing/decrementing from the stack pointer, such as acquiring the space by calling malloc(). */ @@ -1413,6 +1460,15 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align, if (cfun->nonlocal_goto_save_area != 0) update_nonlocal_goto_save_area (); + /* Finish up the split stack handling. 
*/ + if (final_label != NULL_RTX) + { + gcc_assert (flag_split_stack); + emit_move_insn (final_target, target); + emit_label (final_label); + target = final_target; + } + return target; } diff --git a/gcc/function.c b/gcc/function.c index a11f248dc35..04a2ebcb4f1 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -5214,17 +5214,50 @@ emit_return_into_block (basic_block bb) static void thread_prologue_and_epilogue_insns (void) { - int inserted = 0; + bool inserted; + rtx seq, epilogue_end; + edge entry_edge; edge e; -#if defined (HAVE_sibcall_epilogue) || defined (HAVE_epilogue) || defined (HAVE_return) || defined (HAVE_prologue) - rtx seq; -#endif -#if defined (HAVE_epilogue) || defined(HAVE_return) - rtx epilogue_end = NULL_RTX; -#endif edge_iterator ei; rtl_profile_for_bb (ENTRY_BLOCK_PTR); + + inserted = false; + seq = NULL_RTX; + epilogue_end = NULL_RTX; + + /* Can't deal with multiple successors of the entry block at the + moment. Function should always have at least one entry + point. */ + gcc_assert (single_succ_p (ENTRY_BLOCK_PTR)); + entry_edge = single_succ_edge (ENTRY_BLOCK_PTR); + + if (flag_split_stack + && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl)) + == NULL)) + { +#ifndef HAVE_split_stack_prologue + gcc_unreachable (); +#else + gcc_assert (HAVE_split_stack_prologue); + + start_sequence (); + emit_insn (gen_split_stack_prologue ()); + seq = get_insns (); + end_sequence (); + + record_insns (seq, NULL, &prologue_insn_hash); + set_insn_locators (seq, prologue_locator); + + /* This relies on the fact that committing the edge insertion + will look for basic blocks within the inserted instructions, + which in turn relies on the fact that we are not in CFG + layout mode here. 
*/ + insert_insn_on_edge (seq, entry_edge); + inserted = true; +#endif + } + #ifdef HAVE_prologue if (HAVE_prologue) { @@ -5251,13 +5284,8 @@ thread_prologue_and_epilogue_insns (void) end_sequence (); set_insn_locators (seq, prologue_locator); - /* Can't deal with multiple successors of the entry block - at the moment. Function should always have at least one - entry point. */ - gcc_assert (single_succ_p (ENTRY_BLOCK_PTR)); - - insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); - inserted = 1; + insert_insn_on_edge (seq, entry_edge); + inserted = true; } #endif @@ -5427,7 +5455,7 @@ thread_prologue_and_epilogue_insns (void) end_sequence (); insert_insn_on_edge (seq, e); - inserted = 1; + inserted = true; } else #endif diff --git a/gcc/gcc.c b/gcc/gcc.c index 2614faf5b7b..cd201f53b95 100644 --- a/gcc/gcc.c +++ b/gcc/gcc.c @@ -545,6 +545,16 @@ proper position among the other output files. */ #define MFLIB_SPEC "%{fmudflap|fmudflapth: -export-dynamic}" #endif +/* When using -fsplit-stack we need to wrap pthread_create, in order + to initialize the stack guard. We always use wrapping, rather than + shared library ordering, and we keep the wrapper function in + libgcc. This is not yet a real spec, though it could become one; + it is currently just stuffed into LINK_COMMAND_SPEC. FIXME: This wrapping + only works with GNU ld and gold. FIXME: This is incompatible with + -fmudflap when linking statically, which wants to do its own + wrapping. */ +#define STACK_SPLIT_SPEC " %{fsplit-stack: --wrap=pthread_create}" + /* config.h can define LIBGCC_SPEC to override how and when libgcc.a is included. */ #ifndef LIBGCC_SPEC @@ -657,7 +667,8 @@ proper position among the other output files. 
*/ "%X %{o*} %{A} %{d} %{e*} %{m} %{N} %{n} %{r}\ %{s} %{t} %{u*} %{x} %{z} %{Z} %{!A:%{!nostdlib:%{!nostartfiles:%S}}}\ %{static:} %{L*} %(mfwrap) %(link_libgcc) %o\ - %{fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)} %(mflib)\ + %{fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\ + %(mflib) " STACK_SPLIT_SPEC "\ %{fprofile-arcs|fprofile-generate*|coverage:-lgcov}\ %{!nostdlib:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}\ %{!A:%{!nostdlib:%{!nostartfiles:%E}}} %{T*} }}}}}}" diff --git a/gcc/libgcc-std.ver b/gcc/libgcc-std.ver index 1f24f1771e8..c1326351025 100644 --- a/gcc/libgcc-std.ver +++ b/gcc/libgcc-std.ver @@ -1914,4 +1914,8 @@ GCC_4.5.0 { %inherit GCC_4.6.0 GCC_4.5.0 GCC_4.6.0 { + __morestack_segments + __morestack_current_segment + __morestack_initial_sp + __splitstack_find } diff --git a/gcc/opts.c b/gcc/opts.c index 643f88999d5..40e6acde262 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -1086,6 +1086,20 @@ decode_options (unsigned int argc, const char **argv, check option consistency. */ if (flag_lto && flag_whopr) error ("-flto and -fwhopr are mutually exclusive"); + + /* We initialize flag_split_stack to -1 so that targets can set a + default value if they choose based on other options. 
*/ + if (flag_split_stack == -1) + flag_split_stack = 0; + else + { + if (!targetm.supports_split_stack (true)) + { + error ("%<-fsplit-stack%> is not supported by " + "this compiler configuration"); + flag_split_stack = 0; + } + } } #define LEFT_COLUMN 27 diff --git a/gcc/output.h b/gcc/output.h index 8371aa05f83..2a70fd25bdd 100644 --- a/gcc/output.h +++ b/gcc/output.h @@ -632,6 +632,7 @@ extern void default_asm_declare_constant_name (FILE *, const char *, const_tree, HOST_WIDE_INT); extern void default_file_start (void); extern void file_end_indicate_exec_stack (void); +extern void file_end_indicate_split_stack (void); extern void default_elf_asm_output_external (FILE *file, tree, const char *); diff --git a/gcc/target.def b/gcc/target.def index 35c9799062c..a60e75f3e3a 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1679,6 +1679,16 @@ DEFHOOK tree, (void), default_external_stack_protect_fail) +DEFHOOK +(supports_split_stack, + "Whether this target supports splitting the stack. This is called\ + after options have been parsed, so the target may reject splitting\ + the stack in some configurations. The default version of this hook\ + returns false. If @var{report} is true, this function may issue a warning\ + or error; if @var{report} is false, it must simply return a value", + bool, (bool), + hook_bool_bool_false) + /* Returns NULL if target supports the insn within a doloop block, otherwise it returns an error message. */ DEFHOOK diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 150dac4e4b8..4f795dddb99 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2010-09-27 Ian Lance Taylor <iant@google.com> + + * lib/target-supports.exp (check_effective_target_split_stack): + New procedure. + * gcc.dg/split-1.c: New test. + * gcc.dg/split-2.c: New test. + * gcc.dg/split-3.c: New test. + * gcc.dg/split-4.c: New test. 
+ 2010-09-27 Nicola Pero <nicola.pero@meta-innovation.com> PR objc/25464 diff --git a/gcc/testsuite/gcc.dg/split-1.c b/gcc/testsuite/gcc.dg/split-1.c new file mode 100644 index 00000000000..044b4e2889b --- /dev/null +++ b/gcc/testsuite/gcc.dg/split-1.c @@ -0,0 +1,49 @@ +/* This test needs to use setrlimit to set the stack size, so it can + only run on Unix. */ +/* { dg-do run { target *-*-linux* *-*-solaris* *-*-darwin* } } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-options "-fsplit-stack" } */ + +#include <stdlib.h> +#include <sys/types.h> +#include <sys/resource.h> + +/* Use a noinline function to ensure that the buffer is not removed + from the stack. */ +static void use_buffer (char *buf) __attribute__ ((noinline)); +static void +use_buffer (char *buf) +{ + buf[0] = '\0'; +} + +/* Each recursive call uses 10,000 bytes. We call it 1000 times, + using a total of 10,000,000 bytes. If -fsplit-stack is not + working, that will overflow our stack limit. */ + +static void +down (int i) +{ + char buf[10000]; + + if (i > 0) + { + use_buffer (buf); + down (i - 1); + } +} + +int +main (void) +{ + struct rlimit r; + + /* We set a stack limit because we are usually invoked via make, and + make sets the stack limit to be as large as possible. */ + r.rlim_cur = 8192 * 1024; + r.rlim_max = 8192 * 1024; + if (setrlimit (RLIMIT_STACK, &r) != 0) + abort (); + down (1000); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/split-2.c b/gcc/testsuite/gcc.dg/split-2.c new file mode 100644 index 00000000000..208169aa095 --- /dev/null +++ b/gcc/testsuite/gcc.dg/split-2.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-require-effective-target pthread_h } */ +/* { dg-options "-pthread -fsplit-stack" } */ + +#include <stdlib.h> +#include <pthread.h> + +/* Use a noinline function to ensure that the buffer is not removed + from the stack. 
*/ +static void use_buffer (char *buf) __attribute__ ((noinline)); +static void +use_buffer (char *buf) +{ + buf[0] = '\0'; +} + +/* Each recursive call uses 10,000 bytes. We call it 1000 times, + using a total of 10,000,000 bytes. If -fsplit-stack is not + working, that will overflow our stack limit. */ + +static void +down (int i) +{ + char buf[10000]; + + if (i > 0) + { + use_buffer (buf); + down (i - 1); + } +} + +static void * +thread_routine (void *arg __attribute__ ((unused))) +{ + down (1000); + return NULL; +} + +int +main (void) +{ + int i; + pthread_t tid; + void *dummy; + + i = pthread_create (&tid, NULL, thread_routine, NULL); + if (i != 0) + abort (); + i = pthread_join (tid, &dummy); + if (i != 0) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/split-3.c b/gcc/testsuite/gcc.dg/split-3.c new file mode 100644 index 00000000000..360f6720c78 --- /dev/null +++ b/gcc/testsuite/gcc.dg/split-3.c @@ -0,0 +1,64 @@ +/* This test needs to use setrlimit to set the stack size, so it can + only run on Unix. */ +/* { dg-do run { target *-*-linux* *-*-solaris* *-*-darwin* } } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-options "-fsplit-stack" } */ + +#include <stdarg.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/resource.h> + +/* Use a noinline function to ensure that the buffer is not removed + from the stack. */ +static void use_buffer (char *buf) __attribute__ ((noinline)); +static void +use_buffer (char *buf) +{ + buf[0] = '\0'; +} + +/* Each recursive call uses 10,000 bytes. We call it 1000 times, + using a total of 10,000,000 bytes. If -fsplit-stack is not + working, that will overflow our stack limit. */ + +static void +down (int i, ...) 
+{ + char buf[10000]; + va_list ap; + + va_start (ap, i); + if (va_arg (ap, int) != 1 + || va_arg (ap, int) != 2 + || va_arg (ap, int) != 3 + || va_arg (ap, int) != 4 + || va_arg (ap, int) != 5 + || va_arg (ap, int) != 6 + || va_arg (ap, int) != 7 + || va_arg (ap, int) != 8 + || va_arg (ap, int) != 9 + || va_arg (ap, int) != 10) + abort (); + + if (i > 0) + { + use_buffer (buf); + down (i - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + } +} + +int +main (void) +{ + struct rlimit r; + + /* We set a stack limit because we are usually invoked via make, and + make sets the stack limit to be as large as possible. */ + r.rlim_cur = 8192 * 1024; + r.rlim_max = 8192 * 1024; + if (setrlimit (RLIMIT_STACK, &r) != 0) + abort (); + down (1000, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/split-4.c b/gcc/testsuite/gcc.dg/split-4.c new file mode 100644 index 00000000000..38196bed6c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/split-4.c @@ -0,0 +1,68 @@ +/* This test needs to use setrlimit to set the stack size, so it can + only run on Unix. */ +/* { dg-do run { target *-*-linux* *-*-solaris* *-*-darwin* } } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-options "-fsplit-stack" } */ + +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/resource.h> + +/* Use a noinline function to ensure that the buffer is not removed + from the stack. */ +static void use_buffer (char *buf, size_t) __attribute__ ((noinline)); +static void +use_buffer (char *buf, size_t c) +{ + size_t i; + + for (i = 0; i < c; ++i) + buf[i] = (char) i; +} + +/* Each recursive call uses 10 * i bytes. We call it 1000 times, + using a total of 5,000,000 bytes. If -fsplit-stack is not working, + that will overflow our stack limit. */ + +static void +down1 (int i) +{ + char buf[10 * i]; + + if (i > 0) + { + use_buffer (buf, 10 * i); + down1 (i - 1); + } +} + +/* Same thing, using alloca. 
*/ + +static void +down2 (int i) +{ + char *buf = alloca (10 * i); + + if (i > 0) + { + use_buffer (buf, 10 * i); + down2 (i - 1); + } +} + +int +main (void) +{ + struct rlimit r; + + /* We set a stack limit because we are usually invoked via make, and + make sets the stack limit to be as large as possible. */ + r.rlim_cur = 8192 * 1024; + r.rlim_max = 8192 * 1024; + if (setrlimit (RLIMIT_STACK, &r) != 0) + abort (); + down1 (1000); + down2 (1000); + return 0; +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index cf78c95a5df..e2174b71dee 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -3664,6 +3664,15 @@ proc check_effective_target_lto { } { return [info exists ENABLE_LTO] } +# Return 1 if this target supports the -fsplit-stack option, 0 +# otherwise. + +proc check_effective_target_split_stack {} { + return [check_no_compiler_messages split_stack object { + void foo (void) { } + } "-fsplit-stack"] +} + # Return 1 if the language for the compiler under test is C. proc check_effective_target_c { } { diff --git a/gcc/varasm.c b/gcc/varasm.c index b02462bdec4..bc314940347 100644 --- a/gcc/varasm.c +++ b/gcc/varasm.c @@ -99,6 +99,10 @@ bool first_function_block_is_cold; static alias_set_type const_alias_set; +/* Whether we saw any functions with no_split_stack. */ + +static bool saw_no_split_stack; + static const char *strip_reg_name (const char *); static int contains_pointers_p (tree); #ifdef ASM_OUTPUT_EXTERNAL @@ -1549,6 +1553,9 @@ assemble_start_function (tree decl, const char *fnname) /* Standard thing is just output label for the function. 
*/ ASM_OUTPUT_FUNCTION_LABEL (asm_out_file, fnname, current_function_decl); #endif /* ASM_DECLARE_FUNCTION_NAME */ + + if (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (decl))) + saw_no_split_stack = true; } /* Output assembler code associated with defining the size of the @@ -6649,6 +6656,28 @@ file_end_indicate_exec_stack (void) switch_to_section (get_section (".note.GNU-stack", flags, NULL)); } +/* Emit a special section directive to indicate that this object file + was compiled with -fsplit-stack. This is used to let the linker + detect calls between split-stack code and non-split-stack code, so + that it can modify the split-stack code to allocate a sufficiently + large stack. We emit another special section if there are any + functions in this file which have the no_split_stack attribute, to + prevent the linker from warning about being unable to convert the + functions if they call non-split-stack code. */ + +void +file_end_indicate_split_stack (void) +{ + if (flag_split_stack) + { + switch_to_section (get_section (".note.GNU-split-stack", SECTION_DEBUG, + NULL)); + if (saw_no_split_stack) + switch_to_section (get_section (".note.GNU-no-split-stack", + SECTION_DEBUG, NULL)); + } +} + /* Output DIRECTIVE (a C string) followed by a newline. This is used as a get_unnamed_section callback. */ |