diff options
author | rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-09-06 08:13:09 +0000 |
---|---|---|
committer | rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-09-06 08:13:09 +0000 |
commit | f5ec18cc5caf6a5925178329366811fd398e928c (patch) | |
tree | 69d3ad291660fd69dde909d9ee0d72a37de78ec4 /gcc | |
parent | 2236fd042bc8ec18f4add6ab6a916a0135a0fcda (diff) | |
download | gcc-f5ec18cc5caf6a5925178329366811fd398e928c.tar.gz |
gcc/
2008-09-06 Richard Sandiford <rdsandiford@googlemail.com>
Peter Fuerst <post@pfrst.de>
* doc/invoke.texi: Document -mr10k-cache-barrier=.
* doc/extend.texi: Document __builtin_mips_cache.
* config/mips/mips-ftypes.def: Add a (VOID, SI, CVPOINTER) entry.
* config/mips/mips.opt (mr10k-cache-barrier=): New option.
* config/mips/mips.h (TARGET_CPU_CPP_BUILTINS): Define
__GCC_HAVE_BUILTIN_MIPS_CACHE.
(TARGET_CACHE_BUILTIN, ISA_HAS_CACHE): New macros.
* config/mips/mips.c (mips_r10k_cache_barrier_setting): New enum.
(set_push_mips_isas): New variable.
(mips_r10k_cache_barrier): New variable.
(cache): New availability predicate.
(mips_builtins): Add an entry for __builtin_mips_cache.
(mips_build_cvpointer_type): New function.
(MIPS_ATYPE_CVPOINTER): New macro.
(mips_prepare_builtin_arg): Only use the insn's mode if the rtx's
mode is VOIDmode.
(r10k_simplified_address_p, r10k_simplify_address)
(r10k_uncached_address_p, r10k_safe_address_p)
(r10k_needs_protection_p_1, r10k_needs_protection_p_store)
(r10k_needs_protection_p_call, r10k_needs_protection_p)
(r10k_insert_cache_barriers): New functions.
(mips_reorg_process_insns): Delete cache barriers after a
branch-likely instruction.
(mips_reorg): Call r10k_insert_cache_barriers.
(mips_handle_option): Handle OPT_mr10k_cache_barrier_.
* config/mips/mips.md (UNSPEC_MIPS_CACHE): New constant.
(UNSPEC_R10K_CACHE_BARRIER): Likewise.
(mips_cache, r10k_cache_barrier): New define_insns.
gcc/testsuite/
* gcc.target/mips/mips.exp (dg-mips-options): Make
-mr10k-cache-barrier=* imply -mips3 or above.
* gcc.target/mips/cache-1.c: New test.
* gcc.target/mips/r10k-cache-barrier-1.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-2.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-3.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-4.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-5.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-6.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-7.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-8.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-9.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-10.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-11.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-12.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-13.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-14.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-15.c: Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@140055 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
26 files changed, 928 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f493fedfd89..280b57170a4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,35 @@ +2008-09-06 Richard Sandiford <rdsandiford@googlemail.com> + Peter Fuerst <post@pfrst.de> + + * doc/invoke.texi: Document -mr10k-cache-barrier=. + * doc/extend.texi: Document __builtin_mips_cache. + * config/mips/mips-ftypes.def: Add a (VOID, SI, CVPOINTER) entry. + * config/mips/mips.opt (mr10k-cache-barrier=): New option. + * config/mips/mips.h (TARGET_CPU_CPP_BUILTINS): Define + __GCC_HAVE_BUILTIN_MIPS_CACHE. + (TARGET_CACHE_BUILTIN, ISA_HAS_CACHE): New macros. + * config/mips/mips.c (mips_r10k_cache_barrier_setting): New enum. + (set_push_mips_isas): New variable. + (mips_r10k_cache_barrier): New variable. + (cache): New availability predicate. + (mips_builtins): Add an entry for __builtin_mips_cache. + (mips_build_cvpointer_type): New function. + (MIPS_ATYPE_CVPOINTER): New macro. + (mips_prepare_builtin_arg): Only use the insn's mode if the rtx's + mode is VOIDmode. + (r10k_simplified_address_p, r10k_simplify_address) + (r10k_uncached_address_p, r10k_safe_address_p) + (r10k_needs_protection_p_1, r10k_needs_protection_p_store) + (r10k_needs_protection_p_call, r10k_needs_protection_p) + (r10k_insert_cache_barriers): New functions. + (mips_reorg_process_insns): Delete cache barriers after a + branch-likely instruction. + (mips_reorg): Call r10k_insert_cache_barriers. + (mips_handle_option): Handle OPT_mr10k_cache_barrier_. + * config/mips/mips.md (UNSPEC_MIPS_CACHE): New constant. + (UNSPEC_R10K_CACHE_BARRIER): Likewise. + (mips_cache, r10k_cache_barrier): New define_insns. + 2008-09-06 Richard Sandiford <rdsandiford@googlemail.com> * ira-int.h (ira_zero_hard_reg_set, ira_one_hard_reg_set): Delete. 
diff --git a/gcc/config/mips/mips-ftypes.def b/gcc/config/mips/mips-ftypes.def index 8ae54ebe263..df0d9ff2aa3 100644 --- a/gcc/config/mips/mips-ftypes.def +++ b/gcc/config/mips/mips-ftypes.def @@ -120,6 +120,7 @@ DEF_MIPS_FTYPE (2, (V8QI, V4HI, V4HI)) DEF_MIPS_FTYPE (1, (V8QI, V8QI)) DEF_MIPS_FTYPE (2, (V8QI, V8QI, V8QI)) +DEF_MIPS_FTYPE (2, (VOID, SI, CVPOINTER)) DEF_MIPS_FTYPE (2, (VOID, SI, SI)) DEF_MIPS_FTYPE (2, (VOID, V2HI, V2HI)) DEF_MIPS_FTYPE (2, (VOID, V4QI, V4QI)) diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 33f7e87af33..9187991c763 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -148,6 +148,13 @@ enum mips_address_type { ADDRESS_SYMBOLIC }; +/* Enumerates the setting of the -mr10k-cache-barrier option. */ +enum mips_r10k_cache_barrier_setting { + R10K_CACHE_BARRIER_NONE, + R10K_CACHE_BARRIER_STORE, + R10K_CACHE_BARRIER_LOAD_STORE +}; + /* Macros to create an enumeration identifier for a function prototype. */ #define MIPS_FTYPE_NAME1(A, B) MIPS_##A##_FTYPE_##B #define MIPS_FTYPE_NAME2(A, B, C) MIPS_##A##_FTYPE_##B##_##C @@ -456,6 +463,9 @@ static int mips_base_align_functions; /* align_functions */ /* The -mcode-readable setting. */ enum mips_code_readable_setting mips_code_readable = CODE_READABLE_YES; +/* The -mr10k-cache-barrier setting. */ +static enum mips_r10k_cache_barrier_setting mips_r10k_cache_barrier; + /* Index [M][R] is true if register R is allowed to hold a value of mode M. */ bool mips_hard_regno_mode_ok[(int) MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER]; @@ -10922,6 +10932,7 @@ AVAIL_NON_MIPS16 (dspr2, TARGET_DSPR2) AVAIL_NON_MIPS16 (dsp_32, !TARGET_64BIT && TARGET_DSP) AVAIL_NON_MIPS16 (dspr2_32, !TARGET_64BIT && TARGET_DSPR2) AVAIL_NON_MIPS16 (loongson, TARGET_LOONGSON_VECTORS) +AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN) /* Construct a mips_builtin_description from the given arguments. 
@@ -11352,7 +11363,10 @@ static const struct mips_builtin_description mips_builtins[] = { LOONGSON_BUILTIN_SUFFIX (punpcklwd, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI), LOONGSON_BUILTIN_SUFFIX (punpcklbh, s, MIPS_V8QI_FTYPE_V8QI_V8QI), LOONGSON_BUILTIN_SUFFIX (punpcklhw, s, MIPS_V4HI_FTYPE_V4HI_V4HI), - LOONGSON_BUILTIN_SUFFIX (punpcklwd, s, MIPS_V2SI_FTYPE_V2SI_V2SI) + LOONGSON_BUILTIN_SUFFIX (punpcklwd, s, MIPS_V2SI_FTYPE_V2SI_V2SI), + + /* Sundry other built-in functions. */ + DIRECT_NO_TARGET_BUILTIN (cache, MIPS_VOID_FTYPE_SI_CVPOINTER, cache) }; /* MODE is a vector mode whose elements have type TYPE. Return the type @@ -11374,10 +11388,25 @@ mips_builtin_vector_type (tree type, enum machine_mode mode) return types[mode_index]; } +/* Return a type for 'const volatile void *'. */ + +static tree +mips_build_cvpointer_type (void) +{ + static tree cache; + + if (cache == NULL_TREE) + cache = build_pointer_type (build_qualified_type + (void_type_node, + TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE)); + return cache; +} + /* Source-level argument types. */ #define MIPS_ATYPE_VOID void_type_node #define MIPS_ATYPE_INT integer_type_node #define MIPS_ATYPE_POINTER ptr_type_node +#define MIPS_ATYPE_CVPOINTER mips_build_cvpointer_type () /* Standard mode-based argument types. */ #define MIPS_ATYPE_UQI unsigned_intQI_type_node @@ -11477,7 +11506,13 @@ mips_prepare_builtin_arg (enum insn_code icode, mode = insn_data[icode].operand[opno].mode; if (!insn_data[icode].operand[opno].predicate (value, mode)) { - value = copy_to_mode_reg (mode, value); + /* Cope with address operands, where MODE is not the mode of + VALUE itself. */ + if (GET_MODE (value) == VOIDmode) + value = copy_to_mode_reg (mode, value); + else + value = copy_to_reg (value); + /* Check the predicate again. 
*/ if (!insn_data[icode].operand[opno].predicate (value, mode)) { @@ -12024,6 +12059,378 @@ mips16_lay_out_constants (void) mips16_emit_constants (pool.first, get_last_insn ()); } +/* Return true if it is worth r10k_simplify_address's while replacing + an address with X. We are looking for constants, and for addresses + at a known offset from the incoming stack pointer. */ + +static bool +r10k_simplified_address_p (rtx x) +{ + if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) + x = XEXP (x, 0); + return x == virtual_incoming_args_rtx || CONSTANT_P (x); +} + +/* X is an expression that appears in INSN. Try to use the UD chains + to simplify it, returning the simplified form on success and the + original form otherwise. Replace the incoming value of $sp with + virtual_incoming_args_rtx (which should never occur in X otherwise). */ + +static rtx +r10k_simplify_address (rtx x, rtx insn) +{ + rtx newx, op0, op1, set, def_insn, note; + struct df_ref *use, *def; + struct df_link *defs; + + newx = NULL_RTX; + if (UNARY_P (x)) + { + op0 = r10k_simplify_address (XEXP (x, 0), insn); + if (op0 != XEXP (x, 0)) + newx = simplify_gen_unary (GET_CODE (x), GET_MODE (x), + op0, GET_MODE (XEXP (x, 0))); + } + else if (BINARY_P (x)) + { + op0 = r10k_simplify_address (XEXP (x, 0), insn); + op1 = r10k_simplify_address (XEXP (x, 1), insn); + if (op0 != XEXP (x, 0) || op1 != XEXP (x, 1)) + newx = simplify_gen_binary (GET_CODE (x), GET_MODE (x), op0, op1); + } + else if (GET_CODE (x) == LO_SUM) + { + /* LO_SUMs can be offset from HIGHs, if we know they won't + overflow. See mips_classify_address for the rationale behind + the lax check. */ + op0 = r10k_simplify_address (XEXP (x, 0), insn); + if (GET_CODE (op0) == HIGH) + newx = XEXP (x, 1); + } + else if (REG_P (x)) + { + /* Uses are recorded by regno_reg_rtx, not X itself. */ + use = df_find_use (insn, regno_reg_rtx[REGNO (x)]); + gcc_assert (use); + defs = DF_REF_CHAIN (use); + + /* Require a single definition. 
*/ + if (defs && defs->next == NULL) + { + def = defs->ref; + if (DF_REF_IS_ARTIFICIAL (def)) + { + /* Replace the incoming value of $sp with + virtual_incoming_args_rtx. */ + if (x == stack_pointer_rtx + && DF_REF_BB (def) == ENTRY_BLOCK_PTR) + newx = virtual_incoming_args_rtx; + } + else if (dominated_by_p (CDI_DOMINATORS, DF_REF_BB (use), + DF_REF_BB (def))) + { + /* Make sure that DEF_INSN is a single set of REG. */ + def_insn = DF_REF_INSN (def); + if (NONJUMP_INSN_P (def_insn)) + { + set = single_set (def_insn); + if (set && rtx_equal_p (SET_DEST (set), x)) + { + /* Prefer to use notes, since the def-use chains + are often shorter. */ + note = find_reg_equal_equiv_note (def_insn); + if (note) + newx = XEXP (note, 0); + else + newx = SET_SRC (set); + newx = r10k_simplify_address (newx, def_insn); + } + } + } + } + } + if (newx && r10k_simplified_address_p (newx)) + return newx; + return x; +} + +/* Return true if ADDRESS is known to be an uncached address + on R10K systems. */ + +static bool +r10k_uncached_address_p (unsigned HOST_WIDE_INT address) +{ + unsigned HOST_WIDE_INT upper; + + /* Check for KSEG1. */ + if (address + 0x60000000 < 0x20000000) + return true; + + /* Check for uncached XKPHYS addresses. */ + if (Pmode == DImode) + { + upper = (address >> 40) & 0xf9ffff; + if (upper == 0x900000 || upper == 0xb80000) + return true; + } + return false; +} + +/* Return true if we can prove that an access to address X in instruction + INSN would be safe from R10K speculation. This X is a general + expression; it might not be a legitimate address. */ + +static bool +r10k_safe_address_p (rtx x, rtx insn) +{ + rtx base, offset; + HOST_WIDE_INT offset_val; + + x = r10k_simplify_address (x, insn); + + /* Check for references to the stack frame. It doesn't really matter + how much of the frame has been allocated at INSN; -mr10k-cache-barrier + allows us to assume that accesses to any part of the eventual frame + is safe from speculation at any point in the function. 
*/ + mips_split_plus (x, &base, &offset_val); + if (base == virtual_incoming_args_rtx + && offset_val >= -cfun->machine->frame.total_size + && offset_val < cfun->machine->frame.args_size) + return true; + + /* Check for uncached addresses. */ + if (CONST_INT_P (x)) + return r10k_uncached_address_p (INTVAL (x)); + + /* Check for accesses to a static object. */ + split_const (x, &base, &offset); + return offset_within_block_p (base, INTVAL (offset)); +} + +/* Return true if a MEM with MEM_EXPR EXPR and MEM_OFFSET OFFSET is + an in-range access to an automatic variable, or to an object with + a link-time-constant address. */ + +static bool +r10k_safe_mem_expr_p (tree expr, rtx offset) +{ + if (expr == NULL_TREE + || offset == NULL_RTX + || !CONST_INT_P (offset) + || INTVAL (offset) < 0 + || INTVAL (offset) >= int_size_in_bytes (TREE_TYPE (expr))) + return false; + + while (TREE_CODE (expr) == COMPONENT_REF) + { + expr = TREE_OPERAND (expr, 0); + if (expr == NULL_TREE) + return false; + } + + return DECL_P (expr); +} + +/* A for_each_rtx callback for which DATA points to the instruction + containing *X. Stop the search if we find a MEM that is not safe + from R10K speculation. */ + +static int +r10k_needs_protection_p_1 (rtx *loc, void *data) +{ + rtx mem; + + mem = *loc; + if (!MEM_P (mem)) + return 0; + + if (r10k_safe_mem_expr_p (MEM_EXPR (mem), MEM_OFFSET (mem))) + return -1; + + if (r10k_safe_address_p (XEXP (mem, 0), (rtx) data)) + return -1; + + return 1; +} + +/* A note_stores callback for which DATA points to an instruction pointer. + If *DATA is nonnull, make it null if it X contains a MEM that is not + safe from R10K speculation. */ + +static void +r10k_needs_protection_p_store (rtx x, const_rtx pat ATTRIBUTE_UNUSED, + void *data) +{ + rtx *insn_ptr; + + insn_ptr = (rtx *) data; + if (*insn_ptr && for_each_rtx (&x, r10k_needs_protection_p_1, *insn_ptr)) + *insn_ptr = NULL_RTX; +} + +/* A for_each_rtx callback that iterates over the pattern of a CALL_INSN. 
+ Return nonzero if the call is not to a declared function. */ + +static int +r10k_needs_protection_p_call (rtx *loc, void *data ATTRIBUTE_UNUSED) +{ + rtx x; + + x = *loc; + if (!MEM_P (x)) + return 0; + + x = XEXP (x, 0); + if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DECL (x)) + return -1; + + return 1; +} + +/* Return true if instruction INSN needs to be protected by an R10K + cache barrier. */ + +static bool +r10k_needs_protection_p (rtx insn) +{ + if (CALL_P (insn)) + return for_each_rtx (&PATTERN (insn), r10k_needs_protection_p_call, NULL); + + if (mips_r10k_cache_barrier == R10K_CACHE_BARRIER_STORE) + { + note_stores (PATTERN (insn), r10k_needs_protection_p_store, &insn); + return insn == NULL_RTX; + } + + return for_each_rtx (&PATTERN (insn), r10k_needs_protection_p_1, insn); +} + +/* Return true if BB is only reached by blocks in PROTECTED_BBS and if every + edge is unconditional. */ + +static bool +r10k_protected_bb_p (basic_block bb, sbitmap protected_bbs) +{ + edge_iterator ei; + edge e; + + FOR_EACH_EDGE (e, ei, bb->preds) + if (!single_succ_p (e->src) + || !TEST_BIT (protected_bbs, e->src->index) + || (e->flags & EDGE_COMPLEX) != 0) + return false; + return true; +} + +/* Implement -mr10k-cache-barrier= for the current function. */ + +static void +r10k_insert_cache_barriers (void) +{ + int *rev_post_order; + unsigned int i, n; + basic_block bb; + sbitmap protected_bbs; + rtx insn, end, unprotected_region; + + if (TARGET_MIPS16) + { + sorry ("%qs does not support MIPS16 code", "-mr10k-cache-barrier"); + return; + } + + /* Restore the BLOCK_FOR_INSN pointers, which are needed by DF. */ + compute_bb_for_insn (); + + /* Create def-use chains. */ + df_set_flags (DF_EQ_NOTES); + df_chain_add_problem (DF_UD_CHAIN); + df_analyze (); + + /* Calculate dominators. */ + calculate_dominance_info (CDI_DOMINATORS); + + /* Bit X of PROTECTED_BBS is set if the last operation in basic block + X is protected by a cache barrier. 
*/ + protected_bbs = sbitmap_alloc (last_basic_block); + sbitmap_zero (protected_bbs); + + /* Iterate over the basic blocks in reverse post-order. */ + rev_post_order = XNEWVEC (int, last_basic_block); + n = pre_and_rev_post_order_compute (NULL, rev_post_order, false); + for (i = 0; i < n; i++) + { + bb = BASIC_BLOCK (rev_post_order[i]); + + /* If this block is only reached by unconditional edges, and if the + source of every edge is protected, the beginning of the block is + also protected. */ + if (r10k_protected_bb_p (bb, protected_bbs)) + unprotected_region = NULL_RTX; + else + unprotected_region = pc_rtx; + end = NEXT_INSN (BB_END (bb)); + + /* UNPROTECTED_REGION is: + + - null if we are processing a protected region, + - pc_rtx if we are processing an unprotected region but have + not yet found the first instruction in it + - the first instruction in an unprotected region otherwise. */ + for (insn = BB_HEAD (bb); insn != end; insn = NEXT_INSN (insn)) + { + if (unprotected_region && INSN_P (insn)) + { + if (recog_memoized (insn) == CODE_FOR_mips_cache) + /* This CACHE instruction protects the following code. */ + unprotected_region = NULL_RTX; + else + { + /* See if INSN is the first instruction in this + unprotected region. */ + if (unprotected_region == pc_rtx) + unprotected_region = insn; + + /* See if INSN needs to be protected. If so, + we must insert a cache barrier somewhere between + PREV_INSN (UNPROTECTED_REGION) and INSN. It isn't + clear which position is better performance-wise, + but as a tie-breaker, we assume that it is better + to allow delay slots to be back-filled where + possible, and that it is better not to insert + barriers in the middle of already-scheduled code. + We therefore insert the barrier at the beginning + of the region. 
*/ + if (r10k_needs_protection_p (insn)) + { + emit_insn_before (gen_r10k_cache_barrier (), + unprotected_region); + unprotected_region = NULL_RTX; + } + } + } + + if (CALL_P (insn)) + /* The called function is not required to protect the exit path. + The code that follows a call is therefore unprotected. */ + unprotected_region = pc_rtx; + } + + /* Record whether the end of this block is protected. */ + if (unprotected_region == NULL_RTX) + SET_BIT (protected_bbs, bb->index); + } + XDELETEVEC (rev_post_order); + + sbitmap_free (protected_bbs); + + free_dominance_info (CDI_DOMINATORS); + + df_finish_pass (false); + + free_bb_for_insn (); +} + /* A temporary variable used by for_each_rtx callbacks, etc. */ static rtx mips_sim_insn; @@ -12675,6 +13082,13 @@ mips_reorg_process_insns (void) orphaned high-part relocation. */ if (mips_orphaned_high_part_p (htab, insn)) delete_insn (insn); + /* Also delete cache barriers if the last instruction + was an annulled branch. INSN will not be speculatively + executed. 
*/ + else if (recog_memoized (insn) == CODE_FOR_r10k_cache_barrier + && last_insn + && INSN_ANNULLED_BRANCH_P (SEQ_BEGIN (last_insn))) + delete_insn (insn); else { mips_avoid_hazard (last_insn, insn, &hilo_delay, @@ -12694,6 +13108,8 @@ static void mips_reorg (void) { mips16_lay_out_constants (); + if (mips_r10k_cache_barrier != R10K_CACHE_BARRIER_NONE) + r10k_insert_cache_barriers (); if (mips_base_delayed_branch) dbr_schedule (get_insns ()); mips_reorg_process_insns (); @@ -13123,6 +13539,17 @@ mips_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) return false; return true; + case OPT_mr10k_cache_barrier_: + if (strcmp (arg, "load-store") == 0) + mips_r10k_cache_barrier = R10K_CACHE_BARRIER_LOAD_STORE; + else if (strcmp (arg, "store") == 0) + mips_r10k_cache_barrier = R10K_CACHE_BARRIER_STORE; + else if (strcmp (arg, "none") == 0) + mips_r10k_cache_barrier = R10K_CACHE_BARRIER_NONE; + else + return false; + return true; + default: return true; } @@ -13358,6 +13785,14 @@ mips_override_options (void) warning (0, "the %qs architecture does not support paired-single" " instructions", mips_arch_info->name); + if (mips_r10k_cache_barrier != R10K_CACHE_BARRIER_NONE + && !TARGET_CACHE_BUILTIN) + { + error ("%qs requires a target that provides the %qs instruction", + "-mr10k-cache-barrier", "cache"); + mips_r10k_cache_barrier = R10K_CACHE_BARRIER_NONE; + } + /* If TARGET_DSPR2, enable MASK_DSP. */ if (TARGET_DSPR2) target_flags |= MASK_DSP; diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 41456997eb6..effa34ba261 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -563,6 +563,9 @@ enum mips_code_readable_setting { \ if (mips_abi == ABI_EABI) \ builtin_define ("__mips_eabi"); \ + \ + if (TARGET_CACHE_BUILTIN) \ + builtin_define ("__GCC_HAVE_BUILTIN_MIPS_CACHE"); \ } \ while (0) @@ -1020,6 +1023,12 @@ enum mips_code_readable_setting { /* ISA includes the pop instruction. 
*/ #define ISA_HAS_POP TARGET_OCTEON + +/* The CACHE instruction is available in non-MIPS16 code. */ +#define TARGET_CACHE_BUILTIN (mips_isa >= 3) + +/* The CACHE instruction is available. */ +#define ISA_HAS_CACHE (TARGET_CACHE_BUILTIN && !TARGET_MIPS16) /* Add -G xx support. */ diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 0512a70a40a..593fae30ba6 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -247,6 +247,9 @@ (UNSPEC_LOONGSON_ALU2_TURN_ENABLED_INSN 531) (UNSPEC_LOONGSON_FALU1_TURN_ENABLED_INSN 532) (UNSPEC_LOONGSON_FALU2_TURN_ENABLED_INSN 533) + + (UNSPEC_MIPS_CACHE 600) + (UNSPEC_R10K_CACHE_BARRIER 601) ] ) @@ -4722,6 +4725,25 @@ } [(set_attr "length" "20")]) +;; Cache operations for R4000-style caches. +(define_insn "mips_cache" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:SI 0 "const_int_operand") + (match_operand:QI 1 "address_operand" "p")] + UNSPEC_MIPS_CACHE))] + "ISA_HAS_CACHE" + "cache\t%X0,%a1") + +;; Similar, but with the operands hard-coded to an R10K cache barrier +;; operation. We keep the pattern distinct so that we can identify +;; cache operations inserted by -mr10k-cache-barrier=, and so that +;; the operation is never inserted into a delay slot. +(define_insn "r10k_cache_barrier" + [(set (mem:BLK (scratch)) + (unspec:BLK [(const_int 0)] UNSPEC_R10K_CACHE_BARRIER))] + "ISA_HAS_CACHE" + "cache\t0x14,0(%$)" + [(set_attr "can_delay" "no")]) ;; Block moves, see mips.c for more details. 
;; Argument 0 is the destination diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index 4e69e29a7e5..56c99ac16f8 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -236,6 +236,10 @@ mpaired-single Target Report Mask(PAIRED_SINGLE_FLOAT) Use paired-single floating-point instructions +mr10k-cache-barrier= +Target Joined RejectNegative +-mr10k-cache-barrier=SETTING Specify when r10k cache barriers should be inserted + mshared Target Report Var(TARGET_SHARED) Init(1) When generating -mabicalls code, make the code suitable for use in shared libraries diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 21debf74679..32ca1f0b3b0 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -6985,6 +6985,7 @@ instructions, but allow the compiler to schedule those calls. * MIPS DSP Built-in Functions:: * MIPS Paired-Single Support:: * MIPS Loongson Built-in Functions:: +* Other MIPS Built-in Functions:: * picoChip Built-in Functions:: * PowerPC AltiVec Built-in Functions:: * SPARC VIS Built-in Functions:: @@ -9440,6 +9441,18 @@ implementing assertions. @end table +@node Other MIPS Built-in Functions +@subsection Other MIPS Built-in Functions + +GCC provides other MIPS-specific built-in functions: + +@table @code +@item void __builtin_mips_cache (int @var{op}, const volatile void *@var{addr}) +Insert a @samp{cache} instruction with operands @var{op} and @var{addr}. +GCC defines the preprocessor macro @code{__GCC_HAVE_BUILTIN_MIPS_CACHE} +when this function is available. +@end table + @node PowerPC AltiVec Built-in Functions @subsection PowerPC AltiVec Built-in Functions diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b5bd719d659..d96d4ab78c9 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -12808,6 +12808,73 @@ Work around certain SB-1 CPU core errata. (This flag currently works around the SB-1 revision 2 ``F1'' and ``F2'' floating point errata.) 
+@item -mr10k-cache-barrier=@var{setting} +@opindex mr10k-cache-barrier +Specify whether GCC should insert cache barriers to avoid the +side-effects of speculation on R10K processors. + +In common with many processors, the R10K tries to predict the outcome +of a conditional branch and speculatively executes instructions from +the ``taken'' branch. It later aborts these instructions if the +predicted outcome was wrong. However, on the R10K, even aborted +instructions can have side effects. + +This problem only affects kernel stores and, depending on the system, +kernel loads. As an example, a speculatively-executed store may load +the target memory into cache and mark the cache line as dirty, even if +the store itself is later aborted. If a DMA operation writes to the +same area of memory before the ``dirty'' line is flushed, the cached +data will overwrite the DMA-ed data. See the R10K processor manual +for a full description, including other potential problems. + +One workaround is to insert cache barrier instructions before every memory +access that might be speculatively executed and that might have side +effects even if aborted. @option{-mr10k-cache-barrier=@var{setting}} +controls GCC's implementation of this workaround. It assumes that +aborted accesses to any byte in the following regions will not have +side effects: + +@enumerate +@item +the memory occupied by the current function's stack frame; + +@item +the memory occupied by an incoming stack argument; + +@item +the memory occupied by an object with a link-time-constant address. +@end enumerate + +It is the kernel's responsibility to ensure that speculative +accesses to these regions are indeed safe. + +If the input program contains a function declaration such as: + +@smallexample +void foo (void); +@end smallexample + +then the implementation of @code{foo} must allow @code{j foo} and +@code{jal foo} to be executed speculatively. GCC honors this +restriction for functions it compiles itself. 
It expects non-GCC +functions (such as hand-written assembly code) to do the same. + +The option has three forms: + +@table @gcctabopt +@item -mr10k-cache-barrier=load-store +Insert a cache barrier before a load or store that might be +speculatively executed and that might have side effects even +if aborted. + +@item -mr10k-cache-barrier=store +Insert a cache barrier before a store that might be speculatively +executed and that might have side effects even if aborted. + +@item -mr10k-cache-barrier=none +Disable the insertion of cache barriers. This is the default setting. +@end table + @item -mflush-func=@var{func} @itemx -mno-flush-func @opindex mflush-func diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a31b444c123..9ea373a98f3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,24 @@ +2008-09-06 Richard Sandiford <rdsandiford@googlemail.com> + + * gcc.target/mips/mips.exp (dg-mips-options): Make + -mr10k-cache-barrier=* imply -mips3 or above. + * gcc.target/mips/cache-1.c: New test. + * gcc.target/mips/r10k-cache-barrier-1.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-2.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-3.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-4.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-5.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-6.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-7.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-8.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-9.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-10.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-11.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-12.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-13.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-14.c: Likewise. + * gcc.target/mips/r10k-cache-barrier-15.c: Likewise. 
+ 2008-09-05 Daniel Kraft <d@domob.eu> PR fortran/35837 diff --git a/gcc/testsuite/gcc.target/mips/cache-1.c b/gcc/testsuite/gcc.target/mips/cache-1.c new file mode 100644 index 00000000000..40c22e08cdc --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/cache-1.c @@ -0,0 +1,30 @@ +/* { dg-mips-options "-O2" } */ + +void +f1 (int *area) +{ + __builtin_mips_cache (20, area); +} + +void +f2 (const short *area) +{ + __builtin_mips_cache (24, area + 10); +} + +void +f3 (volatile unsigned int *area, int offset) +{ + __builtin_mips_cache (0, area + offset); +} + +void +f4 (const volatile unsigned char *area) +{ + __builtin_mips_cache (4, area - 80); +} + +/* { dg-final { scan-assembler "\tcache\t0x14,0\\(\\\$4\\)" } } */ +/* { dg-final { scan-assembler "\tcache\t0x18,20\\(\\\$4\\)" } } */ +/* { dg-final { scan-assembler "\tcache\t0x0,0\\(\\\$.\\)" } } */ +/* { dg-final { scan-assembler "\tcache\t0x4,-80\\(\\\$4\\)" } } */ diff --git a/gcc/testsuite/gcc.target/mips/mips.exp b/gcc/testsuite/gcc.target/mips/mips.exp index 526bbdb14f2..832633900c9 100644 --- a/gcc/testsuite/gcc.target/mips/mips.exp +++ b/gcc/testsuite/gcc.target/mips/mips.exp @@ -238,6 +238,10 @@ proc dg-mips-options {args} { } else { append flags " -msoft-float" } + } elseif {[regexp -- {^-mr10k-cache-barrier=(load|store)} $flag] + && $mips_isa < 3 + && [lsearch -regexp $flags {^(-mips|-march)}] < 0} { + append flags " -mips3" } } foreach flag $flags { diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-1.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-1.c new file mode 100644 index 00000000000..fd13d8ac876 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-1.c @@ -0,0 +1,45 @@ +/* { dg-mips-options "-O2 -mabi=64 -mr10k-cache-barrier=store" } */ + +/* Test that stores to uncached addresses do not get unnecessary + cache barriers. 
*/ + +#define TEST(ADDR) \ + NOMIPS16 void \ + test_##ADDR (int n) \ + { \ + while (n--) \ + { \ + *(volatile char *) (0x##ADDR##UL) = 1; \ + *(volatile short *) (0x##ADDR##UL + 2) = 2; \ + *(volatile int *) (0x##ADDR##UL + 4) = 0; \ + } \ + } + +TEST (9000000000000000) +TEST (900000fffffffff8) + +TEST (9200000000000000) +TEST (920000fffffffff8) + +TEST (9400000000000000) +TEST (940000fffffffff8) + +TEST (9600000000000000) +TEST (960000fffffffff8) + +TEST (b800000000000000) +TEST (b80000fffffffff8) + +TEST (ba00000000000000) +TEST (ba0000fffffffff8) + +TEST (bc00000000000000) +TEST (bc0000fffffffff8) + +TEST (be00000000000000) +TEST (be0000fffffffff8) + +TEST (ffffffffa0000000) +TEST (ffffffffbffffff8) + +/* { dg-final { scan-assembler-not "\tcache\t" } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-10.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-10.c new file mode 100644 index 00000000000..405d7fcf033 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-10.c @@ -0,0 +1,18 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mips4 -mbranch-likely -mno-abicalls" } */ +int bar (int); + +/* Test that code after a branch-likely does not get an unnecessary + cache barrier. */ + +NOMIPS16 void +foo (int n, int *x) +{ + do + n = bar (n * 4 + 1); + while (n); + /* The preceding branch should be a branch likely, with the shift as + its delay slot. We therefore don't need a cache barrier here. */ + x[0] = 0; +} + +/* { dg-final { scan-assembler-not "\tcache\t" } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-11.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-11.c new file mode 100644 index 00000000000..be6816fda9b --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-11.c @@ -0,0 +1,13 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mno-abicalls" } */ + +/* Test that loads are not unnecessarily protected. 
*/ + +int bar (int); + +NOMIPS16 void +foo (int *ptr) +{ + *ptr = bar (*ptr); +} + +/* { dg-final { scan-assembler-times "\tcache\t" 1 } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-12.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-12.c new file mode 100644 index 00000000000..7e8026f1e5f --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-12.c @@ -0,0 +1,13 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=load-store -mno-abicalls" } */ + +/* Test that loads are correctly protected. */ + +int bar (int); + +NOMIPS16 void +foo (int *ptr) +{ + *ptr = bar (*ptr); +} + +/* { dg-final { scan-assembler-times "\tcache\t" 2 } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-13.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-13.c new file mode 100644 index 00000000000..fa5a416ce20 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-13.c @@ -0,0 +1,14 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store" } */ + +/* Test that indirect calls are protected. */ + +int bar (int); + +NOMIPS16 void +foo (void (*fn) (void), int x) +{ + if (x) + (*fn) (); +} + +/* { dg-final { scan-assembler-times "\tcache\t" 1 } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-14.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-14.c new file mode 100644 index 00000000000..4d807833abb --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-14.c @@ -0,0 +1,7 @@ +/* { dg-do compile { target mips16_attribute } } */ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store" } */ +/* { dg-add-options mips16_attribute } */ + +/* Test that -mr10k-cache-barrier is reported as unimplemented for MIPS16 functions. 
*/ + +MIPS16 void foo (void) { } /* { dg-message "sorry, unimplemented" } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-15.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-15.c new file mode 100644 index 00000000000..5b03838ca2c --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-15.c @@ -0,0 +1,2 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mips2" } */ +/* { dg-error "requires.*cache.*instruction" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-2.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-2.c new file mode 100644 index 00000000000..ed439b143f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-2.c @@ -0,0 +1,40 @@ +/* { dg-mips-options "-O2 -mabi=64 -mr10k-cache-barrier=store" } */ + +/* Test that stores to constant cached addresses are protected + by cache barriers. */ + +#define TEST(ADDR) \ + NOMIPS16 void \ + test_##ADDR (int n) \ + { \ + *(volatile int *) (0x##ADDR##UL) = 1; \ + } + +TEST (8ffffffffffffffc) +TEST (9000010000000000) + +TEST (91fffffffffffffc) +TEST (9200010000000000) + +TEST (93fffffffffffffc) +TEST (9500010000000000) + +TEST (95fffffffffffffc) +TEST (9600010000000000) + +TEST (b7fffffffffffffc) +TEST (b800010000000000) + +TEST (b9fffffffffffffc) +TEST (ba00010000000000) + +TEST (bbfffffffffffffc) +TEST (bc00010000000000) + +TEST (bdfffffffffffffc) +TEST (be00010000000000) + +TEST (ffffffff9ffffffc) +TEST (ffffffffc0000000) + +/* { dg-final { scan-assembler-times "\tcache\t" 18 } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-3.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-3.c new file mode 100644 index 00000000000..8238f39b04c --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-3.c @@ -0,0 +1,17 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mno-abicalls" } */ + +/* Test that in-range stores to the frame are not protected by + cache barriers. 
*/ + +void bar (int *x); + +NOMIPS16 void +foo (int v) +{ + int x[0x100000]; + bar (x); + x[0x20] = v; + bar (x); +} + +/* { dg-final { scan-assembler-not "\tcache\t" } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-4.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-4.c new file mode 100644 index 00000000000..e8280e8af27 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-4.c @@ -0,0 +1,20 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mno-abicalls" } */ + +void bar (int *x); + +/* Test that out-of-range stores to the frame are protected by cache + barriers. */ + +NOMIPS16 void +foo (int v) +{ + int x[8]; + bar (x); + if (v & 1) + x[0x100] = 0; + if (v & 2) + x[-0x100] = 0; + bar (x); +} + +/* { dg-final { scan-assembler-times "\tcache\t" 2 } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-5.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-5.c new file mode 100644 index 00000000000..6e21ec3e393 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-5.c @@ -0,0 +1,19 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mno-abicalls -mabi=64" } */ + +/* Test that in-range stores to static objects do not get an unnecessary + cache barrier. */ + +int x[4]; +void bar (void); + +NOMIPS16 void +foo (int n) +{ + while (n--) + { + x[3] = 1; + bar (); + } +} + +/* { dg-final { scan-assembler-not "\tcache\t" } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-6.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-6.c new file mode 100644 index 00000000000..f014aa0dcdc --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-6.c @@ -0,0 +1,19 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mabi=64" } */ + +int x[4]; +void bar (void); + +/* Test that out-of-range stores to static objects are protected by a + cache barrier. 
*/ + +NOMIPS16 void +foo (int n) +{ + while (n--) + { + x[4] = 1; + bar (); + } +} + +/* { dg-final { scan-assembler "\tcache\t" } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-7.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-7.c new file mode 100644 index 00000000000..c98b4a8a0fe --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-7.c @@ -0,0 +1,27 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mno-abicalls" } */ + +void bar1 (void); +void bar2 (void); +void bar3 (void); + +NOMIPS16 void +foo (int *x, int sel, int n) +{ + if (sel) + { + bar1 (); + x[0] = 1; + } + else + { + bar2 (); + x[1] = 0; + } + /* If there is one copy of this code, reached by two unconditional edges, + then it shouldn't need a third cache barrier. */ + x[2] = 2; + while (n--) + bar3 (); +} + +/* { dg-final { scan-assembler-times "\tcache\t" 2 } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-8.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-8.c new file mode 100644 index 00000000000..5394ae8067e --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-8.c @@ -0,0 +1,15 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -G8" } */ + +/* Test that in-range stores to components of static objects + do not get an unnecessary cache barrier. */ + +struct { struct { char i[4]; } a; struct { char j[4]; } b; } s; + +NOMIPS16 void +foo (int sel) +{ + s.a.i[0] = 1; + s.b.j[3] = 100; +} + +/* { dg-final { scan-assembler-not "\tcache\t" } } */ diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c new file mode 100644 index 00000000000..cf795b6e732 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c @@ -0,0 +1,19 @@ +/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -G8" } */ + +/* Test that out-of-range stores to components of static objects + are protected by a cache barrier. 
*/ + +struct { struct { char i[4]; } a; struct { char j[4]; } b; } s; + +NOMIPS16 void +foo (int sel1, int sel2, int sel3) +{ + if (sel1) + s.a.i[8] = 1; + if (sel2) + s.b.j[4] = 100; + if (sel3) + s.a.i[-1] = 0; +} + +/* { dg-final { scan-assembler-times "\tcache\t" 3 } } */ |