diff options
-rw-r--r-- | gcc/ChangeLog | 43 | ||||
-rw-r--r-- | gcc/Makefile.in | 3 | ||||
-rw-r--r-- | gcc/calls.c | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 5 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 32 | ||||
-rw-r--r-- | gcc/expr.c | 120 | ||||
-rw-r--r-- | gcc/expr.h | 4 | ||||
-rw-r--r-- | gcc/function.c | 2 | ||||
-rw-r--r-- | gcc/genopinit.c | 1 | ||||
-rw-r--r-- | gcc/optabs.c | 2 | ||||
-rw-r--r-- | gcc/optabs.h | 3 | ||||
-rw-r--r-- | gcc/stmt.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c | 59 | ||||
-rw-r--r-- | gcc/tree-data-ref.c | 2 | ||||
-rw-r--r-- | gcc/tree-flow.h | 1 | ||||
-rw-r--r-- | gcc/tree-predcom.c | 2 | ||||
-rw-r--r-- | gcc/tree-pretty-print.c | 3 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-prefetch.c | 225 | ||||
-rw-r--r-- | gcc/tree.h | 6 |
21 files changed, 466 insertions, 60 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e4b34951006..aaeeaac4ca2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,46 @@ +2007-06-10 Zdenek Dvorak <dvorakz@suse.cz> + + * tree-data-ref.c (dr_analyze_alias): Handle case smt is NULL. + * tree-predcom.c (mark_virtual_ops_for_renaming): Exported. + * tree-ssa-loop-prefetch.c: Include optabs.h. + (FENCE_FOLLOWING_MOVNT): New macro. + (struct mem_ref): Add independent_p and storent_p fields. + (record_ref): Initialize the new fields. + (gather_memory_references_ref): Return true if the reference + could be analysed. + (gather_memory_references): Check whether all memory accesses + in loop were recorded. + (should_issue_prefetch_p): Return false for nontemporal stores. + (nontemporal_store_p, mark_nontemporal_store, emit_mfence_after_loop, + may_use_storent_in_loop_p, mark_nontemporal_stores): New functions. + (determine_loop_nest_reuse): Detect independent memory references. + (loop_prefetch_arrays): Call mark_nontemporal_stores. + * tree-flow.h (mark_virtual_ops_for_renaming): Declare. + * Makefile.in (tree-ssa-loop-prefetch.o): Add OPTABS_H dependency. + * config/i386/i386.h (x86_mfence): Declare. + (FENCE_FOLLOWING_MOVNT): Return x86_mfence. + * config/i386/i386.c (x86_mfence): New variable. + (ix86_init_mmx_sse_builtins): Initialize x86_mfence. + + * tree-pretty-print.c (dump_generic_node): Mark nontemporal stores. + * optabs.c (init_optabs): Initialize storent_optab. + * optabs.h (enum optab_index): Add OTI_storent. + (storent_optab): Declare. + * genopinit.c (optabs): Add initialization for storent_optab. + * tree.h (MOVE_NONTEMPORAL): New macro. + * expr.c (expand_assignment, store_expr, store_constructor_field, + store_constructor, store_field, expand_expr_real_1): Propagate + nontemporality of the expanded store. + (emit_storent_insn): New function. + * expr.h (expand_assignment, store_expr): Declaration changed. 
+ * function.c (assign_parm_setup_reg): Pass false as nontemporality + to expand_assignment. + * stmt.c (expand_asm_expr): Ditto. + * calls.c (initialize_argument_information): Pass false as + nontemporality to store_expr. + * config/i386/sse.md (storentv4sf, storentv2df, storentv2di, + storentsi): New. + 2007-06-09 Daniel Berlin <dberlin@dberlin.org> * tree-ssa-structalias.c (set_uids_in_ptset): Add is_deref'd diff --git a/gcc/Makefile.in b/gcc/Makefile.in index a8d5046f62a..4e7570130af 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2085,7 +2085,8 @@ tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \ $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \ - tree-chrec.h toplev.h langhooks.h $(TREE_INLINE_H) $(TREE_DATA_REF_H) + tree-chrec.h toplev.h langhooks.h $(TREE_INLINE_H) $(TREE_DATA_REF_H) \ + $(OPTABS_H) tree-predcom.o: tree-predcom.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_P_H) \ $(CFGLOOP_H) $(TREE_FLOW_H) $(GGC_H) $(TREE_DATA_REF_H) $(SCEV_H) \ $(PARAMS_H) $(DIAGNOSTIC_H) tree-pass.h $(TM_H) coretypes.h tree-affine.h \ diff --git a/gcc/calls.c b/gcc/calls.c index 4089bb14ea1..32ca3ef38be 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -1080,7 +1080,7 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, else copy = assign_temp (type, 0, 1, 0); - store_expr (args[i].tree_value, copy, 0); + store_expr (args[i].tree_value, copy, 0, false); if (callee_copies) *ecf_flags &= ~(ECF_CONST | ECF_LIBCALL_BLOCK); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index eb7ff51399a..f8019bde0f9 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1500,6 +1500,9 @@ int ix86_section_threshold = 65536; char internal_label_prefix[16]; int internal_label_prefix_len; +/* Fence to use after loop using movnt. 
*/ +tree x86_mfence; + /* Register class used for passing given 64bit part of the argument. These represent classes as documented by the PS ABI, with the exception of SSESF, SSEDF classes, that are basically SSE class, just gcc will @@ -18120,7 +18123,7 @@ ix86_init_mmx_sse_builtins (void) def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); + x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 730f32bbb9b..693f8ee2df7 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -386,6 +386,11 @@ extern int ix86_isa_flags; #define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0 #endif +/* Fence to use after loop using storent. */ + +extern tree x86_mfence; +#define FENCE_FOLLOWING_MOVNT x86_mfence + /* Once GDB has been enhanced to deal with functions without frame pointers, we can change this to allow for elimination of the frame pointer in leaf functions. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index deb3c43cc34..a7a56490cab 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -317,6 +317,38 @@ (set_attr "prefix_rep" "1") (set_attr "mode" "TI")]) +; Expand patterns for non-temporal stores. At the moment, only those +; that directly map to insns are defined; it would be possible to +; define patterns for other modes that would expand to several insns. 
+ +(define_expand "storentv4sf" + [(set (match_operand:V4SF 0 "memory_operand" "=m") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE" + "") + +(define_expand "storentv2df" + [(set (match_operand:V2DF 0 "memory_operand" "=m") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "") + +(define_expand "storentv2di" + [(set (match_operand:V2DI 0 "memory_operand" "=m") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "") + +(define_expand "storentsi" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "") + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel single-precision floating point arithmetic diff --git a/gcc/expr.c b/gcc/expr.c index 05e23833b41..9c5056b3f13 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -142,7 +142,7 @@ static void store_constructor_field (rtx, unsigned HOST_WIDE_INT, tree, tree, int, int); static void store_constructor (tree, rtx, int, HOST_WIDE_INT); static rtx store_field (rtx, HOST_WIDE_INT, HOST_WIDE_INT, enum machine_mode, - tree, tree, int); + tree, tree, int, bool); static unsigned HOST_WIDE_INT highest_pow2_factor_for_target (tree, tree); @@ -4074,10 +4074,11 @@ optimize_bitfield_assignment_op (unsigned HOST_WIDE_INT bitsize, } -/* Expand an assignment that stores the value of FROM into TO. */ +/* Expand an assignment that stores the value of FROM into TO. If NONTEMPORAL + is true, try generating a nontemporal store. 
*/ void -expand_assignment (tree to, tree from) +expand_assignment (tree to, tree from, bool nontemporal) { rtx to_rtx = 0; rtx result; @@ -4164,12 +4165,13 @@ expand_assignment (tree to, tree from) if (TREE_CODE (TREE_TYPE (from)) == COMPLEX_TYPE) { gcc_assert (bitpos == 0); - result = store_expr (from, to_rtx, false); + result = store_expr (from, to_rtx, false, nontemporal); } else { gcc_assert (bitpos == 0 || bitpos == GET_MODE_BITSIZE (mode1)); - result = store_expr (from, XEXP (to_rtx, bitpos != 0), false); + result = store_expr (from, XEXP (to_rtx, bitpos != 0), false, + nontemporal); } } else @@ -4195,7 +4197,8 @@ expand_assignment (tree to, tree from) result = NULL; else result = store_field (to_rtx, bitsize, bitpos, mode1, from, - TREE_TYPE (tem), get_alias_set (to)); + TREE_TYPE (tem), get_alias_set (to), + nontemporal); } if (result) @@ -4302,13 +4305,46 @@ expand_assignment (tree to, tree from) /* Compute FROM and store the value in the rtx we got. */ push_temp_slots (); - result = store_expr (from, to_rtx, 0); + result = store_expr (from, to_rtx, 0, nontemporal); preserve_temp_slots (result); free_temp_slots (); pop_temp_slots (); return; } +/* Emits nontemporal store insn that moves FROM to TO. Returns true if this + succeeded, false otherwise. 
*/ + +static bool +emit_storent_insn (rtx to, rtx from) +{ + enum machine_mode mode = GET_MODE (to), imode; + enum insn_code code = storent_optab->handlers[mode].insn_code; + rtx pattern; + + if (code == CODE_FOR_nothing) + return false; + + imode = insn_data[code].operand[0].mode; + if (!insn_data[code].operand[0].predicate (to, imode)) + return false; + + imode = insn_data[code].operand[1].mode; + if (!insn_data[code].operand[1].predicate (from, imode)) + { + from = copy_to_mode_reg (imode, from); + if (!insn_data[code].operand[1].predicate (from, imode)) + return false; + } + + pattern = GEN_FCN (code) (to, from); + if (pattern == NULL_RTX) + return false; + + emit_insn (pattern); + return true; +} + /* Generate code for computing expression EXP, and storing the value into TARGET. @@ -4320,10 +4356,12 @@ expand_assignment (tree to, tree from) be more thorough? If CALL_PARAM_P is nonzero, this is a store into a call param on the - stack, and block moves may need to be treated specially. */ + stack, and block moves may need to be treated specially. + + If NONTEMPORAL is true, try using a nontemporal store instruction. */ rtx -store_expr (tree exp, rtx target, int call_param_p) +store_expr (tree exp, rtx target, int call_param_p, bool nontemporal) { rtx temp; rtx alt_rtl = NULL_RTX; @@ -4344,7 +4382,8 @@ store_expr (tree exp, rtx target, int call_param_p) part. */ expand_expr (TREE_OPERAND (exp, 0), const0_rtx, VOIDmode, call_param_p ? 
EXPAND_STACK_PARM : EXPAND_NORMAL); - return store_expr (TREE_OPERAND (exp, 1), target, call_param_p); + return store_expr (TREE_OPERAND (exp, 1), target, call_param_p, + nontemporal); } else if (TREE_CODE (exp) == COND_EXPR && GET_MODE (target) == BLKmode) { @@ -4358,11 +4397,13 @@ store_expr (tree exp, rtx target, int call_param_p) do_pending_stack_adjust (); NO_DEFER_POP; jumpifnot (TREE_OPERAND (exp, 0), lab1); - store_expr (TREE_OPERAND (exp, 1), target, call_param_p); + store_expr (TREE_OPERAND (exp, 1), target, call_param_p, + nontemporal); emit_jump_insn (gen_jump (lab2)); emit_barrier (); emit_label (lab1); - store_expr (TREE_OPERAND (exp, 2), target, call_param_p); + store_expr (TREE_OPERAND (exp, 2), target, call_param_p, + nontemporal); emit_label (lab2); OK_DEFER_POP; @@ -4433,7 +4474,12 @@ store_expr (tree exp, rtx target, int call_param_p) } else { - temp = expand_expr_real (exp, target, GET_MODE (target), + rtx tmp_target; + + /* If we want to use a nontemporal store, force the value to + register first. */ + tmp_target = nontemporal ? NULL_RTX : target; + temp = expand_expr_real (exp, tmp_target, GET_MODE (target), (call_param_p ? EXPAND_STACK_PARM : EXPAND_NORMAL), &alt_rtl); @@ -4591,6 +4637,11 @@ store_expr (tree exp, rtx target, int call_param_p) emit_block_move (target, temp, expr_size (exp), (call_param_p ? BLOCK_OP_CALL_PARM : BLOCK_OP_NORMAL)); + else if (nontemporal + && emit_storent_insn (target, temp)) + /* If we managed to emit a nontemporal store, there is nothing else to + do. */ + ; else { temp = force_operand (temp, target); @@ -4941,7 +4992,7 @@ store_constructor_field (rtx target, unsigned HOST_WIDE_INT bitsize, store_constructor (exp, target, cleared, bitsize / BITS_PER_UNIT); } else - store_field (target, bitsize, bitpos, mode, exp, type, alias_set); + store_field (target, bitsize, bitpos, mode, exp, type, alias_set, false); } /* Store the value of constructor EXP into the rtx TARGET. 
@@ -5291,7 +5342,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size) = gen_reg_rtx (promote_mode (domain, DECL_MODE (index), &unsignedp, 0)); SET_DECL_RTL (index, index_r); - store_expr (lo_index, index_r, 0); + store_expr (lo_index, index_r, 0, false); /* Build the head of the loop. */ do_pending_stack_adjust (); @@ -5318,7 +5369,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size) store_constructor (value, xtarget, cleared, bitsize / BITS_PER_UNIT); else - store_expr (value, xtarget, 0); + store_expr (value, xtarget, 0, false); /* Generate a conditional jump to exit the loop. */ exit_cond = build2 (LT_EXPR, integer_type_node, @@ -5329,7 +5380,8 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size) the loop. */ expand_assignment (index, build2 (PLUS_EXPR, TREE_TYPE (index), - index, integer_one_node)); + index, integer_one_node), + false); emit_jump (loop_start); @@ -5360,7 +5412,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size) expand_normal (position), highest_pow2_factor (position)); xtarget = adjust_address (xtarget, mode, 0); - store_expr (value, xtarget, 0); + store_expr (value, xtarget, 0, false); } else { @@ -5522,11 +5574,14 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size) ALIAS_SET is the alias set for the destination. This value will (in general) be different from that for TARGET, since TARGET is a - reference to the containing structure. */ + reference to the containing structure. + + If NONTEMPORAL is true, try generating a nontemporal store. 
*/ static rtx store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos, - enum machine_mode mode, tree exp, tree type, int alias_set) + enum machine_mode mode, tree exp, tree type, int alias_set, + bool nontemporal) { HOST_WIDE_INT width_mask = 0; @@ -5561,7 +5616,8 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos, if (bitsize != (HOST_WIDE_INT) GET_MODE_BITSIZE (GET_MODE (target))) emit_move_insn (object, target); - store_field (blk_object, bitsize, bitpos, mode, exp, type, alias_set); + store_field (blk_object, bitsize, bitpos, mode, exp, type, alias_set, + nontemporal); emit_move_insn (target, object); @@ -5574,7 +5630,7 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos, /* We're storing into a struct containing a single __complex. */ gcc_assert (!bitpos); - return store_expr (exp, target, 0); + return store_expr (exp, target, 0, nontemporal); } /* If the structure is in a register or if the component @@ -5675,7 +5731,7 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos, if (!MEM_KEEP_ALIAS_SET_P (to_rtx) && MEM_ALIAS_SET (to_rtx) != 0) set_mem_alias_set (to_rtx, alias_set); - return store_expr (exp, to_rtx, 0); + return store_expr (exp, to_rtx, 0, nontemporal); } } @@ -7831,7 +7887,8 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, /* Store data into beginning of memory target. */ store_expr (TREE_OPERAND (exp, 0), adjust_address (target, TYPE_MODE (valtype), 0), - modifier == EXPAND_STACK_PARM); + modifier == EXPAND_STACK_PARM, + false); else { @@ -7844,7 +7901,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, * BITS_PER_UNIT), (HOST_WIDE_INT) GET_MODE_BITSIZE (mode)), 0, TYPE_MODE (valtype), TREE_OPERAND (exp, 0), - type, 0); + type, 0, false); } /* Return the entire union. 
*/ @@ -8760,13 +8817,15 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, op1 = gen_label_rtx (); jumpifnot (TREE_OPERAND (exp, 0), op0); store_expr (TREE_OPERAND (exp, 1), temp, - modifier == EXPAND_STACK_PARM); + modifier == EXPAND_STACK_PARM, + false); emit_jump_insn (gen_jump (op1)); emit_barrier (); emit_label (op0); store_expr (TREE_OPERAND (exp, 2), temp, - modifier == EXPAND_STACK_PARM); + modifier == EXPAND_STACK_PARM, + false); emit_label (op1); OK_DEFER_POP; @@ -8781,7 +8840,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, tree lhs = TREE_OPERAND (exp, 0); tree rhs = TREE_OPERAND (exp, 1); gcc_assert (ignore); - expand_assignment (lhs, rhs); + expand_assignment (lhs, rhs, false); return const0_rtx; } @@ -8813,13 +8872,14 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, do_jump (TREE_OPERAND (rhs, 1), value ? label : 0, value ? 0 : label); - expand_assignment (lhs, build_int_cst (TREE_TYPE (rhs), value)); + expand_assignment (lhs, build_int_cst (TREE_TYPE (rhs), value), + MOVE_NONTEMPORAL (exp)); do_pending_stack_adjust (); emit_label (label); return const0_rtx; } - expand_assignment (lhs, rhs); + expand_assignment (lhs, rhs, MOVE_NONTEMPORAL (exp)); return const0_rtx; } diff --git a/gcc/expr.h b/gcc/expr.h index 13c73215960..064a574f9b9 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -477,13 +477,13 @@ extern void emit_push_insn (rtx, enum machine_mode, tree, rtx, unsigned int, int, rtx, int, rtx, rtx, int, rtx); /* Expand an assignment that stores the value of FROM into TO. */ -extern void expand_assignment (tree, tree); +extern void expand_assignment (tree, tree, bool); /* Generate code for computing expression EXP, and storing the value into TARGET. If SUGGEST_REG is nonzero, copy the value through a register and return that register, if that is possible. 
*/ -extern rtx store_expr (tree, rtx, int); +extern rtx store_expr (tree, rtx, int, bool); /* Given an rtx that may include add and multiply operations, generate them as insns and return a pseudo-reg containing the value. diff --git a/gcc/function.c b/gcc/function.c index a4782d6e159..7d2063c3a22 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -2723,7 +2723,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm, /* TREE_USED gets set erroneously during expand_assignment. */ save_tree_used = TREE_USED (parm); - expand_assignment (parm, make_tree (data->nominal_type, tempreg)); + expand_assignment (parm, make_tree (data->nominal_type, tempreg), false); TREE_USED (parm) = save_tree_used; all->first_conversion_insn = get_insns (); all->last_conversion_insn = get_last_insn (); diff --git a/gcc/genopinit.c b/gcc/genopinit.c index a16d0e17c6b..0838058fdcb 100644 --- a/gcc/genopinit.c +++ b/gcc/genopinit.c @@ -164,6 +164,7 @@ static const char * const optabs[] = "mov_optab->handlers[$A].insn_code = CODE_FOR_$(mov$a$)", "movstrict_optab->handlers[$A].insn_code = CODE_FOR_$(movstrict$a$)", "movmisalign_optab->handlers[$A].insn_code = CODE_FOR_$(movmisalign$a$)", + "storent_optab->handlers[$A].insn_code = CODE_FOR_$(storent$a$)", "cmp_optab->handlers[$A].insn_code = CODE_FOR_$(cmp$a$)", "tst_optab->handlers[$A].insn_code = CODE_FOR_$(tst$a$)", "addcc_optab->handlers[$A].insn_code = CODE_FOR_$(add$acc$)", diff --git a/gcc/optabs.c b/gcc/optabs.c index 47114fcaa5c..cfd2934ddc7 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -5492,6 +5492,8 @@ init_optabs (void) movstrict_optab = init_optab (STRICT_LOW_PART); cmp_optab = init_optab (COMPARE); + storent_optab = init_optab (UNKNOWN); + ucmp_optab = init_optab (UNKNOWN); tst_optab = init_optab (UNKNOWN); diff --git a/gcc/optabs.h b/gcc/optabs.h index 4fe778565b6..c344b657378 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -151,6 +151,8 @@ enum optab_index OTI_movstrict, /* Move, with a misaligned memory. 
*/ OTI_movmisalign, + /* Nontemporal store. */ + OTI_storent, /* Unary operations */ /* Negation */ @@ -367,6 +369,7 @@ extern GTY(()) optab optab_table[OTI_MAX]; #define mov_optab (optab_table[OTI_mov]) #define movstrict_optab (optab_table[OTI_movstrict]) #define movmisalign_optab (optab_table[OTI_movmisalign]) +#define storent_optab (optab_table[OTI_storent]) #define neg_optab (optab_table[OTI_neg]) #define negv_optab (optab_table[OTI_negv]) diff --git a/gcc/stmt.c b/gcc/stmt.c index bd3c7b1a9be..fa18f088c20 100644 --- a/gcc/stmt.c +++ b/gcc/stmt.c @@ -1114,7 +1114,7 @@ expand_asm_expr (tree exp) { if (o[i] != TREE_VALUE (tail)) { - expand_assignment (o[i], TREE_VALUE (tail)); + expand_assignment (o[i], TREE_VALUE (tail), false); free_temp_slots (); /* Restore the original value so that it's correct the next diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 821e1c569c3..c9bdc4a069f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2007-06-10 Zdenek Dvorak <dvorakz@suse.cz> + + * gcc.dg/tree-ssa/prefetch-7.c: New test. + 2007-06-09 Zdenek Dvorak <dvorakz@suse.cz> * gcc.dg/tree-ssa/loop-28.c: New testcase. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c b/gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c new file mode 100644 index 00000000000..510dee04598 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c @@ -0,0 +1,59 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O2 -fprefetch-loop-arrays -march=athlon -msse2 -mfpmath=sse --param simultaneous-prefetches=100 --param max-unrolled-insns=1 -fdump-tree-aprefetch-details -fdump-tree-final_cleanup" } */ + +#define K 1000000 +int a[K], b[K]; + +void test(int *p) +{ + unsigned i; + + /* Nontemporal store should be used for a. */ + for (i = 0; i < K; i++) + a[i] = 0; + + /* Nontemporal store should be used for a, nontemporal prefetch for b. 
*/ + for (i = 0; i < K; i++) + a[i] = b[i]; + + /* Nontemporal store should not be used here (only write and read temporal + prefetches). */ + for (i = 0; i < K - 10000; i++) + a[i + 10000] = a[i]; + + /* Nontemporal store should not be used here (only write and read nontemporal + prefetches). */ + for (i = 0; i < K - 100000; i++) + a[i + 100000] = a[i]; + + /* Nontemporal store should be used neither for a nor for p, as we do not know + whether they alias or not. */ + for (i = 0; i < K; i++) + { + a[i] = 0; + *p++ = 1; + } + + /* Nontemporal store should not be used for a, as we do not know whether its + value will be reused or not. */ + for (i = 0; i < 1000; i++) + a[i] = 0; +} + +/* { dg-final { scan-tree-dump-times "Issued prefetch" 5 "aprefetch" } } */ +/* { dg-final { scan-tree-dump-times "Issued nontemporal prefetch" 3 "aprefetch" } } */ +/* { dg-final { scan-tree-dump-times "nontemporal store" 2 "aprefetch" } } */ + +/* { dg-final { scan-tree-dump-times "builtin_prefetch" 8 "final_cleanup" } } */ +/* { dg-final { scan-tree-dump-times "=\\{nt\\}" 2 "final_cleanup" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_ia32_mfence" 2 "final_cleanup" } } */ + +/* { dg-final { scan-assembler-times "prefetchw" 5 } } */ +/* { dg-final { scan-assembler-times "prefetcht" 1 } } */ +/* { dg-final { scan-assembler-times "prefetchnta" 2 } } */ +/* { dg-final { scan-assembler-times "movnti" 2 } } */ +/* { dg-final { scan-assembler-times "mfence" 2 } } */ + +/* { dg-final { cleanup-tree-dump "aprefetch" } } */ +/* { dg-final { cleanup-tree-dump "final_cleanup" } } */ diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 4d96311ae6c..e0223c326f7 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -729,7 +729,7 @@ dr_analyze_alias (struct data_reference *dr) } DR_SYMBOL_TAG (dr) = smt; - if (var_can_have_subvars (smt)) + if (smt && var_can_have_subvars (smt)) DR_SUBVARS (dr) = get_subvars_for_var (smt); vops = BITMAP_ALLOC (NULL); diff --git 
a/gcc/tree-flow.h b/gcc/tree-flow.h index e4260ad00f1..a61700b3064 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -1019,6 +1019,7 @@ void tree_transform_and_unroll_loop (struct loop *, unsigned, transform_callback, void *); bool contains_abnormal_ssa_name_p (tree); bool stmt_dominates_stmt_p (tree, tree); +void mark_virtual_ops_for_renaming (tree); /* In tree-ssa-threadedge.c */ extern bool potentially_threadable_block (basic_block); diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c index 4e5471397db..3c9164db2c7 100644 --- a/gcc/tree-predcom.c +++ b/gcc/tree-predcom.c @@ -1378,7 +1378,7 @@ get_init_expr (chain_p chain, unsigned index) /* Marks all virtual operands of statement STMT for renaming. */ -static void +void mark_virtual_ops_for_renaming (tree stmt) { ssa_op_iter iter; diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 3678fab8b7e..7cfd4b51a14 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -1063,6 +1063,9 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags, false); pp_space (buffer); pp_character (buffer, '='); + if (TREE_CODE (node) == GIMPLE_MODIFY_STMT + && MOVE_NONTEMPORAL (node)) + pp_string (buffer, "{nt}"); pp_space (buffer); dump_generic_node (buffer, GENERIC_TREE_OPERAND (node, 1), spc, flags, false); diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 64f45a856ed..2424c4a1832 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -47,6 +47,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "langhooks.h" #include "tree-inline.h" #include "tree-data-ref.h" +#include "optabs.h" /* This pass inserts prefetch instructions to optimize cache usage during accesses to arrays in loops. It processes loops sequentially and: @@ -177,6 +178,13 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA is accessed several times in a single iteration of the loop. 
*/ #define NONTEMPORAL_FRACTION 16 +/* In case we have to emit a memory fence instruction after the loop that + uses nontemporal stores, this defines the builtin to use. */ + +#ifndef FENCE_FOLLOWING_MOVNT +#define FENCE_FOLLOWING_MOVNT NULL_TREE +#endif + /* The group of references between that reuse may occur. */ struct mem_ref_group @@ -198,7 +206,6 @@ struct mem_ref tree stmt; /* Statement in that the reference appears. */ tree mem; /* The reference. */ HOST_WIDE_INT delta; /* Constant offset of the reference. */ - bool write_p; /* Is it a write? */ struct mem_ref_group *group; /* The group of references it belongs to. */ unsigned HOST_WIDE_INT prefetch_mod; /* Prefetch only each PREFETCH_MOD-th @@ -208,8 +215,13 @@ struct mem_ref iterations. */ unsigned reuse_distance; /* The amount of data accessed before the first reuse of this value. */ - bool issue_prefetch_p; /* Should we really issue the prefetch? */ struct mem_ref *next; /* The next reference in the group. */ + unsigned write_p : 1; /* Is it a write? */ + unsigned independent_p : 1; /* True if the reference is independent on + all other references inside the loop. */ + unsigned issue_prefetch_p : 1; /* Should we really issue the prefetch? */ + unsigned storent_p : 1; /* True if we changed the store to a + nontemporal one. */ }; /* Dumps information about reference REF to FILE. */ @@ -302,6 +314,8 @@ record_ref (struct mem_ref_group *group, tree stmt, tree mem, (*aref)->issue_prefetch_p = false; (*aref)->group = group; (*aref)->next = NULL; + (*aref)->independent_p = false; + (*aref)->storent_p = false; if (dump_file && (dump_flags & TDF_DETAILS)) dump_mem_ref (dump_file, *aref); @@ -434,9 +448,10 @@ analyze_ref (struct loop *loop, tree *ref_p, tree *base, } /* Record a memory reference REF to the list REFS. The reference occurs in - LOOP in statement STMT and it is write if WRITE_P. */ + LOOP in statement STMT and it is write if WRITE_P. Returns true if the + reference was recorded, false otherwise. 
*/ -static void +static bool gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs, tree ref, bool write_p, tree stmt) { @@ -445,26 +460,31 @@ gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs, struct mem_ref_group *agrp; if (!analyze_ref (loop, &ref, &base, &step, &delta, stmt)) - return; + return false; /* Now we know that REF = &BASE + STEP * iter + DELTA, where DELTA and STEP are integer constants. */ agrp = find_or_create_group (refs, base, step); record_ref (agrp, stmt, ref, delta, write_p); + + return true; } -/* Record the suitable memory references in LOOP. */ +/* Record the suitable memory references in LOOP. NO_OTHER_REFS is set to + true if there are no other memory references inside the loop. */ static struct mem_ref_group * -gather_memory_references (struct loop *loop) +gather_memory_references (struct loop *loop, bool *no_other_refs) { basic_block *body = get_loop_body_in_dom_order (loop); basic_block bb; unsigned i; block_stmt_iterator bsi; - tree stmt, lhs, rhs; + tree stmt, lhs, rhs, call; struct mem_ref_group *refs = NULL; + *no_other_refs = true; + /* Scan the loop body in order, so that the former references precede the later ones. 
*/ for (i = 0; i < loop->num_nodes; i++) @@ -476,16 +496,26 @@ gather_memory_references (struct loop *loop) for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) { stmt = bsi_stmt (bsi); + call = get_call_expr_in (stmt); + if (call && !(call_expr_flags (call) & ECF_CONST)) + *no_other_refs = false; + if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT) - continue; + { + if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)) + *no_other_refs = false; + continue; + } lhs = GIMPLE_STMT_OPERAND (stmt, 0); rhs = GIMPLE_STMT_OPERAND (stmt, 1); if (REFERENCE_CLASS_P (rhs)) - gather_memory_references_ref (loop, &refs, rhs, false, stmt); + *no_other_refs &= gather_memory_references_ref (loop, &refs, + rhs, false, stmt); if (REFERENCE_CLASS_P (lhs)) - gather_memory_references_ref (loop, &refs, lhs, true, stmt); + *no_other_refs &= gather_memory_references_ref (loop, &refs, + lhs, true, stmt); } } free (body); @@ -746,6 +776,10 @@ should_issue_prefetch_p (struct mem_ref *ref) if (ref->prefetch_before != PREFETCH_ALL) return false; + /* Do not prefetch nontemporal stores. */ + if (ref->storent_p) + return false; + return true; } @@ -884,6 +918,130 @@ issue_prefetches (struct mem_ref_group *groups, issue_prefetch_ref (ref, unroll_factor, ahead); } +/* Returns true if REF is a memory write for that a nontemporal store insn + can be used. */ + +static bool +nontemporal_store_p (struct mem_ref *ref) +{ + enum machine_mode mode; + enum insn_code code; + + /* REF must be a write that is not reused. We require it to be independent + on all other memory references in the loop, as the nontemporal stores may + be reordered with respect to other memory references. */ + if (!ref->write_p + || !ref->independent_p + || ref->reuse_distance < L2_CACHE_SIZE_BYTES) + return false; + + /* Check that we have the storent instruction for the mode. 
*/ + mode = TYPE_MODE (TREE_TYPE (ref->mem)); + if (mode == BLKmode) + return false; + + code = storent_optab->handlers[mode].insn_code; + return code != CODE_FOR_nothing; +} + +/* If REF is a nontemporal store, we mark the corresponding modify statement + and return true. Otherwise, we return false. */ + +static bool +mark_nontemporal_store (struct mem_ref *ref) +{ + if (!nontemporal_store_p (ref)) + return false; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Marked reference %p as a nontemporal store.\n", + (void *) ref); + + MOVE_NONTEMPORAL (ref->stmt) = true; + ref->storent_p = true; + + return true; +} + +/* Issue a memory fence instruction after LOOP. */ + +static void +emit_mfence_after_loop (struct loop *loop) +{ + VEC (edge, heap) *exits = get_loop_exit_edges (loop); + edge exit; + tree call; + block_stmt_iterator bsi; + unsigned i; + + for (i = 0; VEC_iterate (edge, exits, i, exit); i++) + { + call = build_function_call_expr (FENCE_FOLLOWING_MOVNT, NULL_TREE); + + if (!single_pred_p (exit->dest) + /* If possible, we prefer not to insert the fence on other paths + in cfg. */ + && !(exit->flags & EDGE_ABNORMAL)) + split_loop_exit_edge (exit); + bsi = bsi_after_labels (exit->dest); + + bsi_insert_before (&bsi, call, BSI_NEW_STMT); + mark_virtual_ops_for_renaming (call); + } + + VEC_free (edge, heap, exits); + update_ssa (TODO_update_ssa_only_virtuals); +} + +/* Returns true if we can use storent in loop, false otherwise. */ + +static bool +may_use_storent_in_loop_p (struct loop *loop) +{ + bool ret = true; + + if (loop->inner != NULL) + return false; + + /* If we must issue a mfence insn after using storent, check that there + is a suitable place for it at each of the loop exits. 
*/ + if (FENCE_FOLLOWING_MOVNT != NULL_TREE) + { + VEC (edge, heap) *exits = get_loop_exit_edges (loop); + unsigned i; + edge exit; + + for (i = 0; VEC_iterate (edge, exits, i, exit); i++) + if ((exit->flags & EDGE_ABNORMAL) + && exit->dest == EXIT_BLOCK_PTR) + ret = false; + + VEC_free (edge, heap, exits); + } + + return ret; +} + +/* Marks nontemporal stores in LOOP. GROUPS contains the description of memory + references in the loop. */ + +static void +mark_nontemporal_stores (struct loop *loop, struct mem_ref_group *groups) +{ + struct mem_ref *ref; + bool any = false; + + if (!may_use_storent_in_loop_p (loop)) + return; + + for (; groups; groups = groups->next) + for (ref = groups->refs; ref; ref = ref->next) + any |= mark_nontemporal_store (ref); + + if (any && FENCE_FOLLOWING_MOVNT != NULL_TREE) + emit_mfence_after_loop (loop); +} + /* Determines whether we can profitably unroll LOOP FACTOR times, and if this is the case, fill in DESC by the description of number of iterations. */ @@ -1115,16 +1273,18 @@ self_reuse_distance (data_reference_p dr, unsigned *loop_sizes, unsigned n, } /* Determines the distance till the first reuse of each reference in REFS - in the loop nest of LOOP. */ + in the loop nest of LOOP. NO_OTHER_REFS is true if there are no other + memory references in the loop. 
*/ static void -determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs) +determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs, + bool no_other_refs) { struct loop *nest, *aloop; VEC (data_reference_p, heap) *datarefs = NULL; VEC (ddr_p, heap) *dependences = NULL; struct mem_ref_group *gr; - struct mem_ref *ref; + struct mem_ref *ref, *refb; VEC (loop_p, heap) *vloops = NULL; unsigned *loop_data_size; unsigned i, j, n; @@ -1188,6 +1348,8 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs) dr->aux = ref; VEC_safe_push (data_reference_p, heap, datarefs, dr); } + else + no_other_refs = false; } for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) @@ -1196,6 +1358,9 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs) ref = dr->aux; if (ref->reuse_distance > dist) ref->reuse_distance = dist; + + if (no_other_refs) + ref->independent_p = true; } compute_all_dependences (datarefs, &dependences, vloops, true); @@ -1205,12 +1370,18 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs) if (DDR_ARE_DEPENDENT (dep) == chrec_known) continue; + ref = DDR_A (dep)->aux; + refb = DDR_B (dep)->aux; + if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know || DDR_NUM_DIST_VECTS (dep) == 0) { /* If the dependence cannot be analysed, assume that there might be a reuse. */ dist = 0; + + ref->independent_p = false; + refb->independent_p = false; } else { @@ -1228,6 +1399,18 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs) adist = volume_of_dist_vector (DDR_DIST_VECT (dep, j), loop_data_size, n); + /* If this is a dependence in the innermost loop (i.e., the + distances in all superloops are zero) and it is not + the trivial self-dependence with distance zero, record that + the references are not completely independent. 
*/ + if (lambda_vector_zerop (DDR_DIST_VECT (dep, j), n - 1) + && (ref != refb + || DDR_DIST_VECT (dep, j)[n-1] != 0)) + { + ref->independent_p = false; + refb->independent_p = false; + } + /* Ignore accesses closer than L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION, so that we use nontemporal prefetches e.g. if single memory @@ -1241,12 +1424,10 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs) } } - ref = DDR_A (dep)->aux; - if (ref->reuse_distance > dist) - ref->reuse_distance = dist; - ref = DDR_B (dep)->aux; if (ref->reuse_distance > dist) ref->reuse_distance = dist; + if (refb->reuse_distance > dist) + refb->reuse_distance = dist; } free_dependence_relations (dependences); @@ -1273,7 +1454,7 @@ loop_prefetch_arrays (struct loop *loop) unsigned ahead, ninsns, time, unroll_factor; HOST_WIDE_INT est_niter; struct tree_niter_desc desc; - bool unrolled = false; + bool unrolled = false, no_other_refs; if (!maybe_hot_bb_p (loop->header)) { @@ -1283,7 +1464,7 @@ loop_prefetch_arrays (struct loop *loop) } /* Step 1: gather the memory references. */ - refs = gather_memory_references (loop); + refs = gather_memory_references (loop, &no_other_refs); /* Step 2: estimate the reuse effects. */ prune_by_reuse (refs); @@ -1291,7 +1472,7 @@ loop_prefetch_arrays (struct loop *loop) if (!anything_to_prefetch_p (refs)) goto fail; - determine_loop_nest_reuse (loop, refs); + determine_loop_nest_reuse (loop, refs, no_other_refs); /* Step 3: determine the ahead and unroll factor. 
*/ @@ -1313,6 +1494,8 @@ loop_prefetch_arrays (struct loop *loop) goto fail; } + mark_nontemporal_stores (loop, refs); + ninsns = tree_num_loop_insns (loop, &eni_size_weights); unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc, est_niter); diff --git a/gcc/tree.h b/gcc/tree.h index 0d31884ba27..bd6232d42ef 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -447,6 +447,8 @@ struct gimple_stmt GTY(()) EH_FILTER_MUST_NOT_THROW in EH_FILTER_EXPR TYPE_REF_CAN_ALIAS_ALL in POINTER_TYPE, REFERENCE_TYPE + MOVE_NONTEMPORAL in + GIMPLE_MODIFY_STMT CASE_HIGH_SEEN in CASE_LABEL_EXPR public_flag: @@ -1159,6 +1161,10 @@ extern void omp_clause_range_check_failed (const tree, const char *, int, #define TYPE_REF_CAN_ALIAS_ALL(NODE) \ (PTR_OR_REF_CHECK (NODE)->base.static_flag) +/* In a MODIFY_EXPR, means that the store in the expression is nontemporal. */ +#define MOVE_NONTEMPORAL(NODE) \ + (GIMPLE_MODIFY_STMT_CHECK (NODE)->base.static_flag) + /* In an INTEGER_CST, REAL_CST, COMPLEX_CST, or VECTOR_CST, this means there was an overflow in folding. */ |