summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- gcc/ChangeLog 33
-rw-r--r-- gcc/builtins.c 26
-rw-r--r-- gcc/config/mips/mips.c 5
-rw-r--r-- gcc/config/mips/mips.h 51
-rw-r--r-- gcc/config/mips/mips.opt 2
-rw-r--r-- gcc/config/s390/s390.h 7
-rw-r--r-- gcc/config/sh/sh.h 2
-rw-r--r-- gcc/doc/tm.texi 26
-rw-r--r-- gcc/expr.c 36
-rw-r--r-- gcc/expr.h 16
-rw-r--r-- gcc/value-prof.c 4
11 files changed, 173 insertions, 35 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 450c4811b6c..cd929650abc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,36 @@
+2007-08-24 Sandra Loosemore <sandra@codesourcery.com>
+ Nigel Stephens <nigel@mips.com>
+
+ PR target/11787
+
+ * doc/tm.texi (SET_RATIO, SET_BY_PIECES_P): Document new macros.
+ (STORE_BY_PIECES_P): No longer applies to __builtin_memset.
+ * expr.c (SET_BY_PIECES_P): Define.
+ (can_store_by_pieces, store_by_pieces): Add MEMSETP argument; use
+ it to decide whether to use SET_BY_PIECES_P or STORE_BY_PIECES_P.
+ (store_expr): Pass MEMSETP argument to can_store_by_pieces and
+ store_by_pieces.
+ * expr.h (SET_RATIO): Define.
+ (can_store_by_pieces, store_by_pieces): Update prototypes.
+ * builtins.c (expand_builtin_memcpy): Pass MEMSETP argument to
+ can_store_by_pieces/store_by_pieces.
+ (expand_builtin_mempcpy_args): Likewise.
+ (expand_builtin_strncpy): Likewise.
+ (expand_builtin_memset_args): Likewise. Also remove special case
+ for optimize_size so that can_store_by_pieces/SET_BY_PIECES_P can
+ decide what to do instead.
+ * value-prof.c (tree_stringops_transform): Pass MEMSETP argument
+ to can_store_by_pieces.
+
+ * config/sh/sh.h (SET_BY_PIECES_P): Clone from STORE_BY_PIECES_P.
+ * config/s390/s390.h (SET_BY_PIECES_P): Likewise.
+
+ * config/mips/mips.opt (mmemcpy): Change from Var to Mask.
+ * config/mips/mips.c (override_options): Make -Os default to -mmemcpy.
+ * config/mips/mips.h (MIPS_CALL_RATIO): Define.
+ (MOVE_RATIO, CLEAR_RATIO, SET_RATIO): Define.
+ (STORE_BY_PIECES_P): Define.
+
2007-08-24 Tom Tromey <tromey@redhat.com>
* varpool.c (varpool_last_needed_node): Fix comment typo.
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 8d2657b3c82..e353e4dce1d 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -3331,11 +3331,11 @@ expand_builtin_memcpy (tree exp, rtx target, enum machine_mode mode)
&& GET_CODE (len_rtx) == CONST_INT
&& (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1
&& can_store_by_pieces (INTVAL (len_rtx), builtin_memcpy_read_str,
- (void *) src_str, dest_align))
+ (void *) src_str, dest_align, false))
{
dest_mem = store_by_pieces (dest_mem, INTVAL (len_rtx),
builtin_memcpy_read_str,
- (void *) src_str, dest_align, 0);
+ (void *) src_str, dest_align, false, 0);
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
dest_mem = convert_memory_address (ptr_mode, dest_mem);
return dest_mem;
@@ -3444,13 +3444,14 @@ expand_builtin_mempcpy_args (tree dest, tree src, tree len, tree type,
&& GET_CODE (len_rtx) == CONST_INT
&& (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1
&& can_store_by_pieces (INTVAL (len_rtx), builtin_memcpy_read_str,
- (void *) src_str, dest_align))
+ (void *) src_str, dest_align, false))
{
dest_mem = get_memory_rtx (dest, len);
set_mem_align (dest_mem, dest_align);
dest_mem = store_by_pieces (dest_mem, INTVAL (len_rtx),
builtin_memcpy_read_str,
- (void *) src_str, dest_align, endp);
+ (void *) src_str, dest_align,
+ false, endp);
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
dest_mem = convert_memory_address (ptr_mode, dest_mem);
return dest_mem;
@@ -3792,13 +3793,13 @@ expand_builtin_strncpy (tree exp, rtx target, enum machine_mode mode)
if (!p || dest_align == 0 || !host_integerp (len, 1)
|| !can_store_by_pieces (tree_low_cst (len, 1),
builtin_strncpy_read_str,
- (void *) p, dest_align))
+ (void *) p, dest_align, false))
return NULL_RTX;
dest_mem = get_memory_rtx (dest, len);
store_by_pieces (dest_mem, tree_low_cst (len, 1),
builtin_strncpy_read_str,
- (void *) p, dest_align, 0);
+ (void *) p, dest_align, false, 0);
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
dest_mem = convert_memory_address (ptr_mode, dest_mem);
return dest_mem;
@@ -3926,14 +3927,15 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
* We can't pass builtin_memset_gen_str as that emits RTL. */
c = 1;
if (host_integerp (len, 1)
- && !(optimize_size && tree_low_cst (len, 1) > 1)
&& can_store_by_pieces (tree_low_cst (len, 1),
- builtin_memset_read_str, &c, dest_align))
+ builtin_memset_read_str, &c, dest_align,
+ true))
{
val_rtx = force_reg (TYPE_MODE (unsigned_char_type_node),
val_rtx);
store_by_pieces (dest_mem, tree_low_cst (len, 1),
- builtin_memset_gen_str, val_rtx, dest_align, 0);
+ builtin_memset_gen_str, val_rtx, dest_align,
+ true, 0);
}
else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx,
dest_align, expected_align,
@@ -3951,11 +3953,11 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
if (c)
{
if (host_integerp (len, 1)
- && !(optimize_size && tree_low_cst (len, 1) > 1)
&& can_store_by_pieces (tree_low_cst (len, 1),
- builtin_memset_read_str, &c, dest_align))
+ builtin_memset_read_str, &c, dest_align,
+ true))
store_by_pieces (dest_mem, tree_low_cst (len, 1),
- builtin_memset_read_str, &c, dest_align, 0);
+ builtin_memset_read_str, &c, dest_align, true, 0);
else if (!set_storage_via_setmem (dest_mem, len_rtx, GEN_INT (c),
dest_align, expected_align,
expected_size))
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 0dd25fc2f70..2c2f11eb66a 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -5323,6 +5323,11 @@ override_options (void)
flag_delayed_branch = 0;
}
+ /* Prefer a call to memcpy over inline code when optimizing for size,
+ though see MOVE_RATIO in mips.h. */
+ if (optimize_size && (target_flags_explicit & MASK_MEMCPY) == 0)
+ target_flags |= MASK_MEMCPY;
+
#ifdef MIPS_TFMODE_FORMAT
REAL_MODE_FORMAT (TFmode) = &MIPS_TFMODE_FORMAT;
#endif
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 71809c2d5a7..c3797e530dc 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2785,6 +2785,57 @@ while (0)
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")
+
+/* The base cost of a memcpy call, for MOVE_RATIO and friends. These
+ values were determined experimentally by benchmarking with CSiBE.
+ In theory, the call overhead is higher for TARGET_ABICALLS (especially
+ for o32 where we have to restore $gp afterwards as well as make an
+ indirect call), but in practice, bumping this up higher for
+ TARGET_ABICALLS doesn't make much difference to code size. */
+
+#define MIPS_CALL_RATIO 8
+
+/* Define MOVE_RATIO to encourage use of movmemsi when enabled,
+ since it should always generate code at least as good as
+ move_by_pieces(). But when the inline movmemsi pattern is disabled
+ (i.e., with -mips16 or -mmemcpy), instead use a value approximating
+ the length of a memcpy call sequence, so that move_by_pieces will
+ generate inline code if it is shorter than a function call.
+ Since move_by_pieces_ninsns() counts memory-to-memory moves, but
+ we'll have to generate a load/store pair for each, halve the value of
+ MIPS_CALL_RATIO to take that into account.
+ The default value for MOVE_RATIO when HAVE_movmemsi is true is 2.
+ There is no point to setting it to less than this to try to disable
+ move_by_pieces entirely, because that also disables some desirable
+ tree-level optimizations, specifically related to optimizing a
+ one-byte string copy into a simple move byte operation. */
+
+#define MOVE_RATIO \
+ ((TARGET_MIPS16 || TARGET_MEMCPY) ? MIPS_CALL_RATIO / 2 : 2)
+
+/* For CLEAR_RATIO, when optimizing for size, give a better estimate
+ of the length of a memset call, but use the default otherwise. */
+
+#define CLEAR_RATIO \
+ (optimize_size ? MIPS_CALL_RATIO : 15)
+
+/* This is similar to CLEAR_RATIO, but for a non-zero constant, so when
+ optimizing for size adjust the ratio to account for the overhead of
+ loading the constant and replicating it across the word. */
+
+#define SET_RATIO \
+ (optimize_size ? MIPS_CALL_RATIO - 2 : 15)
+
+/* STORE_BY_PIECES_P can be used when copying a constant string, but
+ in that case each word takes 3 insns (lui, ori, sw), or more in
+ 64-bit mode, instead of 2 (lw, sw). For now we always fail this
+ and let the move_by_pieces code copy the string from read-only
+ memory. In the future, this could be tuned further for multi-issue
+ CPUs that can issue stores down one pipe and arithmetic instructions
+ down another; in that case, the lui/ori/sw combination would be a
+ win for long enough strings. */
+
+#define STORE_BY_PIECES_P(SIZE, ALIGN) 0
#ifndef __mips16
/* Since the bits of the _init and _fini function is spread across
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 4a752ec3987..6f6c109187c 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -173,7 +173,7 @@ Target Report RejectNegative Mask(LONG64)
Use a 64-bit long type
mmemcpy
-Target Report Var(TARGET_MEMCPY)
+Target Report Mask(MEMCPY)
Don't optimize block moves
mmips-tfile
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 4fb55050345..8cc8edfe070 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -803,10 +803,13 @@ extern struct rtx_def *s390_compare_op0, *s390_compare_op1, *s390_compare_emitte
|| (TARGET_64BIT && (SIZE) == 8) )
/* This macro is used to determine whether store_by_pieces should be
- called to "memset" storage with byte values other than zero, or
- to "memcpy" storage when the source is a constant string. */
+ called to "memcpy" storage when the source is a constant string. */
#define STORE_BY_PIECES_P(SIZE, ALIGN) MOVE_BY_PIECES_P (SIZE, ALIGN)
+/* Likewise to decide whether to "memset" storage with byte values
+ other than zero. */
+#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P (SIZE, ALIGN)
+
/* Don't perform CSE on function addresses. */
#define NO_FUNCTION_CSE
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index 71f5b6f3cd9..8f299b75afe 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -2184,6 +2184,8 @@ struct sh_args {
(move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
< (TARGET_SMALLCODE ? 2 : ((ALIGN >= 32) ? 16 : 2)))
+#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN)
+
/* Macros to check register numbers against specific register classes. */
/* These assume that REGNO is a hard or pseudo reg number.
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 0ec10c5949d..a913b8058c6 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5897,12 +5897,30 @@ will be used. Defaults to 1 if @code{move_by_pieces_ninsns} returns less
than @code{CLEAR_RATIO}.
@end defmac
+@defmac SET_RATIO
+The threshold of number of scalar move insns, @emph{below} which a sequence
+of insns should be generated to set memory to a constant value, instead of
+a block set insn or a library call.
+Increasing the value will always make code faster, but
+eventually incurs high cost in increased code size.
+
+If you don't define this, it defaults to the value of @code{MOVE_RATIO}.
+@end defmac
+
+@defmac SET_BY_PIECES_P (@var{size}, @var{alignment})
+A C expression used to determine whether @code{store_by_pieces} will be
+used to set a chunk of memory to a constant value, or whether some
+other mechanism will be used. Used by @code{__builtin_memset} when
+storing values other than constant zero.
+Defaults to 1 if @code{move_by_pieces_ninsns} returns less
+than @code{SET_RATIO}.
+@end defmac
+
@defmac STORE_BY_PIECES_P (@var{size}, @var{alignment})
A C expression used to determine whether @code{store_by_pieces} will be
-used to set a chunk of memory to a constant value, or whether some other
-mechanism will be used. Used by @code{__builtin_memset} when storing
-values other than constant zero and by @code{__builtin_strcpy} when
-when called with a constant source string.
+used to set a chunk of memory to a constant string value, or whether some
+other mechanism will be used. Used by @code{__builtin_strcpy} when
+called with a constant source string.
Defaults to 1 if @code{move_by_pieces_ninsns} returns less
than @code{MOVE_RATIO}.
@end defmac
diff --git a/gcc/expr.c b/gcc/expr.c
index 97116b33daa..244604000a9 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -186,8 +186,15 @@ static bool float_extend_from_mem[NUM_MACHINE_MODES][NUM_MACHINE_MODES];
#endif
/* This macro is used to determine whether store_by_pieces should be
- called to "memset" storage with byte values other than zero, or
- to "memcpy" storage when the source is a constant string. */
+ called to "memset" storage with byte values other than zero. */
+#ifndef SET_BY_PIECES_P
+#define SET_BY_PIECES_P(SIZE, ALIGN) \
+ (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
+ < (unsigned int) SET_RATIO)
+#endif
+
+/* This macro is used to determine whether store_by_pieces should be
+ called to "memcpy" storage when the source is a constant string. */
#ifndef STORE_BY_PIECES_P
#define STORE_BY_PIECES_P(SIZE, ALIGN) \
(move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
@@ -2191,13 +2198,14 @@ use_group_regs (rtx *call_fusage, rtx regs)
/* Determine whether the LEN bytes generated by CONSTFUN can be
stored to memory using several move instructions. CONSTFUNDATA is
a pointer which will be passed as argument in every CONSTFUN call.
- ALIGN is maximum alignment we can assume. Return nonzero if a
- call to store_by_pieces should succeed. */
+ ALIGN is maximum alignment we can assume. MEMSETP is true if this is
+ a memset operation and false if it's a copy of a constant string.
+ Return nonzero if a call to store_by_pieces should succeed. */
int
can_store_by_pieces (unsigned HOST_WIDE_INT len,
rtx (*constfun) (void *, HOST_WIDE_INT, enum machine_mode),
- void *constfundata, unsigned int align)
+ void *constfundata, unsigned int align, bool memsetp)
{
unsigned HOST_WIDE_INT l;
unsigned int max_size;
@@ -2210,7 +2218,9 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
if (len == 0)
return 1;
- if (! STORE_BY_PIECES_P (len, align))
+ if (! (memsetp
+ ? SET_BY_PIECES_P (len, align)
+ : STORE_BY_PIECES_P (len, align)))
return 0;
tmode = mode_for_size (STORE_MAX_PIECES * BITS_PER_UNIT, MODE_INT, 1);
@@ -2285,7 +2295,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
/* Generate several move instructions to store LEN bytes generated by
CONSTFUN to block TO. (A MEM rtx with BLKmode). CONSTFUNDATA is a
pointer which will be passed as argument in every CONSTFUN call.
- ALIGN is maximum alignment we can assume.
+ ALIGN is maximum alignment we can assume. MEMSETP is true if this is
+ a memset operation and false if it's a copy of a constant string.
If ENDP is 0 return to, if ENDP is 1 return memory at the end ala
mempcpy, and if ENDP is 2 return memory the end minus one byte ala
stpcpy. */
@@ -2293,7 +2304,7 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
rtx
store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
rtx (*constfun) (void *, HOST_WIDE_INT, enum machine_mode),
- void *constfundata, unsigned int align, int endp)
+ void *constfundata, unsigned int align, bool memsetp, int endp)
{
struct store_by_pieces data;
@@ -2303,7 +2314,9 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
return to;
}
- gcc_assert (STORE_BY_PIECES_P (len, align));
+ gcc_assert (memsetp
+ ? SET_BY_PIECES_P (len, align)
+ : STORE_BY_PIECES_P (len, align));
data.constfun = constfun;
data.constfundata = constfundata;
data.len = len;
@@ -4498,7 +4511,7 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
str_copy_len = MIN (str_copy_len, exp_len);
if (!can_store_by_pieces (str_copy_len, builtin_strncpy_read_str,
(void *) TREE_STRING_POINTER (exp),
- MEM_ALIGN (target)))
+ MEM_ALIGN (target), false))
goto normal_expr;
dest_mem = target;
@@ -4507,7 +4520,8 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
str_copy_len, builtin_strncpy_read_str,
(void *) TREE_STRING_POINTER (exp),
MEM_ALIGN (target),
- exp_len > str_copy_len ? 1 : 0);
+ false,
+ exp_len > str_copy_len ? 1 : 0);
if (exp_len > str_copy_len)
clear_storage (dest_mem, GEN_INT (exp_len - str_copy_len),
BLOCK_OP_NORMAL);
diff --git a/gcc/expr.h b/gcc/expr.h
index 242329a37be..24a391cc7f6 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -84,6 +84,13 @@ enum expand_modifier {EXPAND_NORMAL = 0, EXPAND_STACK_PARM, EXPAND_SUM,
#define CLEAR_RATIO (optimize_size ? 3 : 15)
#endif
#endif
+
+/* If a memory set (to a value other than zero) operation would take
+ SET_RATIO or more simple move-instruction sequences, we will do a setmem
+ or libcall instead. */
+#ifndef SET_RATIO
+#define SET_RATIO MOVE_RATIO
+#endif
enum direction {none, upward, downward};
@@ -444,20 +451,23 @@ extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
CONSTFUN with several move instructions by store_by_pieces
function. CONSTFUNDATA is a pointer which will be passed as argument
in every CONSTFUN call.
- ALIGN is maximum alignment we can assume. */
+ ALIGN is maximum alignment we can assume.
+ MEMSETP is true if this is a real memset/bzero, not a copy
+ of a const string. */
extern int can_store_by_pieces (unsigned HOST_WIDE_INT,
rtx (*) (void *, HOST_WIDE_INT,
enum machine_mode),
- void *, unsigned int);
+ void *, unsigned int, bool);
/* Generate several move instructions to store LEN bytes generated by
CONSTFUN to block TO. (A MEM rtx with BLKmode). CONSTFUNDATA is a
pointer which will be passed as argument in every CONSTFUN call.
ALIGN is maximum alignment we can assume.
+ MEMSETP is true if this is a real memset/bzero, not a copy.
Returns TO + LEN. */
extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT,
rtx (*) (void *, HOST_WIDE_INT, enum machine_mode),
- void *, unsigned int, int);
+ void *, unsigned int, bool, int);
/* Emit insns to set X from Y. */
extern rtx emit_move_insn (rtx, rtx);
diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index 937688fad4a..124a3c866b1 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -1392,13 +1392,13 @@ tree_stringops_transform (block_stmt_iterator *bsi)
case BUILT_IN_MEMSET:
if (!can_store_by_pieces (val, builtin_memset_read_str,
CALL_EXPR_ARG (call, 1),
- dest_align))
+ dest_align, true))
return false;
break;
case BUILT_IN_BZERO:
if (!can_store_by_pieces (val, builtin_memset_read_str,
integer_zero_node,
- dest_align))
+ dest_align, true))
return false;
break;
default: