-rw-r--r--  gcc/ChangeLog                    11
-rw-r--r--  gcc/config/mips/mips-protos.h     2
-rw-r--r--  gcc/config/mips/mips.c           86
-rw-r--r--  gcc/config/mips/mips.h           31
-rw-r--r--  gcc/expr.c                        5
-rw-r--r--  gcc/expr.h                        4
-rw-r--r--  gcc/testsuite/ChangeLog           6
-rw-r--r--  gcc/testsuite/gcc.dg/memcpy-4.c   7
8 files changed, 119 insertions(+), 33 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5b0d4a68223..4cdef41ddca 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2012-01-02 Richard Sandiford <rdsandiford@googlemail.com>
+
+ * expr.h (move_by_pieces_ninsns): Declare.
+ * expr.c (move_by_pieces_ninsns): Make external.
+ * config/mips/mips-protos.h (mips_move_by_pieces_p): Declare.
+ (mips_store_by_pieces_p): Likewise.
+ * config/mips/mips.h (MOVE_BY_PIECES_P): Call mips_move_by_pieces_p.
+ (STORE_BY_PIECES_P): Call mips_store_by_pieces_p.
+ * config/mips/mips.c (mips_move_by_pieces_p): New function.
+ (mips_store_by_pieces_p): Likewise.
+
2012-01-02 Jakub Jelinek <jakub@redhat.com>

 * passes.c (register_one_dump_file): Free full_name.
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 1791ce7c143..ca0fb5eba67 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -239,6 +239,8 @@ extern void mips_split_call (rtx, rtx);
extern bool mips_get_pic_call_symbol (rtx *, int);
extern void mips_expand_fcc_reload (rtx, rtx, rtx);
extern void mips_set_return_address (rtx, rtx);
+extern bool mips_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
+extern bool mips_store_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
extern bool mips_expand_block_move (rtx, rtx, rtx);
extern void mips_expand_synci_loop (rtx, rtx);
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index bdbf94a48b0..7b3b6852cc9 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -6537,6 +6537,92 @@ mips_expand_fcc_reload (rtx dest, rtx src, rtx scratch)
emit_insn (gen_slt_sf (dest, fp2, fp1));
}
+/* Implement MOVE_BY_PIECES_P.  */
+
+bool
+mips_move_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
+{
+  if (HAVE_movmemsi)
+    {
+      /* movmemsi is meant to generate code that is at least as good as
+         move_by_pieces.  However, movmemsi effectively uses a by-pieces
+         implementation both for moves smaller than a word and for
+         word-aligned moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT
+         bytes.  We should allow the tree-level optimisers to do such
+         moves by pieces, as it often exposes other optimization
+         opportunities.  We might as well continue to use movmemsi at
+         the rtl level though, as it produces better code when
+         scheduling is disabled (such as at -O).  */
+      if (currently_expanding_to_rtl)
+        return false;
+      if (align < BITS_PER_WORD)
+        return size < UNITS_PER_WORD;
+      return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT;
+    }
+  /* The default value.  If this becomes a target hook, we should
+     call the default definition instead.  */
+  return (move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
+          < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()));
+}
+
+/* Implement STORE_BY_PIECES_P.  */
+
+bool
+mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
+{
+  /* Storing by pieces involves moving constants into registers
+     of size MIN (ALIGN, BITS_PER_WORD), then storing them.
+     We need to decide whether it is cheaper to load the address of
+     constant data into a register and use a block move instead.  */
+
+  /* If the data is only byte aligned, then:
+
+     (a1) A block move of less than 4 bytes would involve 3 LBs and
+          3 SBs.  We might as well use 3 single-instruction LIs and 3 SBs
+          instead.
+
+     (a2) A block move of 4 bytes from aligned source data can use an
+          LW/SWL/SWR sequence.  This is often better than the 4 LIs and
+          4 SBs that we would generate when storing by pieces.  */
+  if (align <= BITS_PER_UNIT)
+    return size < 4;
+
+  /* If the data is 2-byte aligned, then:
+
+     (b1) A block move of less than 4 bytes would use a combination of LBs,
+          LHs, SBs and SHs.  We get better code by using single-instruction
+          LIs, SBs and SHs instead.
+
+     (b2) A block move of 4 bytes from aligned source data would again use
+          an LW/SWL/SWR sequence.  In most cases, loading the address of
+          the source data would require at least one extra instruction.
+          It is often more efficient to use 2 single-instruction LIs and
+          2 SHs instead.
+
+     (b3) A block move of up to 3 additional bytes would be like (b1).
+
+     (b4) A block move of 8 bytes from aligned source data can use two
+          LW/SWL/SWR sequences or a single LD/SDL/SDR sequence.  Both
+          sequences are better than the 4 LIs and 4 SHs that we'd generate
+          when storing by pieces.
+
+     The reasoning for higher alignments is similar:
+
+     (c1) A block move of less than 4 bytes would be the same as (b1).
+
+     (c2) A block move of 4 bytes would use an LW/SW sequence.  Again,
+          loading the address of the source data would typically require
+          at least one extra instruction.  It is generally better to use
+          LUI/ORI/SW instead.
+
+     (c3) A block move of up to 3 additional bytes would be like (b1).
+
+     (c4) A block move of 8 bytes can use two LW/SW sequences or a single
+          LD/SD sequence, and in these cases we've traditionally preferred
+          the memory copy over the more bulky constant moves.  */
+  return size < 8;
+}
+
/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
Assume that the areas do not overlap. */
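
Taken together, the two new predicates reduce to a small size/alignment table. Below is a standalone host-side sketch of the same decision logic, with BITS_PER_UNIT, BITS_PER_WORD, UNITS_PER_WORD and MIPS_MAX_MOVE_BYTES_STRAIGHT hard-coded for an illustrative 32-bit target; GCC derives these from the target configuration, and the real movmemsi branch additionally checks currently_expanding_to_rtl, which this sketch omits.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative 32-bit values; GCC derives these from the target.  */
#define BITS_PER_UNIT 8
#define BITS_PER_WORD 32
#define UNITS_PER_WORD 4
#define MIPS_MAX_MOVE_BYTES_STRAIGHT 16  /* assumed value for this sketch */

/* Mirror of the movmemsi branch of mips_move_by_pieces_p.  */
static bool
move_by_pieces_p (unsigned long size, unsigned int align)
{
  if (align < BITS_PER_WORD)
    return size < UNITS_PER_WORD;
  return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT;
}

/* Mirror of mips_store_by_pieces_p.  */
static bool
store_by_pieces_p (unsigned long size, unsigned int align)
{
  if (align <= BITS_PER_UNIT)
    return size < 4;  /* (a1)/(a2): the block move wins at 4 bytes.  */
  return size < 8;    /* (b4)/(c4): the block move wins at 8 bytes.  */
}

int
main (void)
{
  static const unsigned int aligns[] = { 8, 16, 32 };
  for (int i = 0; i < 3; i++)
    for (unsigned long size = 1; size <= 8; size++)
      printf ("align %2u bits, size %lu: move %-4s store %s\n",
              aligns[i], size,
              move_by_pieces_p (size, aligns[i]) ? "yes," : "no,",
              store_by_pieces_p (size, aligns[i]) ? "yes" : "no");
  return 0;
}

Printing the table makes the asymmetry visible: moves by pieces are allowed up to a full MIPS_MAX_MOVE_BYTES_STRAIGHT when word-aligned, while stores by pieces cut off at 8 bytes regardless of higher alignment.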
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 55f9b07c416..23d40baf9a6 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2782,23 +2782,8 @@ while (0)
? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \
: MIPS_CALL_RATIO / 2)
-/* movmemsi is meant to generate code that is at least as good as
-   move_by_pieces.  However, movmemsi effectively uses a by-pieces
-   implementation both for moves smaller than a word and for word-aligned
-   moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT bytes.  We should
-   allow the tree-level optimisers to do such moves by pieces, as it
-   often exposes other optimization opportunities.  We might as well
-   continue to use movmemsi at the rtl level though, as it produces
-   better code when scheduling is disabled (such as at -O).  */
-
-#define MOVE_BY_PIECES_P(SIZE, ALIGN)                            \
-  (HAVE_movmemsi                                                 \
-   ? (!currently_expanding_to_rtl                                \
-      && ((ALIGN) < BITS_PER_WORD                                \
-          ? (SIZE) < UNITS_PER_WORD                              \
-          : (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT))             \
-   : (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1)   \
-      < (unsigned int) MOVE_RATIO (false)))
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
+  mips_move_by_pieces_p (SIZE, ALIGN)
/* For CLEAR_RATIO, when optimizing for size, give a better estimate
of the length of a memset call, but use the default otherwise. */
@@ -2813,16 +2798,8 @@ while (0)
#define SET_RATIO(speed) \
((speed) ? 15 : MIPS_CALL_RATIO - 2)
-/* STORE_BY_PIECES_P can be used when copying a constant string, but
-   in that case each word takes 3 insns (lui, ori, sw), or more in
-   64-bit mode, instead of 2 (lw, sw).  For now we always fail this
-   and let the move_by_pieces code copy the string from read-only
-   memory.  In the future, this could be tuned further for multi-issue
-   CPUs that can issue stores down one pipe and arithmetic instructions
-   down another; in that case, the lui/ori/sw combination would be a
-   win for long enough strings.  */
-
-#define STORE_BY_PIECES_P(SIZE, ALIGN) 0
+#define STORE_BY_PIECES_P(SIZE, ALIGN) \
+  mips_store_by_pieces_p (SIZE, ALIGN)
#ifndef __mips16
/* Since the bits of the _init and _fini function is spread across
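
The deleted STORE_BY_PIECES_P comment rests on a small instruction-count argument: building a constant word takes lui+ori+sw (3 insns), while copying it from read-only memory takes lw+sw (2 insns) plus a one-off address setup. A quick sketch of that arithmetic follows; the 1-insn address setup is an assumption, since loading an address on MIPS can take one or two instructions.

#include <stdio.h>

int
main (void)
{
  for (unsigned words = 1; words <= 4; words++)
    {
      unsigned by_pieces  = 3 * words;      /* lui + ori + sw per word */
      unsigned block_copy = 2 * words + 1;  /* lw + sw per word, plus address setup */
      printf ("%u word(s): by pieces %u insns, block copy %u insns\n",
              words, by_pieces, block_copy);
    }
  return 0;
}

Storing by pieces only breaks even at a single word (3 vs 3) and loses beyond that, which matches the new size < 8 cut-off in mips_store_by_pieces_p for a 32-bit word size.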
diff --git a/gcc/expr.c b/gcc/expr.c
index c10f9157687..9825d126df4 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -123,9 +123,6 @@ struct store_by_pieces_d
int reverse;
};
-static unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
-                                                     unsigned int,
-                                                     unsigned int);
static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
                              struct move_by_pieces_d *);
static bool block_move_libcall_safe_for_call_parm (void);
@@ -1016,7 +1013,7 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
/* Return number of insns required to move L bytes by pieces.
   ALIGN (in bits) is maximum alignment we can assume.  */
-static unsigned HOST_WIDE_INT
+unsigned HOST_WIDE_INT
move_by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
unsigned int max_size)
{
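
move_by_pieces_ninsns, now exported, counts the piece moves needed for L bytes at a given alignment by greedily taking the widest piece the alignment allows. Here is a rough host-side model of that count; the real function walks the available machine modes and also consults SLOW_UNALIGNED_ACCESS and the max_size limit, which this sketch ignores.

#include <stdio.h>

/* Rough model: pieces are powers of two no wider than the alignment
   (converted to bytes) and no wider than MAX_PIECE bytes.  */
static unsigned long
ninsns_model (unsigned long l, unsigned int align_bits, unsigned int max_piece)
{
  unsigned int piece = align_bits / 8;
  if (piece > max_piece)
    piece = max_piece;
  unsigned long n = 0;
  for (; piece >= 1; piece /= 2)
    {
      n += l / piece;  /* as many full pieces of this width as fit */
      l %= piece;      /* finish the tail with narrower pieces */
    }
  return n;
}

int
main (void)
{
  /* A 5-byte word-aligned copy, as in the updated memcpy-4.c test:
     one 4-byte piece plus one 1-byte piece.  */
  printf ("5 bytes at 32-bit alignment: %lu moves\n",
          ninsns_model (5, 32, 4));
  return 0;
}

Two moves for the 5-byte copy is exactly why the updated test below now expects at least two mem/s/u references in the expand dump rather than a fixed count.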
diff --git a/gcc/expr.h b/gcc/expr.h
index 7a323bacd6a..0096367a727 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -367,6 +367,10 @@ extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
succeed. */
extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
+extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
+                                                     unsigned int,
+                                                     unsigned int);
+
/* Return nonzero if it is desirable to store LEN bytes generated by
CONSTFUN with several move instructions by store_by_pieces
function. CONSTFUNDATA is a pointer which will be passed as argument
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e24d96c4522..dd29a88d25e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2012-01-02 Richard Sandiford <rdsandiford@googlemail.com>
+
+ * gcc.dg/memcpy-4.c: Add nomips16 attribute for MIPS targets.
+ Increase copy to 5 bytes. Look for at least two "mem/s/u"s,
+ rather than a specific number.
+
2012-01-02 Paul Thomas <pault@gcc.gnu.org>

 PR fortran/46262
diff --git a/gcc/testsuite/gcc.dg/memcpy-4.c b/gcc/testsuite/gcc.dg/memcpy-4.c
index 4fe72ec5b89..80a943bdb78 100644
--- a/gcc/testsuite/gcc.dg/memcpy-4.c
+++ b/gcc/testsuite/gcc.dg/memcpy-4.c
@@ -1,11 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-rtl-expand" } */
+#ifdef __mips
+__attribute__((nomips16))
+#endif
void
f1 (char *p)
{
-  __builtin_memcpy (p, "123", 3);
+  __builtin_memcpy (p, "12345", 5);
}
-/* { dg-final { scan-rtl-dump-times "mem/s/u" 3 "expand" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump "mem/s/u.*mem/s/u" "expand" { target mips*-*-* } } } */
/* { dg-final { cleanup-rtl-dump "expand" } } */