summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>2003-06-04 16:42:23 +0000
committerhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>2003-06-04 16:42:23 +0000
commit2d93887b0dfd20fdaa76be712b7f1838a1cdcaab (patch)
treeaa9aeacad2cb7bb2bf196867a31b18e218904bfb
parent18fb8076c16299ab1dd0fe3d0b26e2a6c89481de (diff)
downloadgcc-2d93887b0dfd20fdaa76be712b7f1838a1cdcaab.tar.gz
* i386.c (min_insn_size, k8_avoid_jump_misspredicts): New functions
(ix86_reorg): Use it. * i386.md (align): New insn pattern. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@67446 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog6
-rw-r--r--gcc/config/i386/i386.c114
-rw-r--r--gcc/config/i386/i386.md14
3 files changed, 134 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0adc19a1db7..96a0ab80a70 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+Wed Jun 4 18:39:33 CEST 2003 Jan Hubicka <jh@suse.cz>
+
+ * i386.c (min_insn_size, k8_avoid_jump_misspredicts): New functions
+ (ix86_reorg): Use it.
+ * i386.md (align): New insn pattern.
+
2003-06-04 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* toplev.c (rest_of_type_compilation): Fix typo.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7386ba55131..c96ac8c68c7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -878,6 +878,8 @@ static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
+static int min_insn_size PARAMS ((rtx));
+static void k8_avoid_jump_misspredicts PARAMS ((void));
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
@@ -15526,6 +15528,117 @@ x86_function_profiler (file, labelno)
}
}
+/* We don't have exact information about the insn sizes, but we may assume
+   quite safely that we are informed about all 1 byte insns and memory
+   address sizes.  This is enough to eliminate unnecessary padding in
+   99% of cases.  */
+
+static int
+min_insn_size (insn)
+     rtx insn;
+{
+  int l = 0;
+
+  if (!INSN_P (insn) || !active_insn_p (insn))
+    return 0;
+
+  /* Discard alignments we've emitted and jump instructions.  */
+  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
+    return 0;
+  if (GET_CODE (insn) == JUMP_INSN
+      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
+	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+    return 0;
+
+  /* Important case - calls are always 5 bytes.
+     It is common to have many calls in the row.  */
+  if (GET_CODE (insn) == CALL_INSN
+      && symbolic_reference_mentioned_p (PATTERN (insn))
+      && !SIBLING_CALL_P (insn))
+    return 5;
+  if (get_attr_length (insn) <= 1)
+    return 1;
+
+  /* For normal instructions we may rely on the sizes of addresses
+     and the presence of symbol to require 4 bytes of encoding.
+     This is not the case for jumps where references are PC relative.  */
+  if (GET_CODE (insn) != JUMP_INSN)
+    {
+      l = get_attr_length_address (insn);
+      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
+	l = 4;
+    }
+  if (l)
+    return 1+l;
+  else
+    return 2;
+}
+
+/* AMD K8 core misspredicts jumps when there are more than 3 jumps in 16 byte
+   window.  */
+
+static void
+k8_avoid_jump_misspredicts ()
+{
+  rtx insn, start = get_insns ();
+  int nbytes = 0, njumps = 0;
+  int isjump = 0;
+
+  /* Look for all minimal intervals of instructions containing 4 jumps.
+     The intervals are bounded by START and INSN.  NBYTES is the total
+     size of instructions in the interval including INSN and not including
+     START.  When the NBYTES is smaller than 16 bytes, it is possible
+     that the end of START and INSN ends up in the same 16byte page.
+
+     The smallest offset in the page INSN can start is the case where START
+     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
+     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
+     */
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+
+      nbytes += min_insn_size (insn);
+      if (rtl_dump_file)
+        fprintf (rtl_dump_file, "Insn %i estimated to %i bytes\n",
+		 INSN_UID (insn), min_insn_size (insn));
+      if ((GET_CODE (insn) == JUMP_INSN
+	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
+	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+	  || GET_CODE (insn) == CALL_INSN)
+	njumps++;
+      else
+	continue;
+
+      while (njumps > 3)
+	{
+	  start = NEXT_INSN (start);
+	  if ((GET_CODE (start) == JUMP_INSN
+	       && GET_CODE (PATTERN (start)) != ADDR_VEC
+	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
+	      || GET_CODE (start) == CALL_INSN)
+	    njumps--, isjump = 1;
+	  else
+	    isjump = 0;
+	  nbytes -= min_insn_size (start);
+	}
+      if (njumps < 0)
+	abort ();
+      if (rtl_dump_file)
+        fprintf (rtl_dump_file, "Interval %i to %i has %i bytes\n",
+		 INSN_UID (start), INSN_UID (insn), nbytes);
+
+      if (njumps == 3 && isjump && nbytes < 16)
+	{
+	  int padsize = 15 - nbytes + min_insn_size (insn);
+
+	  if (rtl_dump_file)
+	    fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
+          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
+	}
+    }
+}
+
/* Implement machine specific optimizations.
At the moment we implement single transformation: AMD Athlon works faster
when RET is not destination of conditional jump or directly preceded
@@ -15577,6 +15690,7 @@ ix86_reorg ()
delete_insn (ret);
}
}
+ k8_avoid_jump_misspredicts ();
}
/* Return nonzero when QImode register that must be represented via REX prefix
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d192cc8b1cf..65b54ea9050 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -126,6 +126,7 @@
(UNSPECV_STMXCSR 40)
(UNSPECV_FEMMS 46)
(UNSPECV_CLFLUSH 57)
+ (UNSPECV_ALIGN 68)
])
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
@@ -14278,6 +14279,19 @@
(set_attr "modrm" "0")
(set_attr "ppro_uops" "one")])
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "align"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)]
+ ""
+{
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+ return ".p2align\t4,,%c0";
+#endif
+}
+ [(set_attr "length" "16")])
+
(define_expand "prologue"
[(const_int 1)]
""