author | Jan Hubicka <jh@suse.cz> | 2003-06-04 18:42:23 +0200
---|---|---
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 2003-06-04 16:42:23 +0000
commit | d2c49530bb30888afa711911c98fc8d9bf6ee380 (patch) |
tree | aa9aeacad2cb7bb2bf196867a31b18e218904bfb /gcc/config/i386 |
parent | 05a0e2af5828c599ac6174f2191996cf8138b6b0 (diff) |
download | gcc-d2c49530bb30888afa711911c98fc8d9bf6ee380.tar.gz |
i386.c (min_insn_size, [...]): New functions
* i386.c (min_insn_size, k8_avoid_jump_misspredicts): New functions.
(ix86_reorg): Use them.
* i386.md (align): New insn pattern.
From-SVN: r67446
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/i386.c | 114 |
-rw-r--r-- | gcc/config/i386/i386.md | 14 |
2 files changed, 128 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7386ba55131..c96ac8c68c7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -878,6 +878,8 @@ static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
+static int min_insn_size PARAMS ((rtx));
+static void k8_avoid_jump_misspredicts PARAMS ((void));
 
 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
@@ -15526,6 +15528,117 @@ x86_function_profiler (file, labelno)
     }
 }
 
+/* We don't have exact information about the insn sizes, but we may assume
+   quite safely that we are informed about all 1 byte insns and memory
+   address sizes.  This is enough to eliminate unnecessary padding in
+   99% of cases.  */
+
+static int
+min_insn_size (insn)
+     rtx insn;
+{
+  int l = 0;
+
+  if (!INSN_P (insn) || !active_insn_p (insn))
+    return 0;
+
+  /* Discard alignments we've emitted and jump tables.  */
+  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
+    return 0;
+  if (GET_CODE (insn) == JUMP_INSN
+      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
+          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+    return 0;
+
+  /* Important case - calls are always 5 bytes.
+     It is common to have many calls in a row.  */
+  if (GET_CODE (insn) == CALL_INSN
+      && symbolic_reference_mentioned_p (PATTERN (insn))
+      && !SIBLING_CALL_P (insn))
+    return 5;
+  if (get_attr_length (insn) <= 1)
+    return 1;
+
+  /* For normal instructions we may rely on the sizes of addresses
+     and the presence of a symbol to require 4 bytes of encoding.
+     This is not the case for jumps where references are PC relative.  */
+  if (GET_CODE (insn) != JUMP_INSN)
+    {
+      l = get_attr_length_address (insn);
+      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
+        l = 4;
+    }
+  if (l)
+    return 1 + l;
+  else
+    return 2;
+}
+
+/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
+   16 byte window.  */
+
+static void
+k8_avoid_jump_misspredicts ()
+{
+  rtx insn, start = get_insns ();
+  int nbytes = 0, njumps = 0;
+  int isjump = 0;
+
+  /* Look for all minimal intervals of instructions containing 4 jumps.
+     The intervals are bounded by START and INSN.  NBYTES is the total
+     size of instructions in the interval including INSN and not including
+     START.  When NBYTES is smaller than 16 bytes, it is possible that the
+     ends of START and INSN land in the same 16 byte page.
+
+     The smallest offset in the page at which INSN can start is the case
+     where START ends at offset 0.  The offset of INSN is then
+     NBYTES - sizeof (INSN).  We add a p2align to the 16 byte window with
+     maxskip 15 - NBYTES + sizeof (INSN).  */
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      nbytes += min_insn_size (insn);
+      if (rtl_dump_file)
+        fprintf (rtl_dump_file, "Insn %i estimated to %i bytes\n",
+                 INSN_UID (insn), min_insn_size (insn));
+      if ((GET_CODE (insn) == JUMP_INSN
+           && GET_CODE (PATTERN (insn)) != ADDR_VEC
+           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+          || GET_CODE (insn) == CALL_INSN)
+        njumps++;
+      else
+        continue;
+
+      while (njumps > 3)
+        {
+          start = NEXT_INSN (start);
+          if ((GET_CODE (start) == JUMP_INSN
+               && GET_CODE (PATTERN (start)) != ADDR_VEC
+               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
+              || GET_CODE (start) == CALL_INSN)
+            njumps--, isjump = 1;
+          else
+            isjump = 0;
+          nbytes -= min_insn_size (start);
+        }
+      if (njumps < 0)
+        abort ();
+      if (rtl_dump_file)
+        fprintf (rtl_dump_file, "Interval %i to %i has %i bytes\n",
+                 INSN_UID (start), INSN_UID (insn), nbytes);
+
+      if (njumps == 3 && isjump && nbytes < 16)
+        {
+          int padsize = 15 - nbytes + min_insn_size (insn);
+
+          if (rtl_dump_file)
+            fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n",
+                     INSN_UID (insn), padsize);
+          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
+        }
+    }
+}
+
 /* Implement machine specific optimizations.
    At the moment we implement single transformation: AMD Athlon works faster
    when RET is not destination of conditional jump or directly preceded
@@ -15577,6 +15690,7 @@ ix86_reorg ()
 	  delete_insn (ret);
 	}
     }
+  k8_avoid_jump_misspredicts ();
 }
 
 /* Return nonzero when QImode register that must be represented via REX prefix
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d192cc8b1cf..65b54ea9050 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -126,6 +126,7 @@
    (UNSPECV_STMXCSR		40)
    (UNSPECV_FEMMS		46)
    (UNSPECV_CLFLUSH		57)
+   (UNSPECV_ALIGN		68)
   ])
 
 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
@@ -14278,6 +14279,19 @@
    (set_attr "modrm" "0")
    (set_attr "ppro_uops" "one")])
 
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory.  This blocks insns from being moved across this point.
+
+(define_insn "align"
+  [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)]
+  ""
+{
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+  return ".p2align\t4,,%c0";
+#endif
+}
+  [(set_attr "length" "16")])
+
 (define_expand "prologue"
   [(const_int 1)]
   ""
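For readers following the new pass from the diff above, here is a small standalone sketch of the same sliding-window idea, runnable outside of GCC. Everything in it is illustrative: `toy_insn`, `avoid_four_jumps`, and the sample stream are made-up stand-ins for RTL insns, `min_insn_size`, and `k8_avoid_jump_misspredicts`, and the sketch only prints where the real pass would emit an `align` insn.

```c
#include <stdio.h>

/* Stand-in for an RTL insn: the only two facts the pass needs are a
   conservative size estimate (what min_insn_size returns) and whether
   the insn is a jump/call that counts against the K8 limit.  */
struct toy_insn
{
  int size;
  int isjump;
};

/* Mirror of k8_avoid_jump_misspredicts: maintain the minimal window
   insn[start+1 .. i] containing 4 jumps; when it spans fewer than 16
   bytes, all 4 jumps may end in one 16 byte page, so request padding.  */
static void
avoid_four_jumps (const struct toy_insn *insn, int n)
{
  int start = -1, nbytes = 0, njumps = 0, isjump = 0;

  for (int i = 0; i < n; i++)
    {
      nbytes += insn[i].size;
      if (!insn[i].isjump)
        continue;
      njumps++;

      /* Shrink from the left.  The loop can only terminate by dropping
         a jump, so ISJUMP is 1 whenever it ran at all.  */
      while (njumps > 3)
        {
          start++;
          isjump = insn[start].isjump;
          njumps -= isjump;
          nbytes -= insn[start].size;
        }

      /* NBYTES excludes the jump just dropped; below 16 bytes the four
         jumps can share a fetch page.  The real pass emits
         gen_align (GEN_INT (padsize)) before INSN at this point.  */
      if (njumps == 3 && isjump && nbytes < 16)
        printf ("pad up to %d bytes before insn %d\n",
                15 - nbytes + insn[i].size, i);
    }
}

int
main (void)
{
  /* call (5), jcc (2), mov (3), jcc (2), jmp (2): the 4th jump arrives
     within 14 bytes of the 1st, so the last jump gets padded.  */
  struct toy_insn stream[] = { {5, 1}, {2, 1}, {3, 0}, {2, 1}, {2, 1} };

  avoid_four_jumps (stream, sizeof stream / sizeof *stream);
  return 0;
}
```

On this stream the sketch prints `pad up to 8 bytes before insn 4`: after dropping the leading call the window spans 9 bytes, so `.p2align` is asked to skip at most 15 - 9 + 2 = 8 bytes.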
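On the output side, the `align` pattern leans on the max-skip form of the GAS alignment directive: `.p2align 4,,8` means "align to a 16 byte (2^4) boundary, but only if that needs at most 8 bytes of padding", so code that already sits late enough in the page is left untouched. Because NBYTES always includes INSN's own size, the requested maxskip of 15 - NBYTES + sizeof (INSN) can never exceed 15, which the pattern's `length` attribute of 16 covers conservatively; the HAVE_GAS_MAX_SKIP_P2ALIGN guard restricts the directive to assemblers known at configure time to accept the three-operand form.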