From 80b8656cbaaf09b685c2f3c9dd96f61274ed7fb7 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 20 Mar 2015 04:39:04 -0700 Subject: Limit multi-byte nop instructions to 10 bytes There is no performance advantage in using multi-byte nop instructions longer than 10 bytes. This patch limits multi-byte nop instructions to 10 bytes. Since there is now only one way to encode multi-byte nop instructions, it also removes the redundant nop tests. gas/ * config/tc-i386.c (i386_align_code): Limit multi-byte nop instructions to 10 bytes. gas/testsuite/ * gas/i386/i386.exp: Don't run nops-1-bdver1, nops-1-bdver2, nops-1-bdver3, nops-1-bdver4, nops-1-znver1, nops-1-btver1, nops-1-btver2, x86-64-nops-1-nocona, x86-64-nops-1-bdver1, x86-64-nops-1-bdver2, x86-64-nops-1-bdver3, x86-64-nops-1-bdver4, x86-64-nops-1-znver1, x86-64-nops-1-btver1 nor x86-64-nops-1-btver2. * gas/i386/nops-1-core2.d: Updated. * gas/i386/nops-1-k8.d: Likewise. * gas/i386/nops-4a-i686.d: Likewise. * gas/i386/nops-5-i686.d: Likewise. * gas/i386/nops-5.d: Likewise. * gas/i386/nops-6.d: Likewise. * gas/i386/x86-64-nops-1-core2.d: Likewise. * gas/i386/x86-64-nops-1-g64.d: Likewise. * gas/i386/x86-64-nops-1-k8.d: Likewise. * gas/i386/x86-64-nops-1.d: Likewise. * gas/i386/x86-64-nops-2.d: Likewise. * gas/i386/x86-64-nops-3.d: Likewise. * gas/i386/x86-64-nops-4-core2.d: Likewise. * gas/i386/x86-64-nops-4-k8.d: Likewise. * gas/i386/x86-64-nops-4.d: Likewise. * gas/i386/x86-64-nops-5-k8.d: Likewise. * gas/i386/x86-64-nops-5.d: Likewise. * gas/i386/ilp32/x86-64-nops-1-core2.d: Likewise. * gas/i386/ilp32/x86-64-nops-1-k8.d: Likewise. * gas/i386/ilp32/x86-64-nops-1.d: Likewise. * gas/i386/ilp32/x86-64-nops-2.d: Likewise. * gas/i386/ilp32/x86-64-nops-3.d: Likewise. * gas/i386/ilp32/x86-64-nops-4-core2.d: Likewise. * gas/i386/ilp32/x86-64-nops-4-k8.d: Likewise. * gas/i386/ilp32/x86-64-nops-4.d: Likewise. * gas/i386/ilp32/x86-64-nops-5-k8.d: Likewise. * gas/i386/ilp32/x86-64-nops-5.d: Likewise. 
* gas/i386/nops-1-bdver1.d: Removed. * gas/i386/nops-1-bdver2.d: Likewise. * gas/i386/nops-1-bdver3.d: Likewise. * gas/i386/nops-1-bdver4.d: Likewise. * gas/i386/nops-1-btver1.d: Likewise. * gas/i386/nops-1-btver2.d: Likewise. * gas/i386/nops-1-znver1.d: Likewise. * gas/i386/x86-64-nops-1-bdver1.d: Likewise. * gas/i386/x86-64-nops-1-bdver2.d: Likewise. * gas/i386/x86-64-nops-1-bdver3.d: Likewise. * gas/i386/x86-64-nops-1-bdver4.d: Likewise. * gas/i386/x86-64-nops-1-btver1.d: Likewise. * gas/i386/x86-64-nops-1-btver2.d: Likewise. * gas/i386/x86-64-nops-1-nocona.d: Likewise. * gas/i386/x86-64-nops-1-znver1.d: Likewise. * gas/i386/ilp32/x86-64-nops-1-nocona.d: Likewise. --- gas/config/tc-i386.c | 113 +++++++-------------------------------------------- 1 file changed, 15 insertions(+), 98 deletions(-) (limited to 'gas/config') diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index b6e8f5cebb3..d7102163b77 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -1138,85 +1138,9 @@ i386_align_code (fragS *fragP, int count) /* nopw %cs:0L(%[re]ax,%[re]ax,1) */ static const char alt_10[] = {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00}; - /* data16 - nopw %cs:0L(%[re]ax,%[re]ax,1) */ - static const char alt_long_11[] = - {0x66, - 0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00}; - /* data16 - data16 - nopw %cs:0L(%[re]ax,%[re]ax,1) */ - static const char alt_long_12[] = - {0x66, - 0x66, - 0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00}; - /* data16 - data16 - data16 - nopw %cs:0L(%[re]ax,%[re]ax,1) */ - static const char alt_long_13[] = - {0x66, - 0x66, - 0x66, - 0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00}; - /* data16 - data16 - data16 - data16 - nopw %cs:0L(%[re]ax,%[re]ax,1) */ - static const char alt_long_14[] = - {0x66, - 0x66, - 0x66, - 0x66, - 0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00}; - /* data16 - data16 - data16 - data16 - data16 - nopw %cs:0L(%[re]ax,%[re]ax,1) */ - static const char alt_long_15[] = - {0x66, - 0x66, - 
0x66, - 0x66, - 0x66, - 0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00}; - /* nopl 0(%[re]ax,%[re]ax,1) - nopw 0(%[re]ax,%[re]ax,1) */ - static const char alt_short_11[] = - {0x0f,0x1f,0x44,0x00,0x00, - 0x66,0x0f,0x1f,0x44,0x00,0x00}; - /* nopw 0(%[re]ax,%[re]ax,1) - nopw 0(%[re]ax,%[re]ax,1) */ - static const char alt_short_12[] = - {0x66,0x0f,0x1f,0x44,0x00,0x00, - 0x66,0x0f,0x1f,0x44,0x00,0x00}; - /* nopw 0(%[re]ax,%[re]ax,1) - nopl 0L(%[re]ax) */ - static const char alt_short_13[] = - {0x66,0x0f,0x1f,0x44,0x00,0x00, - 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00}; - /* nopl 0L(%[re]ax) - nopl 0L(%[re]ax) */ - static const char alt_short_14[] = - {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00, - 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00}; - /* nopl 0L(%[re]ax) - nopl 0L(%[re]ax,%[re]ax,1) */ - static const char alt_short_15[] = - {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00, - 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00}; - static const char *const alt_short_patt[] = { + static const char *const alt_patt[] = { f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8, - alt_9, alt_10, alt_short_11, alt_short_12, alt_short_13, - alt_short_14, alt_short_15 - }; - static const char *const alt_long_patt[] = { - f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8, - alt_9, alt_10, alt_long_11, alt_long_12, alt_long_13, - alt_long_14, alt_long_15 + alt_9, alt_10 }; /* Only align for at least a positive non-zero boundary. */ @@ -1228,14 +1152,9 @@ i386_align_code (fragS *fragP, int count) 1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and PROCESSOR_GENERIC32, f32_patt will be used. - 2. For PROCESSOR_PENTIUMPRO, PROCESSOR_PENTIUM4, PROCESSOR_NOCONA, - PROCESSOR_CORE, PROCESSOR_CORE2, PROCESSOR_COREI7, and - PROCESSOR_GENERIC64, alt_long_patt will be used. - 3. For PROCESSOR_ATHLON, PROCESSOR_K6, PROCESSOR_K8 and - PROCESSOR_AMDFAM10, PROCESSOR_BD and PROCESSOR_BT, alt_short_patt - will be used. - - When -mtune= isn't used, alt_long_patt will be used if + 2. For the rest, alt_patt will be used. 
+ + When -mtune= isn't used, alt_patt will be used if cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will be used. @@ -1268,7 +1187,7 @@ i386_align_code (fragS *fragP, int count) /* We use cpu_arch_isa_flags to check if we SHOULD optimize with nops. */ if (fragP->tc_frag_data.isa_flags.bitfield.cpunop) - patt = alt_long_patt; + patt = alt_patt; else patt = f32_patt; break; @@ -1280,8 +1199,6 @@ i386_align_code (fragS *fragP, int count) case PROCESSOR_L1OM: case PROCESSOR_K1OM: case PROCESSOR_GENERIC64: - patt = alt_long_patt; - break; case PROCESSOR_K6: case PROCESSOR_ATHLON: case PROCESSOR_K8: @@ -1289,7 +1206,7 @@ i386_align_code (fragS *fragP, int count) case PROCESSOR_BD: case PROCESSOR_ZNVER: case PROCESSOR_BT: - patt = alt_short_patt; + patt = alt_patt; break; case PROCESSOR_I386: case PROCESSOR_I486: @@ -1324,7 +1241,7 @@ i386_align_code (fragS *fragP, int count) /* We use cpu_arch_isa_flags to check if we CAN optimize with nops. */ if (fragP->tc_frag_data.isa_flags.bitfield.cpunop) - patt = alt_short_patt; + patt = alt_patt; else patt = f32_patt; break; @@ -1337,12 +1254,12 @@ i386_align_code (fragS *fragP, int count) case PROCESSOR_L1OM: case PROCESSOR_K1OM: if (fragP->tc_frag_data.isa_flags.bitfield.cpunop) - patt = alt_long_patt; + patt = alt_patt; else patt = f32_patt; break; case PROCESSOR_GENERIC64: - patt = alt_long_patt; + patt = alt_patt; break; } } @@ -1373,15 +1290,15 @@ i386_align_code (fragS *fragP, int count) } else { - /* Maximum length of an instruction is 15 byte. If the - padding is greater than 15 bytes and we don't use jump, + /* Maximum length of an instruction is 10 byte. If the + padding is greater than 10 bytes and we don't use jump, we have to break it into smaller pieces. */ int padding = count; - while (padding > 15) + while (padding > 10) { - padding -= 15; + padding -= 10; memcpy (fragP->fr_literal + fragP->fr_fix + padding, - patt [14], 15); + patt [9], 10); } if (padding) -- cgit v1.2.1