diff options
author | nickc <nickc@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-07-06 13:55:09 +0000 |
---|---|---|
committer | nickc <nickc@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-07-06 13:55:09 +0000 |
commit | c4af075bcc6eafc2517acf337d53f4cbc1b7c2ce (patch) | |
tree | f2b4569c011e34c11f7d7c4e6539913dbbd5f507 | |
parent | a31502084bd689aa52fb605429a4a27beb715281 (diff) | |
download | gcc-c4af075bcc6eafc2517acf337d53f4cbc1b7c2ce.tar.gz |
* config/sh/lib1funcs.h (FMOVD_WORKS): Only define if
__FMOVD_ENABLED__ is defined.
* config/sh/sh.h
(TARGET_FMOVD): Provide a default definition.
(MASK_FMOVD): Likewise.
(TARGET_CPU_CPP_BUILTINS): Define
__FMOVD_ENABLED__ if TARGET_FMOVD is true.
* config/sh/sh.md (movdf_i4): For alternative 0 use either one or
two fmov instructions depending upon whether TARGET_FMOVD is
enabled.
(split for DF load from memory into register): Also handle
MEMs which consist of REG+DISP addressing.
(split for DF store from register to memory): Likewise.
(movsf_ie): Always use single fp_mode.
* config/sh/sh.c (sh_override_options): Do not automatically
enable TARGET_FMOVD for the SH2A when supporting doubles - leave
that to the -mfmovd command line switch.
(broken_move): Do not restrict fldi test to only the SH4 and SH4A.
(fldi_ok): Always allow.
* config/sh/sh.opt (mfmovd): Remove this switch.
* doc/invoke.texi (-mfmovd): Remove documentation of this switch.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@149283 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 52 | ||||
-rw-r--r-- | gcc/config/sh/lib1funcs.h | 4 | ||||
-rw-r--r-- | gcc/config/sh/sh.c | 15 | ||||
-rw-r--r-- | gcc/config/sh/sh.h | 7 | ||||
-rw-r--r-- | gcc/config/sh/sh.md | 185 | ||||
-rw-r--r-- | gcc/config/sh/sh.opt | 3 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 148 |
7 files changed, 263 insertions, 151 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a490e33668e..7a035f6f0b9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2009-07-06 Nick Clifton <nickc@redhat.com> + DJ Delorie <dj@redhat.com> + + * config.sh/lib1funcs.h (FMOVD_WORKS): Only define if + __FMOVD_ENABLED__ is defined. + * config/sh/sh.h + (TARGET_FMOVD): Provide a default definition. + (MASK_FMOVD): Likewise. + (TARGET_CPU_CPP_BUILTINS): Define + __FMOVD_ENABLED__ if TARGET_FMOVD is true. + * config/sh/sh.md (movdf_i4): For alternative 0 use either one or + two fmov instructions depending upon whether TARGET_FMOVD is + enabled. + (split for DF load from memory into register): Also handle + MEMs which consist of REG+DISP addressing. + (split for DF store from register to memory): Likewise. + (movsf_ie): Always use single fp_mode. + * config/sh/sh.c (sh_override_options): Do not automatically + enable TARGET_MOVD for the SH2A when supporting doubles - leave + that to the -mfmovd command line switch. + (broken_move): Do not restrict fldi test to only the SH4 and SH4A. + (fldi_ok): Always allow. + * config/sh/sh.opt (mfmovd): Remove this switch. + * doc/invoke.texi (-mfmovd): Remove documentation of this switch. + 2009-07-06 J"orn Rennecke <joern.rennecke@arc.com> Kaz Kojima <kkojima@gcc.gnu.org> @@ -372,6 +397,33 @@ * config/i386/i386.c (memory_address_length): Check existence of base register before using it. +2009-06-30 Nick Clifton <nickc@redhat.com> + DJ Delorie <dj@redhat.com> + + * config.sh/lib1funcs.h (FMOVD_WORKS): Only define if + __FMOVD_ENABLED__ is defined. + * config/sh/sh.h + (TARGET_FMOVD): Provide a default definition. + (MASK_FMOVD): Likewise. + (TARGET_CPU_CPP_BUILTINS): Define + __FMOVD_ENABLED__ if TARGET_FMOVD is true. + * config/sh/sh.md (movdf_i4): For alternative 0 use either one or + two fmov instructions depending upon whether TARGET_FMOVD is + enabled. + (split for DF load from memory into register): Also handle + MEMs which consist of REG+DISP addressing. 
+ (split for DF store from register to memory): Likewise. + * config/sh/sh.opt (mfmovd): Remove this switch. + * doc/invoke.texi (-mfmovd): Remove documentation of this switch. + * config/sh/sh.c (sh_override_options): Do not automatically + enable TARGET_MOVD for the SH2A when supporting doubles - leave + that to the -mfmovd command line switch. + + * config/sh/sh.c (broken_move): Do not restrict fldi test to only + the SH4 and SH4A. + (fldi_ok): Always allow. + * config/sh/sh.md (movsf_ie): Always use single fp_mode. + 2009-06-29 DJ Delorie <dj@redhat.com> * doc/install.texi (mep-x-elf): Correct chip's full name. diff --git a/gcc/config/sh/lib1funcs.h b/gcc/config/sh/lib1funcs.h index d16a18b8147..af4b41cc314 100644 --- a/gcc/config/sh/lib1funcs.h +++ b/gcc/config/sh/lib1funcs.h @@ -42,8 +42,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) -#ifdef __SH2A__ -#undef FMOVD_WORKS +#if defined __SH2A__ && defined __FMOVD_ENABLED__ +#undef FMOVD_WORKS #define FMOVD_WORKS #endif diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 08cae31b995..18123c34cde 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -694,11 +694,7 @@ sh_override_options (void) if (TARGET_SH2E) sh_cpu = PROCESSOR_SH2E; if (TARGET_SH2A) - { - sh_cpu = PROCESSOR_SH2A; - if (TARGET_SH2A_DOUBLE) - target_flags |= MASK_FMOVD; - } + sh_cpu = PROCESSOR_SH2A; if (TARGET_SH3) sh_cpu = PROCESSOR_SH3; if (TARGET_SH3E) @@ -4208,14 +4204,13 @@ broken_move (rtx insn) && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE && (fp_zero_operand (SET_SRC (pat)) || fp_one_operand (SET_SRC (pat))) - /* ??? If this is a -m4 or -m4-single compilation, in general - we don't know the current setting of fpscr, so disable fldi. + /* In general we don't know the current setting of fpscr, so disable fldi. 
There is an exception if this was a register-register move before reload - and hence it was ascertained that we have single precision setting - and in a post-reload optimization we changed this to do a constant load. In that case we don't have an r0 clobber, hence we must use fldi. */ - && (! TARGET_SH4 || TARGET_FMOVD + && (TARGET_FMOVD || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0)) == SCRATCH)) && REG_P (SET_DEST (pat)) @@ -8876,7 +8871,7 @@ fp_one_operand (rtx op) return REAL_VALUES_EQUAL (r, dconst1); } -/* For -m4 and -m4-single-only, mode switching is used. If we are +/* In general mode switching is used. If we are compiling without -mfmovd, movsf_ie isn't taken into account for mode switching. We could check in machine_dependent_reorg for cases where we know we are in single precision mode, but there is @@ -8886,7 +8881,7 @@ fp_one_operand (rtx op) int fldi_ok (void) { - return ! TARGET_SH4 || TARGET_FMOVD || reload_completed; + return 1; } int diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index d8b9a297834..d9a4c5f25cc 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -28,6 +28,11 @@ along with GCC; see the file COPYING3. If not see #define TARGET_VERSION \ fputs (" (Hitachi SH)", stderr); +#ifndef TARGET_FMOVD +#define TARGET_FMOVD 0 +#define MASK_FMOVD 0 +#endif + /* Unfortunately, insn-attrtab.c doesn't include insn-codes.h. We can't include it here, because bconfig.h is also included by gencodes.c . */ /* ??? No longer true. */ @@ -91,6 +96,8 @@ do { \ builtin_define ("__SH_FPU_DOUBLE__"); \ if (TARGET_HITACHI) \ builtin_define ("__HITACHI__"); \ + if (TARGET_FMOVD) \ + builtin_define ("__FMOVD_ENABLED__"); \ builtin_define (TARGET_LITTLE_ENDIAN \ ? 
"__LITTLE_ENDIAN__" : "__BIG_ENDIAN__"); \ } while (0) diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index e446164eaea..30084c2d0f8 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -5780,25 +5780,31 @@ label: ;; up pcloads, so we need usable length information for that. (define_insn "movdf_i4" [(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d") - (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r")) - (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c")) - (clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))] + (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))] "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && (arith_reg_operand (operands[0], DFmode) || arith_reg_operand (operands[1], DFmode))" - "@ - fmov %1,%0 - # - # - fmov.d %1,%0 - fmov.d %1,%0 - # - # - # - # - #" + { + switch (which_alternative) + { + case 0: + if (TARGET_FMOVD) + return "fmov %1,%0"; + else if (REGNO (operands[0]) != REGNO (operands[1]) + 1) + return "fmov %R1,%R0\n\tfmov %S1,%S0"; + else + return "fmov %S1,%S0\n\tfmov %R1,%R0"; + case 3: + case 4: + return "fmov.d %1,%0"; + default: + return "#"; + } + } [(set_attr_alternative "length" - [(if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 4)) + [(if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8)) (const_int 4) (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) @@ -6032,37 +6038,63 @@ label: "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! 
TARGET_FMOVD && reload_completed && FP_OR_XD_REGISTER_P (true_regnum (operands[0]))" [(const_int 0)] - " { int regno = true_regnum (operands[0]); - rtx addr, insn, adjust = NULL_RTX; + rtx addr, insn; rtx mem2 = change_address (operands[1], SFmode, NULL_RTX); - rtx reg0 = gen_rtx_REG (SFmode, regno + !! TARGET_LITTLE_ENDIAN); - rtx reg1 = gen_rtx_REG (SFmode, regno + ! TARGET_LITTLE_ENDIAN); + rtx reg0 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + rtx reg1 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 0 : 1)); operands[1] = copy_rtx (mem2); addr = XEXP (mem2, 0); - if (GET_CODE (addr) != POST_INC) + + switch (GET_CODE (addr)) { - /* If we have to modify the stack pointer, the value that we have - read with post-increment might be modified by an interrupt, - so write it back. */ - if (REGNO (addr) == STACK_POINTER_REGNUM) - adjust = gen_push_e (reg0); - else - adjust = gen_addsi3 (addr, addr, GEN_INT (-4)); - XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr); - } - addr = XEXP (addr, 0); - insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2])); - add_reg_note (insn, REG_INC, addr); - insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); - if (adjust) - emit_insn (adjust); - else - add_reg_note (insn, REG_INC, addr); + case REG: + /* This is complicated. If the register is an arithmetic register + we can just fall through to the REG+DISP case below. Otherwise + we have to use a combination of POST_INC and REG addressing... */ + if (! arith_reg_operand (operands[1], SFmode)) + { + XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr); + insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + + emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + + /* If we have modified the stack pointer, the value that we have + read with post-increment might be modified by an interrupt, + so write it back. 
*/ + if (REGNO (addr) == STACK_POINTER_REGNUM) + emit_insn (gen_push_e (reg0)); + else + emit_insn (gen_addsi3 (XEXP (operands[1], 0), XEXP (operands[1], 0), GEN_INT (-4))); + break; + } + /* Fall through. */ + + case PLUS: + emit_insn (gen_movsf_ie (reg0, operands[1], operands[2])); + operands[1] = copy_rtx (operands[1]); + XEXP (operands[1], 0) = plus_constant (addr, 4); + emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + break; + + case POST_INC: + insn = emit_insn (gen_movsf_ie (reg0, operands[1], operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + + insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + break; + + default: + debug_rtx (addr); + gcc_unreachable (); + } + DONE; -}") +}) (define_split [(set (match_operand:DF 0 "memory_operand" "") @@ -6072,35 +6104,70 @@ label: "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed && FP_OR_XD_REGISTER_P (true_regnum (operands[1]))" [(const_int 0)] - " { int regno = true_regnum (operands[1]); - rtx insn, addr, adjust = NULL_RTX; + rtx insn, addr; + rtx reg0 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + rtx reg1 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 0 : 1)); operands[0] = copy_rtx (operands[0]); PUT_MODE (operands[0], SFmode); - insn = emit_insn (gen_movsf_ie (operands[0], - gen_rtx_REG (SFmode, - regno + ! TARGET_LITTLE_ENDIAN), - operands[2])); - operands[0] = copy_rtx (operands[0]); addr = XEXP (operands[0], 0); - if (GET_CODE (addr) != PRE_DEC) + + switch (GET_CODE (addr)) { - adjust = gen_addsi3 (addr, addr, GEN_INT (4)); - emit_insn_before (adjust, insn); - XEXP (operands[0], 0) = addr = gen_rtx_PRE_DEC (SImode, addr); + case REG: + /* This is complicated. If the register is an arithmetic register + we can just fall through to the REG+DISP case below. Otherwise + we have to use a combination of REG and PRE_DEC addressing... */ + if (! 
arith_reg_operand (operands[0], SFmode)) + { + emit_insn (gen_addsi3 (addr, addr, GEN_INT (4))); + emit_insn (gen_movsf_ie (operands[0], reg1, operands[2])); + + operands[0] = copy_rtx (operands[0]); + XEXP (operands[0], 0) = addr = gen_rtx_PRE_DEC (SImode, addr); + + insn = emit_insn (gen_movsf_ie (operands[0], reg0, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + break; + } + /* Fall through. */ + + case PLUS: + /* Since REG+DISP addressing has already been decided upon by gcc + we can rely upon it having chosen an arithmetic register as the + register component of the address. Just emit the lower numbered + register first, to the lower address, then the higher numbered + register to the higher address. */ + emit_insn (gen_movsf_ie (operands[0], reg0, operands[2])); + + operands[0] = copy_rtx (operands[0]); + XEXP (operands[0], 0) = plus_constant (addr, 4); + + emit_insn (gen_movsf_ie (operands[0], reg1, operands[2])); + break; + + case PRE_DEC: + /* This is easy. Output the word to go to the higher address + first (ie the word in the higher numbered register) then the + word to go to the lower address. */ + + insn = emit_insn (gen_movsf_ie (operands[0], reg1, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + + insn = emit_insn (gen_movsf_ie (operands[0], reg0, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + break; + + default: + /* FAIL; */ + debug_rtx (addr); + gcc_unreachable (); } - addr = XEXP (addr, 0); - if (! adjust) - add_reg_note (insn, REG_INC, addr); - insn = emit_insn (gen_movsf_ie (operands[0], - gen_rtx_REG (SFmode, - regno + !! TARGET_LITTLE_ENDIAN), - operands[2])); - add_reg_note (insn, REG_INC, addr); + DONE; -}") +}) ;; If the output is a register and the input is memory or a register, we have ;; to be careful and see which word needs to be loaded first. 
@@ -6562,7 +6629,7 @@ label: (const_int 0)]) (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes") (const_string "single") - (const_string "none")))]) + (const_string "single")))]) (define_split [(set (match_operand:SF 0 "register_operand" "") diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt index f365b2b15c3..b2e7c6de77f 100644 --- a/gcc/config/sh/sh.opt +++ b/gcc/config/sh/sh.opt @@ -248,9 +248,6 @@ mfixed-range= Target RejectNegative Joined Var(sh_fixed_range_str) Specify range of registers to make fixed -mfmovd -Target RejectNegative Mask(FMOVD) Undocumented - mfused-madd Target Var(TARGET_FMAC) Enable the use of the fused floating point multiply-accumulate operation diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 34bc48c87df..7742623b5b5 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -339,7 +339,7 @@ Objective-C and Objective-C++ Dialects}. -fgcse -fgcse-after-reload -fgcse-las -fgcse-lm @gol -fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol --finline-small-functions -fipa-cp -fipa-cp-clone -fipa-matrix-reorg -fipa-pta @gol +-finline-small-functions -fipa-cp -fipa-cp-clone -fipa-matrix-reorg -fipa-pta @gol -fipa-pure-const -fipa-reference -fipa-struct-reorg @gol -fipa-type-escape -fira-algorithm=@var{algorithm} @gol -fira-region=@var{region} -fira-coalesce -fno-ira-share-save-slots @gol @@ -795,7 +795,7 @@ See RS/6000 and PowerPC Options. 
-m5-32media -m5-32media-nofpu @gol -m5-compact -m5-compact-nofpu @gol -mb -ml -mdalign -mrelax @gol --mbigtable -mfmovd -mhitachi -mrenesas -mno-renesas -mnomacsave @gol +-mbigtable -mhitachi -mrenesas -mno-renesas -mnomacsave @gol -mieee -mbitops -misize -minline-ic_invalidate -mpadstruct -mspace @gol -mprefergot -musermode -multcost=@var{number} -mdiv=@var{strategy} @gol -mdivsi3_libfunc=@var{name} -mfixed-range=@var{register-range} @gol @@ -1210,8 +1210,8 @@ This will display the values recognized by the @option{--param} option. @item @var{language} -This will display the options supported for @var{language}, where -@var{language} is the name of one of the languages supported in this +This will display the options supported for @var{language}, where +@var{language} is the name of one of the languages supported in this version of GCC. @item @samp{common} @@ -1424,7 +1424,7 @@ affected. @opindex std Determine the language standard. @xref{Standards,,Language Standards Supported by GCC}, for details of these standard versions. This option -is currently only supported when compiling C or C++. +is currently only supported when compiling C or C++. The compiler can accept several base standards, such as @samp{c89} or @samp{c++98}, and GNU dialects of those standards, such as @@ -2843,21 +2843,21 @@ following cases: A pointer is compared against integer zero with @samp{<}, @samp{<=}, @samp{>}, or @samp{>=}. -@item +@item (C++ only) An enumerator and a non-enumerator both appear in a conditional expression. -@item +@item (C++ only) Ambiguous virtual bases. -@item +@item (C++ only) Subscripting an array which has been declared @samp{register}. -@item +@item (C++ only) Taking the address of a variable which has been declared @samp{register}. -@item +@item (C++ only) A base class is not initialized in a derived class' copy constructor. @@ -3411,9 +3411,9 @@ with n=3. Level 1: Most aggressive, quick, least accurate. 
Possibly useful when higher levels -do not warn but -fstrict-aliasing still breaks the code, as it has very few +do not warn but -fstrict-aliasing still breaks the code, as it has very few false negatives. However, it has many false positives. -Warns for all pointer conversions between possibly incompatible types, +Warns for all pointer conversions between possibly incompatible types, even if never dereferenced. Runs in the frontend only. Level 2: Aggressive, quick, not too precise. @@ -3422,12 +3422,12 @@ and few false negatives (but possibly more than level 1). Unlike level 1, it only warns when an address is taken. Warns about incomplete types. Runs in the frontend only. -Level 3 (default for @option{-Wstrict-aliasing}): -Should have very few false positives and few false +Level 3 (default for @option{-Wstrict-aliasing}): +Should have very few false positives and few false negatives. Slightly slower than levels 1 or 2 when optimization is enabled. Takes care of the common punn+dereference pattern in the frontend: @code{*(int*)&some_float}. -If optimization is enabled, it also runs in the backend, where it deals +If optimization is enabled, it also runs in the backend, where it deals with multiple statement cases using flow-sensitive points-to information. Only warns when the converted pointer is dereferenced. Does not warn about incomplete types. @@ -4686,7 +4686,7 @@ Print the name and the counter upperbound for all debug counters. @item -fdbg-cnt=@var{counter-value-list} @opindex fdbg-cnt -Set the internal debug counter upperbound. @var{counter-value-list} +Set the internal debug counter upperbound. @var{counter-value-list} is a comma-separated list of @var{name}:@var{value} pairs which sets the upperbound of each debug counter @var{name} to @var{value}. All debug counters have the initial upperbound of @var{UINT_MAX}, @@ -4767,7 +4767,7 @@ Dump after duplicating the computed gotos. 
@opindex fdump-rtl-ce3 @option{-fdump-rtl-ce1}, @option{-fdump-rtl-ce2}, and @option{-fdump-rtl-ce3} enable dumping after the three -if conversion passes. +if conversion passes. @itemx -fdump-rtl-cprop_hardreg @opindex fdump-rtl-cprop_hardreg @@ -4896,7 +4896,7 @@ Dump after sign extension elimination. @item -fdump-rtl-seqabstr @opindex fdump-rtl-seqabstr -Dump after common sequence discovery. +Dump after common sequence discovery. @item -fdump-rtl-shorten @opindex fdump-rtl-shorten @@ -5538,9 +5538,9 @@ each of them. Not all optimizations are controlled directly by a flag. Only optimizations that have a flag are listed in this section. -Depending on the target and how GCC was configured, a slightly different -set of optimizations may be enabled at each @option{-O} level than -those listed here. You can invoke GCC with @samp{-Q --help=optimizers} +Depending on the target and how GCC was configured, a slightly different +set of optimizations may be enabled at each @option{-O} level than +those listed here. You can invoke GCC with @samp{-Q --help=optimizers} to find out the exact set of optimizations that are enabled at each level. @xref{Overall Options}, for examples. @@ -6253,8 +6253,8 @@ scheduling runs instead of the second scheduler pass. @item -fsel-sched-pipelining @opindex fsel-sched-pipelining -Enable software pipelining of innermost loops during selective scheduling. -This option has no effect until one of @option{-fselective-scheduling} or +Enable software pipelining of innermost loops during selective scheduling. +This option has no effect until one of @option{-fselective-scheduling} or @option{-fselective-scheduling2} is turned on. @item -fsel-sched-pipelining-outer-loops @@ -6328,9 +6328,9 @@ Enabled by default at @option{-O} and higher. @item -fipa-struct-reorg @opindex fipa-struct-reorg -Perform structure reorganization optimization, that change C-like structures -layout in order to better utilize spatial locality. 
This transformation is -affective for programs containing arrays of structures. Available in two +Perform structure reorganization optimization, that change C-like structures +layout in order to better utilize spatial locality. This transformation is +affective for programs containing arrays of structures. Available in two compilation modes: profile-based (enabled with @option{-fprofile-generate}) or static (which uses built-in heuristics). Require @option{-fipa-type-escape} to provide the safety of this transformation. It works only in whole program @@ -6349,7 +6349,7 @@ and does not affect generated code. @opindex fipa-cp Perform interprocedural constant propagation. This optimization analyzes the program to determine when values passed -to functions are constants and then optimizes accordingly. +to functions are constants and then optimizes accordingly. This optimization can substantially increase performance if the application has constants passed to functions. This flag is enabled by default at @option{-O2}, @option{-Os} and @option{-O3}. @@ -6373,10 +6373,9 @@ This reduces the level of indirection needed for accessing the elements of the matrix. The second optimization is matrix transposing that attempts to change the order of the matrix's dimensions in order to improve cache locality. -Both optimizations need the @option{-fwhole-program} flag. +Both optimizations need the @option{-fwhole-program} flag. Transposing is enabled only if profiling information is available. - @item -ftree-sink @opindex ftree-sink Perform forward store motion on trees. This flag is @@ -6400,9 +6399,9 @@ default at @option{-O} and higher. @item -ftree-builtin-call-dce @opindex ftree-builtin-call-dce -Perform conditional dead code elimination (DCE) for calls to builtin functions -that may set @code{errno} but are otherwise side-effect free. 
This flag is -enabled by default at @option{-O2} and higher if @option{-Os} is not also +Perform conditional dead code elimination (DCE) for calls to builtin functions +that may set @code{errno} but are otherwise side-effect free. This flag is +enabled by default at @option{-O2} and higher if @option{-Os} is not also specified. @item -ftree-dominator-opts @@ -6467,8 +6466,8 @@ Graphite loop transformation infrastructure. @item -floop-strip-mine Perform loop strip mining transformations on loops. Strip mining -splits a loop into two nested loops. The outer loop has strides -equal to the strip size and the inner loop has strides of the +splits a loop into two nested loops. The outer loop has strides +equal to the strip size and the inner loop has strides of the original loop within a strip. For example, given a loop like: @smallexample DO I = 1, N @@ -6970,7 +6969,7 @@ default, GCC will emit an error message when an inconsistent profile is detected Set the directory to search the profile data files in to @var{path}. This option affects only the profile data generated by @option{-fprofile-generate}, @option{-ftest-coverage}, @option{-fprofile-arcs} -and used by @option{-fprofile-use} and @option{-fbranch-probabilities} +and used by @option{-fprofile-use} and @option{-fbranch-probabilities} and its related options. By default, GCC will use the current directory as @var{path} thus the profile data file will appear in the same directory as the object file. @@ -7437,8 +7436,8 @@ In each case, the @var{value} is an integer. The allowable choices for The threshold ratio (as a percentage) between a structure frequency and the frequency of the hottest structure in the program. This parameter is used by struct-reorg optimization enabled by @option{-fipa-struct-reorg}. 
-We say that if the ratio of a structure frequency, calculated by profiling, -to the hottest structure frequency in the program is less than this +We say that if the ratio of a structure frequency, calculated by profiling, +to the hottest structure frequency in the program is less than this parameter, then structure reorganization is not applied to this structure. The default is 10. @@ -7880,8 +7879,8 @@ depth of search for available instructions. The default value is 50. @item selsched-max-sched-times -The maximum number of times that an instruction will be scheduled during -selective scheduling. This is the limit on the number of iterations +The maximum number of times that an instruction will be scheduled during +selective scheduling. This is the limit on the number of iterations through which the instruction may be pipelined. The default value is 2. @item selsched-max-insns-to-rename @@ -7942,12 +7941,12 @@ The size of L1 cache, in kilobytes. The size of L2 cache, in kilobytes. @item min-insn-to-prefetch-ratio -The minimum ratio between the number of instructions and the -number of prefetches to enable prefetching in a loop with an +The minimum ratio between the number of instructions and the +number of prefetches to enable prefetching in a loop with an unknown trip count. @item prefetch-min-insn-to-mem-ratio -The minimum ratio between the number of instructions and the +The minimum ratio between the number of instructions and the number of memory references to enable prefetching in a loop. @item use-canonical-types @@ -8278,7 +8277,7 @@ this option. @cindex linker script Use @var{script} as the linker script. This option is supported by most systems using the GNU linker. On some targets, such as bare-board -targets without an operating system, the @option{-T} option may be required +targets without an operating system, the @option{-T} option may be required when linking to avoid references to undefined symbols. 
@item -Xlinker @var{option} @@ -8294,7 +8293,7 @@ For example, to pass @option{-assert definitions}, you must write @option{-Xlinker "-assert definitions"}, because this passes the entire string as a single argument, which is not what the linker expects. -When using the GNU linker, it is usually more convenient to pass +When using the GNU linker, it is usually more convenient to pass arguments to linker options using the @option{@var{option}=@var{value}} syntax than as separate arguments. For example, you can specify @samp{-Xlinker -Map=output.map} rather than @@ -8305,7 +8304,7 @@ this syntax for command-line options. @opindex Wl Pass @var{option} as an option to the linker. If @var{option} contains commas, it is split into multiple options at the commas. You can use this -syntax to pass an argument to the option. +syntax to pass an argument to the option. For example, @samp{-Wl,-Map,output.map} passes @samp{-Map output.map} to the linker. When using the GNU linker, you can also get the same effect with @samp{-Wl,-Map=output.map}. @@ -9226,11 +9225,11 @@ default is @option{-msched-prolog}. Specifies which floating-point ABI to use. Permissible values are: @samp{soft}, @samp{softfp} and @samp{hard}. -Specifying @samp{soft} causes GCC to generate output containing +Specifying @samp{soft} causes GCC to generate output containing library calls for floating-point operations. -@samp{softfp} allows the generation of code using hardware floating-point -instructions, but still uses the soft-float calling conventions. -@samp{hard} allows generation of floating-point instructions +@samp{softfp} allows the generation of code using hardware floating-point +instructions, but still uses the soft-float calling conventions. +@samp{hard} allows generation of floating-point instructions and uses FPU-specific calling conventions. Using @option{-mfloat-abi=hard} with VFP coprocessors is not supported. @@ -9340,8 +9339,8 @@ floating point values. 
@item -mfp16-format=@var{name} @opindex mfp16-format Specify the format of the @code{__fp16} half-precision floating-point type. -Permissible names are @samp{none}, @samp{ieee}, and @samp{alternative}; -the default is @samp{none}, in which case the @code{__fp16} type is not +Permissible names are @samp{none}, @samp{ieee}, and @samp{alternative}; +the default is @samp{none}, in which case the @code{__fp16} type is not defined. @xref{Half-Precision}, for more information. @item -mstructure-size-boundary=@var{n} @@ -9445,9 +9444,9 @@ Generate code for the Thumb instruction set. The default is to use the 32-bit ARM instruction set. This option automatically enables either 16-bit Thumb-1 or mixed 16/32-bit Thumb-2 instructions based on the @option{-mcpu=@var{name}} -and @option{-march=@var{name}} options. This option is not passed to the +and @option{-march=@var{name}} options. This option is not passed to the assembler. If you want to force assembler files to be interpreted as Thumb code, -either add a @samp{.thumb} directive to the source or pass the @option{-mthumb} +either add a @samp{.thumb} directive to the source or pass the @option{-mthumb} option directly to the assembler by prefixing it with @option{-Wa}. @item -mtpcs-frame @@ -9467,7 +9466,7 @@ not call any other functions.) The default is @option{-mno-apcs-leaf-frame}. Gives all externally visible functions in the file being compiled an ARM instruction set header which switches to Thumb mode before executing the rest of the function. This allows these functions to be called from -non-interworking code. This option is not valid in AAPCS configurations +non-interworking code. This option is not valid in AAPCS configurations because interworking is enabled by default. @item -mcaller-super-interworking @@ -9475,7 +9474,7 @@ because interworking is enabled by default. 
Allows calls via function pointers (including virtual functions) to execute correctly regardless of whether the target code has been compiled for interworking or not. There is a small overhead in the cost -of executing a function pointer if this option is enabled. This option +of executing a function pointer if this option is enabled. This option is not valid in AAPCS configurations because interworking is enabled by default. @@ -9740,7 +9739,7 @@ one application per core programming model. Proper start files and link scripts will be used to support Core B. This option defines @code{__BFIN_COREB}. When this option is used, coreb_main should be used instead of main. It must be used with -@option{-mmulticore}. +@option{-mmulticore}. @item -msdram @opindex msdram @@ -11477,7 +11476,7 @@ Setting the rounding of floating-point operations to less than the default libraries assume that extended precision (80 bit) floating-point operations are enabled by default; routines in such libraries could suffer significant loss of accuracy, typically through so-called "catastrophic cancellation", -when this option is used to set the precision to less than extended precision. +when this option is used to set the precision to less than extended precision. @item -mstackrealign @opindex mstackrealign @@ -12925,7 +12924,6 @@ register. The default for this option is 4, but note that there's a @end table - @node MIPS Options @subsection MIPS Options @cindex MIPS options @@ -13063,7 +13061,7 @@ Generate (do not generate) MIPS16 code. If GCC is targetting a MIPS32 or MIPS64 architecture, it will make use of the MIPS16e ASE@. MIPS16 code generation can also be controlled on a per-function basis -by means of @code{mips16} and @code{nomips16} attributes. +by means of @code{mips16} and @code{nomips16} attributes. @xref{Function Attributes}, for more information. 
@item -mflip-mips16 @@ -14407,8 +14405,8 @@ Software floating point emulation is provided if you use the @itemx -mdouble-float @opindex msingle-float @opindex mdouble-float -Generate code for single or double-precision floating point operations. -@option{-mdouble-float} implies @option{-msingle-float}. +Generate code for single or double-precision floating point operations. +@option{-mdouble-float} implies @option{-msingle-float}. @item -msimple-fpu @opindex msimple-fpu @@ -14416,7 +14414,7 @@ Do not generate sqrt and div instructions for hardware floating point unit. @item -mfpu @opindex mfpu -Specify type of floating point unit. Valid values are @var{sp_lite} +Specify type of floating point unit. Valid values are @var{sp_lite} (equivalent to -msingle-float -msimple-fpu), @var{dp_lite} (equivalent to -mdouble-float -msimple-fpu), @var{sp_full} (equivalent to -msingle-float), and @var{dp_full} (equivalent to -mdouble-float). @@ -15074,7 +15072,7 @@ Compile code for big endian mode. This is the default. @item -mel @opindex mel -Compile code for little endian mode. +Compile code for little endian mode. @item -mnhwloop @opindex mnhwloop @@ -15086,7 +15084,7 @@ Enable generation of unaligned load and store instructions. @item -mmac @opindex mmac -Enable the use of multiply-accumulate instructions. Disabled by default. +Enable the use of multiply-accumulate instructions. Disabled by default. @item -mscore5 @opindex mscore5 @@ -15221,10 +15219,6 @@ Use 32-bit offsets in @code{switch} tables. The default is to use @opindex mbitops Enable the use of bit manipulation instructions on SH2A. -@item -mfmovd -@opindex mfmovd -Enable the use of the instruction @code{fmovd}. - @item -mhitachi @opindex mhitachi Comply with the calling conventions defined by Renesas. @@ -15759,7 +15753,7 @@ generate the branch hint. @opindex mhint-max-distance The encoding of the branch hint instruction limits the hint to be within 256 instructions of the branch it is affecting. 
By default, GCC makes -sure it is within 125. +sure it is within 125. @item -msafe-hints @opindex msafe-hints @@ -16045,7 +16039,7 @@ appropriately. @item -mpe-aligned-commons @opindex mpe-aligned-commons This option is available for Cygwin and MinGW targets. It -specifies that the GNU extension to the PE file format that +specifies that the GNU extension to the PE file format that permits the correct alignment of COMMON variables should be used when generating code. It will be enabled by default if GCC detects that the target assembler found during configuration @@ -16306,19 +16300,19 @@ Use it to conform to a non-default application binary interface. In C code, controls the placement of uninitialized global variables. Unix C compilers have traditionally permitted multiple definitions of such variables in different compilation units by placing the variables -in a common block. -This is the behavior specified by @option{-fcommon}, and is the default -for GCC on most targets. +in a common block. +This is the behavior specified by @option{-fcommon}, and is the default +for GCC on most targets. On the other hand, this behavior is not required by ISO C, and on some targets may carry a speed or code size penalty on variable references. -The @option{-fno-common} option specifies that the compiler should place +The @option{-fno-common} option specifies that the compiler should place uninitialized global variables in the data section of the object file, rather than generating them as common blocks. -This has the effect that if the same variable is declared +This has the effect that if the same variable is declared (without @code{extern}) in two different compilations, you will get a multiple-definition error when you link them. -In this case, you must compile with @option{-fcommon} instead. -Compiling with @option{-fno-common} is useful on targets for which +In this case, you must compile with @option{-fcommon} instead. 
+Compiling with @option{-fno-common} is useful on targets for which it provides better performance, or if you wish to verify that the program will work on other systems which always treat uninitialized variable declarations this way. |