diff options
-rw-r--r-- | gcc/ChangeLog | 24 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 46 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.h | 14 | ||||
-rw-r--r-- | gcc/ginclude/va-ppc.h | 3 | ||||
-rw-r--r-- | gcc/loop.c | 43 | ||||
-rw-r--r-- | gcc/rtl.h | 2 | ||||
-rw-r--r-- | gcc/toplev.c | 4 |
7 files changed, 100 insertions, 36 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9e844b0e23a..e921e814df0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,27 @@ +Fri Sep 18 23:50:56 1998 David Edelsohn <edelsohn@mhpcc.edu> + + * toplev.c (rest_of_compilation): Set bct_p on second call to + loop_optimize. + * loop.c (loop_optimize, scan_loop, strength_reduce): New argument + bct_p. + (strength_reduce): Only call analyze_loop_iterations and + insert_bct if bct_p set. + (check_dbra_loop): Fix typo. + (insert_bct): Use word_mode instead of SImode. + (instrument_loop_bct): Likewise. Do not delete iteration count + condition code generation insn. Initialize iteration count before + loop start. + * rtl.h (loop_optimize): Update prototype. + + * ginclude/va-ppc.h (va_arg): longlong types in overflow area are + not doubleword aligned. + + * rs6000.c (optimization_options): New function. + (secondary_reload_class): Only call true_regnum for PSEUDO_REGs. + * rs6000.h (OPTIMIZATION_OPTIONS): Define. + (REG_ALLOC_ORDER): Allocate highest numbered condition registers + first; cr1 can be used for FP record condition insns. + Fri Sep 18 09:44:55 1998 Nick Clifton <nickc@cygnus.com> * config/m32r/m32r.h (m32r_block_immediate_operand): Add to diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index a2b58fd9d6c..e5f199c6066 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -347,6 +347,20 @@ rs6000_override_options (default_cpu) SUBTARGET_OVERRIDE_OPTIONS; #endif } + +void +optimization_options (level, size) + int level; + int size ATTRIBUTE_UNUSED; +{ +#if 0 +#ifdef HAIFA + /* When optimizing, enable use of BCT instruction. */ + if (level >= 1) + flag_branch_on_count_reg = 1; +#endif +#endif +} /* Do anything needed at the start of the asm file. */ @@ -1305,14 +1319,18 @@ function_arg_padding (mode, type) Windows NT wants anything >= 8 bytes to be double word aligned. - V.4 wants long longs to be double word aligned. 
*/ + V.4 wants long longs to be double word aligned. + + FP emulation: double precision passed, returned, and same alignment + as long long. */ int function_arg_boundary (mode, type) enum machine_mode mode; tree type; { - if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS) && mode == DImode) + if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS) + && ((mode == DImode) || (TARGET_SOFT_FLOAT && mode == DFmode))) return 64; if (DEFAULT_ABI != ABI_NT || TARGET_64BIT) @@ -2188,10 +2206,7 @@ secondary_reload_class (class, mode, in) enum machine_mode mode ATTRIBUTE_UNUSED; rtx in; { - int regno = true_regnum (in); - - if (regno >= FIRST_PSEUDO_REGISTER) - regno = -1; + int regno; /* We can not copy a symbolic operand directly into anything other than BASE_REGS for TARGET_ELF. So indicate that a register from BASE_REGS @@ -2203,6 +2218,25 @@ secondary_reload_class (class, mode, in) || GET_CODE (in) == CONST)) return BASE_REGS; + if (GET_CODE (in) == REG) + { + regno = REGNO (in); + if (regno >= FIRST_PSEUDO_REGISTER) + { + regno = true_regnum (in); + if (regno >= FIRST_PSEUDO_REGISTER) + regno = -1; + } + } + else if (GET_CODE (in) == SUBREG) + { + regno = true_regnum (in); + if (regno >= FIRST_PSEUDO_REGISTER) + regno = -1; + } + else + regno = -1; + /* We can place anything into GENERAL_REGS and can put GENERAL_REGS into anything. */ if (class == GENERAL_REGS || class == BASE_REGS diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 9856abfd677..53c99d617c2 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -473,10 +473,17 @@ extern int rs6000_debug_arg; /* debug argument handling */ defined, is executed once just after all the command options have been parsed. + Don't use this macro to turn on various extra optimizations for + `-O'. That is what `OPTIMIZATION_OPTIONS' is for. + On the RS/6000 this is used to define the target cpu type. 
*/ #define OVERRIDE_OPTIONS rs6000_override_options (TARGET_CPU_DEFAULT) +/* Define this to change the optimizations performed by default. */ +#define OPTIMIZATION_OPTIONS(LEVEL,SIZE) optimization_options(LEVEL,SIZE) + + /* Show we can debug even without a frame pointer. */ #define CAN_DEBUG_WITHOUT_FP @@ -712,9 +719,10 @@ extern int rs6000_debug_arg; /* debug argument handling */ fp13 - fp2 (not saved; incoming fp arg registers) fp1 (not saved; return value) fp31 - fp14 (saved; order given to save least number) - cr1, cr6, cr7 (not saved or special) + cr7, cr6 (not saved or special) + cr1 (not saved, but used for FP operations) cr0 (not saved, but used for arithmetic operations) - cr2, cr3, cr4 (saved) + cr4, cr3, cr2 (saved) r0 (not saved; cannot be base reg) r9 (not saved; best for TImode) r11, r10, r8-r4 (not saved; highest used first to make less conflict) @@ -732,7 +740,7 @@ extern int rs6000_debug_arg; /* debug argument handling */ 33, \ 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \ 50, 49, 48, 47, 46, \ - 69, 74, 75, 68, 70, 71, 72, \ + 75, 74, 69, 68, 72, 71, 70, \ 0, \ 9, 11, 10, 8, 7, 6, 5, 4, \ 3, \ diff --git a/gcc/ginclude/va-ppc.h b/gcc/ginclude/va-ppc.h index 5d87f38d910..736369dbe7d 100644 --- a/gcc/ginclude/va-ppc.h +++ b/gcc/ginclude/va-ppc.h @@ -158,9 +158,6 @@ __extension__ (*({ \ } \ else \ { \ - if (__va_longlong_p(TYPE) && ((long)__va_overflow(AP) & 4) != 0) \ - __va_overflow(AP) += 4; \ - \ __ptr = (TYPE *) (void *) (__va_overflow(AP)); \ __va_overflow(AP) += __va_size (TYPE) * sizeof (long); \ } \ diff --git a/gcc/loop.c b/gcc/loop.c index 3efb200f0f7..b171c75c1e6 100644 --- a/gcc/loop.c +++ b/gcc/loop.c @@ -310,7 +310,7 @@ static void count_loop_regs_set PROTO((rtx, rtx, varray_type, varray_type, int *, int)); static void note_addr_stored PROTO((rtx, rtx)); static int loop_reg_used_before_p PROTO((rtx, rtx, rtx, rtx, rtx)); -static void scan_loop PROTO((rtx, rtx, int)); +static void scan_loop PROTO((rtx, rtx, int, int)); #if 0 
static void replace_call_address PROTO((rtx, rtx, rtx)); #endif @@ -324,7 +324,7 @@ static int rtx_equal_for_loop_p PROTO((rtx, rtx, struct movable *)); static void add_label_notes PROTO((rtx, rtx)); static void move_movables PROTO((struct movable *, int, int, rtx, rtx, int)); static int count_nonfixed_reads PROTO((rtx)); -static void strength_reduce PROTO((rtx, rtx, rtx, int, rtx, rtx, int)); +static void strength_reduce PROTO((rtx, rtx, rtx, int, rtx, rtx, int, int)); static void find_single_use_in_loop PROTO((rtx, rtx, varray_type)); static int valid_initial_value_p PROTO((rtx, rtx, int, rtx)); static void find_mem_givs PROTO((rtx, rtx, int, rtx, rtx)); @@ -440,11 +440,11 @@ init_loop () (or 0 if none should be output). */ void -loop_optimize (f, dumpfile, unroll_p) +loop_optimize (f, dumpfile, unroll_p, bct_p) /* f is the first instruction of a chain of insns for one function */ rtx f; FILE *dumpfile; - int unroll_p; + int unroll_p, bct_p; { register rtx insn; register int i; @@ -589,7 +589,7 @@ loop_optimize (f, dumpfile, unroll_p) for (i = max_loop_num-1; i >= 0; i--) if (! loop_invalid[i] && loop_number_loop_ends[i]) scan_loop (loop_number_loop_starts[i], loop_number_loop_ends[i], - unroll_p); + unroll_p, bct_p); /* If debugging and unrolling loops, we must replicate the tree nodes corresponding to the blocks inside the loop, so that the original one @@ -643,9 +643,9 @@ next_insn_in_loop (insn, start, end, loop_top) write, then we can also mark the memory read as invariant. 
*/ static void -scan_loop (loop_start, end, unroll_p) +scan_loop (loop_start, end, unroll_p, bct_p) rtx loop_start, end; - int unroll_p; + int unroll_p, bct_p; { register int i; rtx p; @@ -1185,7 +1185,7 @@ scan_loop (loop_start, end, unroll_p) { the_movables = movables; strength_reduce (scan_start, end, loop_top, - insn_count, loop_start, end, unroll_p); + insn_count, loop_start, end, unroll_p, bct_p); } VARRAY_FREE (n_times_set); @@ -3579,14 +3579,14 @@ static rtx addr_placeholder; static void strength_reduce (scan_start, end, loop_top, insn_count, - loop_start, loop_end, unroll_p) + loop_start, loop_end, unroll_p, bct_p) rtx scan_start; rtx end; rtx loop_top; int insn_count; rtx loop_start; rtx loop_end; - int unroll_p; + int unroll_p, bct_p; { rtx p; rtx set; @@ -4106,7 +4106,7 @@ strength_reduce (scan_start, end, loop_top, insn_count, the loop. Unrolling may update part of this information, and the correct data will be used for generating the BCT. */ #ifdef HAVE_decrement_and_branch_on_count - if (HAVE_decrement_and_branch_on_count) + if (HAVE_decrement_and_branch_on_count && bct_p) analyze_loop_iterations (loop_start, loop_end); #endif #endif /* HAIFA */ @@ -4613,7 +4613,7 @@ strength_reduce (scan_start, end, loop_top, insn_count, #ifdef HAIFA /* instrument the loop with bct insn */ #ifdef HAVE_decrement_and_branch_on_count - if (HAVE_decrement_and_branch_on_count) + if (HAVE_decrement_and_branch_on_count && bct_p) insert_bct (loop_start, loop_end); #endif #endif /* HAIFA */ @@ -6981,7 +6981,7 @@ check_dbra_loop (loop_end, insn_count, loop_start) /* If we have a decrement_and_branch_on_count, prefer the NE test, since this will allow that instruction to be generated. */ -#if ! defined (HAVE_decrement_and_branch_on_zero) && defined (HAVE_decrement_and_branch_on_count) +#if ! defined (HAVE_decrement_and_branch_until_zero) && defined (HAVE_decrement_and_branch_on_count) && (add_val != 1 || ! 
vtop) #endif && GET_CODE (comparison_value) == CONST_INT @@ -8189,7 +8189,7 @@ insert_bct (loop_start, loop_end) /* the only machine mode we work with - is the integer of the size that the machine has */ - enum machine_mode loop_var_mode = SImode; + enum machine_mode loop_var_mode = word_mode; int loop_num = uid_loop_num [INSN_UID (loop_start)]; @@ -8284,7 +8284,8 @@ insert_bct (loop_start, loop_end) /* try to instrument the loop. */ /* Handle the simpler case, where the bounds are known at compile time. */ - if (GET_CODE (initial_value) == CONST_INT && GET_CODE (comparison_value) == CONST_INT) + if (GET_CODE (initial_value) == CONST_INT + && GET_CODE (comparison_value) == CONST_INT) { int n_iterations; int increment_value_abs = INTVAL (increment) * increment_direction; @@ -8459,15 +8460,15 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations) rtx start_label; rtx sequence; - enum machine_mode loop_var_mode = SImode; + enum machine_mode loop_var_mode = word_mode; if (HAVE_decrement_and_branch_on_count) { if (loop_dump_stream) fprintf (loop_dump_stream, "Loop: Inserting BCT\n"); - /* eliminate the check on the old variable */ - delete_insn (PREV_INSN (loop_end)); + /* Discard original jump to continue loop. Original compare result + may still be live, so it cannot be discarded explicitly. */ delete_insn (PREV_INSN (loop_end)); /* insert the label which will delimit the start of the loop */ @@ -8488,12 +8489,13 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations) sequence = gen_sequence (); end_sequence (); - emit_insn_after (sequence, loop_start); + emit_insn_before (sequence, loop_start); /* insert new comparison on the count register instead of the old one, generating the needed BCT pattern (that will be later recognized by assembly generation phase). 
*/ - emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2, start_label), + emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2, + start_label), loop_end); LABEL_NUSES (start_label)++; } @@ -8935,4 +8937,3 @@ replace_label (x, data) return 0; } - diff --git a/gcc/rtl.h b/gcc/rtl.h index e72fd602596..8b88a7e42fd 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -1350,7 +1350,7 @@ extern void print_inline_rtx PROTO ((FILE *, rtx, int)); /* In loop.c */ extern void init_loop PROTO ((void)); #ifdef BUFSIZ -extern void loop_optimize PROTO ((rtx, FILE *, int)); +extern void loop_optimize PROTO ((rtx, FILE *, int, int)); #endif extern void record_excess_regs PROTO ((rtx, rtx, rtx *)); diff --git a/gcc/toplev.c b/gcc/toplev.c index c3a56b2d548..86cbcf6f64d 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -3519,7 +3519,7 @@ rest_of_compilation (decl) { /* We only want to perform unrolling once. */ - loop_optimize (insns, rtl_dump_file, 0); + loop_optimize (insns, rtl_dump_file, 0, 0); /* The first call to loop_optimize makes some instructions @@ -3532,7 +3532,7 @@ rest_of_compilation (decl) analysis code depends on this information. */ reg_scan (insns, max_reg_num (), 1); } - loop_optimize (insns, rtl_dump_file, flag_unroll_loops); + loop_optimize (insns, rtl_dump_file, flag_unroll_loops, 1); }); /* Dump rtl code after loop opt, if we are doing that. */ |