summary refs log tree commit diff
path: root/gcc/config/pa/pa.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/pa/pa.md')
-rw-r--r-- gcc/config/pa/pa.md 364
1 files changed, 223 insertions, 141 deletions
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index c80edcc2e2b..ca99b3249e2 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -44,7 +44,7 @@
;;
;; FIXME: Add 800 scheduling for completeness?
-(define_attr "cpu" "700,7100,7100LC,7200,8000" (const (symbol_ref "pa_cpu_attr")))
+(define_attr "cpu" "700,7100,7100LC,7200,7300,8000" (const (symbol_ref "pa_cpu_attr")))
;; Length (in # of bytes).
(define_attr "length" ""
@@ -139,35 +139,10 @@
(const_int 0)))
[(eq_attr "in_branch_delay" "true") (nil) (nil)])
-;; Function units of the HPPA. The following data is for the 700 CPUs
-;; (Mustang CPU + Timex FPU aka PA-89) because that's what I have the docs for.
-;; Scheduling instructions for PA-83 machines according to the Snake
-;; constraints shouldn't hurt.
-
-;; (define_function_unit {name} {num-units} {n-users} {test}
-;; {ready-delay} {issue-delay} [{conflict-list}])
-
-;; The integer ALU.
-;; (Noted only for documentation; units that take one cycle do not need to
-;; be specified.)
-
-;; (define_function_unit "alu" 1 0
-;; (and (eq_attr "type" "unary,shift,nullshift,binary,move,address")
-;; (eq_attr "cpu" "700"))
-;; 1 0)
-
-
;; Memory. Disregarding Cache misses, the Mustang memory times are:
;; load: 2, fpload: 3
;; store, fpstore: 3, no D-cache operations should be scheduled.
-(define_function_unit "pa700memory" 1 0
- (and (eq_attr "type" "load,fpload")
- (eq_attr "cpu" "700")) 2 0)
-(define_function_unit "pa700memory" 1 0
- (and (eq_attr "type" "store,fpstore")
- (eq_attr "cpu" "700")) 3 3)
-
;; The Timex (aka 700) has two floating-point units: ALU, and MUL/DIV/SQRT.
;; Timings:
;; Instruction Time Unit Minimum Distance (unit contention)
@@ -186,44 +161,73 @@
;; fdiv,dbl 12 MPY 12
;; fsqrt,sgl 14 MPY 14
;; fsqrt,dbl 18 MPY 18
+;;
+;; We don't model fmpyadd/fmpysub properly as those instructions
+;; keep both the FP ALU and MPY units busy. Given that these
+;; processors are obsolete, I'm not going to spend the time to
+;; model those instructions correctly.
-(define_function_unit "pa700fp_alu" 1 0
+(define_automaton "pa700")
+(define_cpu_unit "dummy_700,mem_700,fpalu_700,fpmpy_700" "pa700")
+
+(define_insn_reservation "W0" 4
(and (eq_attr "type" "fpcc")
- (eq_attr "cpu" "700")) 4 2)
-(define_function_unit "pa700fp_alu" 1 0
+ (eq_attr "cpu" "700"))
+ "fpalu_700*2")
+
+(define_insn_reservation "W1" 3
(and (eq_attr "type" "fpalu")
- (eq_attr "cpu" "700")) 3 2)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpalu_700*2")
+
+(define_insn_reservation "W2" 3
(and (eq_attr "type" "fpmulsgl,fpmuldbl")
- (eq_attr "cpu" "700")) 3 2)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*2")
+
+(define_insn_reservation "W3" 10
(and (eq_attr "type" "fpdivsgl")
- (eq_attr "cpu" "700")) 10 10)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*10")
+
+(define_insn_reservation "W4" 12
(and (eq_attr "type" "fpdivdbl")
- (eq_attr "cpu" "700")) 12 12)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*12")
+
+(define_insn_reservation "W5" 14
(and (eq_attr "type" "fpsqrtsgl")
- (eq_attr "cpu" "700")) 14 14)
-(define_function_unit "pa700fp_mpy" 1 0
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*14")
+
+(define_insn_reservation "W6" 18
(and (eq_attr "type" "fpsqrtdbl")
- (eq_attr "cpu" "700")) 18 18)
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*18")
+
+(define_insn_reservation "W7" 2
+ (and (eq_attr "type" "load,fpload")
+ (eq_attr "cpu" "700"))
+ "mem_700")
+
+(define_insn_reservation "W8" 3
+ (and (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "700"))
+ "mem_700*3")
+
+(define_insn_reservation "W9" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,load,fpload,store,fpstore")
+ (eq_attr "cpu" "700"))
+ "dummy_700")
;; Function units for the 7100 and 7150. The 7100/7150 can dual-issue
;; floating point computations with non-floating point computations (fp loads
;; and stores are not fp computations).
;;
-
;; Memory. Disregarding Cache misses, memory loads take two cycles; stores also
;; take two cycles, during which no Dcache operations should be scheduled.
;; Any special cases are handled in pa_adjust_cost. The 7100, 7150 and 7100LC
;; all have the same memory characteristics if one disregards cache misses.
-(define_function_unit "pa7100memory" 1 0
- (and (eq_attr "type" "load,fpload")
- (eq_attr "cpu" "7100,7100LC")) 2 0)
-(define_function_unit "pa7100memory" 1 0
- (and (eq_attr "type" "store,fpstore")
- (eq_attr "cpu" "7100,7100LC")) 2 2)
;; The 7100/7150 has three floating-point units: ALU, MUL, and DIV.
;; Timings:
@@ -243,41 +247,46 @@
;; fdiv,dbl 15 DIV 15
;; fsqrt,sgl 8 DIV 8
;; fsqrt,dbl 15 DIV 15
+;;
+;; We don't really model the FP ALU/MPY units properly (they are
+;; distinct subunits in the FP unit). However, there can never be
+;; a functional unit conflict given the latency and issue rates
+;; for those units.
-(define_function_unit "pa7100fp_alu" 1 0
- (and (eq_attr "type" "fpcc,fpalu")
- (eq_attr "cpu" "7100")) 2 1)
-(define_function_unit "pa7100fp_mpy" 1 0
- (and (eq_attr "type" "fpmulsgl,fpmuldbl")
- (eq_attr "cpu" "7100")) 2 1)
-(define_function_unit "pa7100fp_div" 1 0
+(define_automaton "pa7100")
+(define_cpu_unit "i_7100, f_7100,fpmac_7100,fpdivsqrt_7100,mem_7100" "pa7100")
+
+(define_insn_reservation "X0" 2
+ (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "7100"))
+ "f_7100,fpmac_7100")
+
+(define_insn_reservation "X1" 8
(and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
- (eq_attr "cpu" "7100")) 8 8)
-(define_function_unit "pa7100fp_div" 1 0
- (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100")) 15 15)
+ (eq_attr "cpu" "7100"))
+ "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*7")
-;; To encourage dual issue we define function units corresponding to
-;; the instructions which can be dual issued. This is a rather crude
-;; approximation, the "pa7100nonflop" test in particular could be refined.
-(define_function_unit "pa7100flop" 1 1
- (and
- (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100")) 1 1)
+(define_insn_reservation "X2" 15
+ (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "7100"))
+ "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*14")
-(define_function_unit "pa7100nonflop" 1 1
- (and
- (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100")) 1 1)
+(define_insn_reservation "X3" 2
+ (and (eq_attr "type" "load,fpload")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100")
+(define_insn_reservation "X4" 2
+ (and (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100,mem_7100")
-;; Memory subsystem works just like 7100/7150 (except for cache miss times which
-;; we don't model here).
+(define_insn_reservation "X5" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore")
+ (eq_attr "cpu" "7100"))
+ "i_7100")
;; The 7100LC has three floating-point units: ALU, MUL, and DIV.
-;; Note divides and sqrt flops lock the cpu until the flop is
-;; finished. fmpy and xmpyu (fmpyi) lock the cpu for one cycle.
-;; There's no way to avoid the penalty.
;; Timings:
;; Instruction Time Unit Minimum Distance (unit contention)
;; fcpy 2 ALU 1
@@ -299,106 +308,179 @@
;; fdiv,dbl 15 DIV 15
;; fsqrt,sgl 8 DIV 8
;; fsqrt,dbl 15 DIV 15
-
-(define_function_unit "pa7100LCfp_alu" 1 0
+;;
+;; The PA7200 is just like the PA7100LC except that there is
+;; no store-store penalty.
+;;
+;; The PA7300 is just like the PA7200 except that there is
+;; no store-load penalty.
+;;
+;; Note there are some aspects of the 7100LC we are not modeling
+;; at the moment. I'll be reviewing the 7100LC scheduling info
+;; shortly and updating this description.
+;;
+;; load-load pairs
+;; store-store pairs
+;; fmpyadd,dbl
+;; fmpysub,dbl
+;; other issue modeling
+
+(define_automaton "pa7100lc")
+(define_cpu_unit "i0_7100lc, i1_7100lc, f_7100lc" "pa7100lc")
+(define_cpu_unit "fpalu_7100lc,fpdivsqrt_7100lc,fpmul_7100lc" "pa7100lc")
+(define_cpu_unit "mem_7100lc" "pa7100lc")
+
+(define_insn_reservation "Y0" 2
(and (eq_attr "type" "fpcc,fpalu")
- (eq_attr "cpu" "7100LC,7200")) 2 1)
-(define_function_unit "pa7100LCfp_mpy" 1 0
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc,fpalu_7100lc")
+
+(define_insn_reservation "Y1" 2
(and (eq_attr "type" "fpmulsgl")
- (eq_attr "cpu" "7100LC,7200")) 2 1)
-(define_function_unit "pa7100LCfp_mpy" 1 0
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc,fpmul_7100lc")
+
+(define_insn_reservation "Y2" 3
(and (eq_attr "type" "fpmuldbl")
- (eq_attr "cpu" "7100LC,7200")) 3 2)
-(define_function_unit "pa7100LCfp_div" 1 0
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc,fpmul_7100lc,fpmul_7100lc")
+
+(define_insn_reservation "Y3" 8
(and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
- (eq_attr "cpu" "7100LC,7200")) 8 8)
-(define_function_unit "pa7100LCfp_div" 1 0
- (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100LC,7200")) 15 15)
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc+fpdivsqrt_7100lc,fpdivsqrt_7100lc*7")
-;; Define the various functional units for dual-issue.
+(define_insn_reservation "Y4" 15
+ (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc+fpdivsqrt_7100lc,fpdivsqrt_7100lc*14")
-;; There's only one floating point unit.
-(define_function_unit "pa7100LCflop" 1 1
- (and
- (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100LC,7200")) 1 1)
+(define_insn_reservation "Y5" 2
+ (and (eq_attr "type" "load,fpload")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "i1_7100lc+mem_7100lc")
-;; Shifts and memory ops execute in only one of the integer ALUs
-(define_function_unit "pa7100LCshiftmem" 1 1
- (and
- (eq_attr "type" "shift,nullshift,load,fpload,store,fpstore")
- (eq_attr "cpu" "7100LC,7200")) 1 1)
+(define_insn_reservation "Y6" 2
+ (and (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "7100LC"))
+ "i1_7100lc+mem_7100lc,mem_7100lc")
-;; We have two basic ALUs.
-(define_function_unit "pa7100LCalu" 2 1
- (and
- (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "7100LC,7200")) 1 1)
+(define_insn_reservation "Y7" 1
+ (and (eq_attr "type" "shift,nullshift")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "i1_7100lc")
-;; I don't have complete information on the PA7200; however, most of
-;; what I've heard makes it look like a 7100LC without the store-store
-;; penalty. So that's how we'll model it.
+(define_insn_reservation "Y8" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,shift,nullshift")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "(i0_7100lc|i1_7100lc)")
-;; Memory. Disregarding Cache misses, memory loads and stores take
-;; two cycles. Any special cases are handled in pa_adjust_cost.
-(define_function_unit "pa7200memory" 1 0
- (and (eq_attr "type" "load,fpload,store,fpstore")
- (eq_attr "cpu" "7200")) 2 0)
+;; The 7200 has a store-load penalty
+(define_insn_reservation "Y9" 2
+ (and (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "7200"))
+ "i0_7100lc,mem_7100lc")
-;; I don't have detailed information on the PA7200 FP pipeline, so I
-;; treat it just like the 7100LC pipeline.
-;; Similarly for the multi-issue fake units.
+;; The 7300 has no penalty for store-store or store-load
+(define_insn_reservation "YA" 2
+ (and (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "7300"))
+ "i0_7100lc")
-;;
;; Scheduling for the PA8000 is somewhat different than scheduling for a
;; traditional architecture.
;;
;; The PA8000 has a large (56) entry reorder buffer that is split between
;; memory and non-memory operations.
;;
-;; The PA800 can issue two memory and two non-memory operations per cycle to
-;; the function units. Similarly, the PA8000 can retire two memory and two
-;; non-memory operations per cycle.
+;; The PA8000 can issue two memory and two non-memory operations per cycle to
+;; the function units, with the exception of branches and multi-output
+;; instructions. The PA8000 can retire two non-memory operations per cycle
+;; and two memory operations per cycle, only one of which may be a store.
;;
;; Given the large reorder buffer, the processor can hide most latencies.
;; According to HP, they've got the best results by scheduling for retirement
;; bandwidth with limited latency scheduling for floating point operations.
;; Latency for integer operations and memory references is ignored.
;;
-;; We claim floating point operations have a 2 cycle latency and are
-;; fully pipelined, except for div and sqrt which are not pipelined.
;;
-;; It is not necessary to define the shifter and integer alu units.
+;; We claim floating point operations have a 2 cycle latency and are
+;; fully pipelined, except for div and sqrt which are not pipelined and
+;; take from 17 to 31 cycles to complete.
;;
-;; These first two define_unit_unit descriptions model retirement from
-;; the reorder buffer.
-(define_function_unit "pa8000lsu" 2 1
+;; It's worth noting that there is no way to saturate all the functional
+;; units on the PA8000 as there is not enough issue bandwidth.
+
+(define_automaton "pa8000")
+(define_cpu_unit "inm0_8000, inm1_8000, im0_8000, im1_8000" "pa8000")
+(define_cpu_unit "rnm0_8000, rnm1_8000, rm0_8000, rm1_8000" "pa8000")
+(define_cpu_unit "store_8000" "pa8000")
+(define_cpu_unit "f0_8000, f1_8000" "pa8000")
+(define_cpu_unit "fdivsqrt0_8000, fdivsqrt1_8000" "pa8000")
+(define_reservation "inm_8000" "inm0_8000 | inm1_8000")
+(define_reservation "im_8000" "im0_8000 | im1_8000")
+(define_reservation "rnm_8000" "rnm0_8000 | rnm1_8000")
+(define_reservation "rm_8000" "rm0_8000 | rm1_8000")
+(define_reservation "f_8000" "f0_8000 | f1_8000")
+(define_reservation "fdivsqrt_8000" "fdivsqrt0_8000 | fdivsqrt1_8000")
+
+;; We can issue any two memops per cycle, but we can only retire
+;; one memory store per cycle. We assume that the reorder buffer
+;; will hide any memory latencies per HP's recommendation.
+(define_insn_reservation "Z0" 0
(and
- (eq_attr "type" "load,fpload,store,fpstore")
- (eq_attr "cpu" "8000")) 1 1)
+ (eq_attr "type" "load,fpload")
+ (eq_attr "cpu" "8000"))
+ "im_8000,rm_8000")
-(define_function_unit "pa8000alu" 2 1
+(define_insn_reservation "Z1" 0
(and
- (eq_attr "type" "!load,fpload,store,fpstore")
- (eq_attr "cpu" "8000")) 1 1)
-
-;; Claim floating point ops have a 2 cycle latency, excluding div and
-;; sqrt, which are not pipelined and issue to different units.
-(define_function_unit "pa8000fmac" 2 0
+ (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "8000"))
+ "im_8000,rm_8000+store_8000")
+
+;; We can issue and retire two non-memory operations per cycle with
+;; a few exceptions (branches). This group catches those we want
+;; to assume have zero latency.
+(define_insn_reservation "Z2" 0
(and
- (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
- (eq_attr "cpu" "8000")) 2 1)
+ (eq_attr "type" "!load,fpload,store,fpstore,uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch,fpcc,fpalu,fpmulsgl,fpmuldbl,fpsqrtsgl,fpsqrtdbl,fpdivsgl,fpdivdbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,rnm_8000")
-(define_function_unit "pa8000fdiv" 2 1
+;; Branches use both slots in the non-memory issue and
+;; retirement unit.
+(define_insn_reservation "Z3" 0
(and
- (eq_attr "type" "fpdivsgl,fpsqrtsgl")
- (eq_attr "cpu" "8000")) 17 17)
+ (eq_attr "type" "uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
+ (eq_attr "cpu" "8000"))
+ "inm0_8000+inm1_8000,rnm0_8000+rnm1_8000")
+
+;; We partial latency schedule the floating point units.
+;; They can issue/retire two at a time in the non-memory
+;; units. We fix their latency at 2 cycles and they
+;; are fully pipelined.
+(define_insn_reservation "Z4" 1
+ (and
+ (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,f_8000,rnm_8000")
+
+;; The fdivsqrt units are not pipelined and have a very long latency.
+;; To keep the DFA from exploding, we do not show all the
+;; reservations for the divsqrt unit.
+(define_insn_reservation "Z5" 17
+ (and
+ (eq_attr "type" "fpdivsgl,fpsqrtsgl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,fdivsqrt_8000*6,rnm_8000")
+
+(define_insn_reservation "Z6" 31
+ (and
+ (eq_attr "type" "fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,fdivsqrt_8000*6,rnm_8000")
-(define_function_unit "pa8000fdiv" 2 1
- (and
- (eq_attr "type" "fpdivdbl,fpsqrtdbl")
- (eq_attr "cpu" "8000")) 31 31)
;; Compare instructions.