1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
|
;; Pentium Pro/PII Scheduling
;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This file is part of GNU CC.
;;
;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING. If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;; Categorize how many uops an ia32 instruction evaluates to:
;; one -- an instruction with 1 uop can be decoded by any of the
;; three decoders.
;; few -- an instruction with 1 to 4 uops can be decoded only by
;; decoder 0.
;; many -- a complex instruction may take an unspecified number of
;; cycles to decode in decoder 0.
;; Decoder class of an insn, computed from its "type" and "memory"
;; attributes.  The cond clauses are tested in order; the first clause
;; whose test holds supplies the value, and "one" is the default.
(define_attr "ppro_uops" "one,few,many"
  (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
           (const_string "many")
         ;; "str" is deliberately not repeated in this list: the clause
         ;; above already claims it, so a second mention could never match.
         (eq_attr "type" "icmov,fcmov,cld")
           (const_string "few")
         ;; Integer moves are "few" only when they store to memory;
         ;; register-only and load forms stay "one".
         (eq_attr "type" "imov")
           (if_then_else (eq_attr "memory" "store,both")
             (const_string "few")
             (const_string "one"))
         ;; Any remaining insn whose "memory" attribute is not "none".
         (eq_attr "memory" "!none")
           (const_string "few")
        ]
        (const_string "one")))
;;
;; The PPro has an out-of-order core, but the instruction decoders are
;; naturally in-order and asymmetric. We get best performance by scheduling
;; for the decoders, for in doing so we give the out-of-order execution
;; unit the most choices.
;;
;; Rough readiness numbers. Fine tuning happens in i386.c.
;;
;; p0 describes port 0.
;; p01 describes ports 0 and 1 as a pair; alu insns can issue to either.
;; p2 describes port 2 for loads.
;; p34 describes ports 3 and 4 for stores.
;; fpu describes the fpu accessed via port 0.
;; ??? It is less than clear if there are separate fadd and fmul units
;; that could operate in parallel.
;;
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
;; ppro_p0 (multiplicity 1, no simultaneity limit): shifts, rotates, lea,
;; ibr and cld.  Ready-delay 1, issue-delay 1.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "ishift,rotate,ishift1,rotate1,lea,ibr,cld"))
  1 1)
;; ppro_p0: integer multiply.  Ready-delay 4, issue-delay 1, i.e. the
;; result is available after 4 cycles but a new insn may issue each cycle.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "imul"))
  4 1)
;; ??? Does the divider lock out the pipe while it works,
;; or is there a disconnected unit?
;; ppro_p0: integer divide.  Issue-delay equals ready-delay (17), so the
;; unit is modelled as occupied for the whole duration of the divide.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "idiv"))
  17 17)
;; ppro_p0: insns of type fop, fsgn and fistp.  Ready-delay 3,
;; issue-delay 1.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fop,fsgn,fistp"))
  3 1)
;; ppro_p0: insns of type fcmov.  Ready-delay 2, issue-delay 1.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fcmov"))
  2 1)
;; ppro_p0: insns of type fcmp.  Ready-delay 1, issue-delay 1.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fcmp"))
  1 1)
;; ppro_p0: insns of type fmov.  Ready-delay 1, issue-delay 1.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmov"))
  1 1)
;; ppro_p0: insns of type fmul.  Ready-delay 5, issue-delay 1.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmul"))
  5 1)
;; ppro_p0: insns of type fdiv and fpspc.  Ready-delay 56, but issue-delay
;; is only 1, so this unit itself is free again on the next cycle.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fdiv,fpspc"))
  56 1)
;; ppro_p01 (multiplicity 2): every insn whose type is neither imov nor
;; fmov.  Ready-delay 1, issue-delay 1.
(define_function_unit "ppro_p01" 2 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "!imov,fmov"))
  1 1)
;; ppro_p01 (multiplicity 2): imov and fmov insns, but only those whose
;; "memory" attribute is "none".  Ready-delay 1, issue-delay 1.
(define_function_unit "ppro_p01" 2 0
  (and (eq_attr "cpu" "pentiumpro")
       (and (eq_attr "type" "imov,fmov")
            (eq_attr "memory" "none")))
  1 1)
;; ppro_p2: insns of type pop, plus any insn whose "memory" attribute is
;; "load" or "both".  Ready-delay 3, issue-delay 1.
(define_function_unit "ppro_p2" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (ior (eq_attr "type" "pop")
            (eq_attr "memory" "load,both")))
  3 1)
;; ppro_p34: insns of type push, plus any insn whose "memory" attribute is
;; "store" or "both".  Ready-delay 1, issue-delay 1.
(define_function_unit "ppro_p34" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (ior (eq_attr "type" "push")
            (eq_attr "memory" "store,both")))
  1 1)
;; fpu: insns of type fop, fsgn, fmov, fcmp, fcmov and fistp.
;; Ready-delay 1, issue-delay 1.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fop,fsgn,fmov,fcmp,fcmov,fistp"))
  1 1)
;; fpu: insns of type fmul.  Ready-delay 5, issue-delay 2, so a following
;; fpu insn has to wait two cycles before the unit accepts it.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmul"))
  5 2)
;; fpu: insns of type fdiv and fpspc.  Issue-delay equals ready-delay (56),
;; so the fpu is modelled as occupied until the result is ready.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fdiv,fpspc"))
  56 56)
;; imul uses the fpu. ??? does it have the same throughput as fmul?
;; fpu: integer multiply also reserves the fpu.  Ready-delay 4,
;; issue-delay 1.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "imul"))
  4 1)
|