1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
#!/usr/local/bin/perl
# alpha assembler
# bn_mul_comba8($name)
#
# Drives the assembler-generation helpers (init_pool, function_begin, ld, st,
# mul, muh, mul_add_c, ...; all defined outside this file) to produce a
# function called $name that multiplies two 8-word operands column by column
# and writes the 16-word product.
#
# Parameters:
#   $name - name handed to function_begin()/function_end().
#
# Generated-function arguments, as fetched with wparam():
#   wparam(0) -> $rp  destination, words 0..15 are stored
#   wparam(1) -> $ap  first operand, words 0..7 are loaded
#   wparam(2) -> $bp  second operand, words 0..7 are loaded
#
# Side effects: assigns the package globals $cnt, $rp, $ap and $bp, and reads
# $reg_s0 / $reg_s1.  The order of the NR()/FR() calls is kept exactly as
# written because it determines which register each value lands in.
#
# NOTE(review): the semantics of the helpers are inferred from how they are
# used here (QWPw(i,p) = word i at pointer p, mul_add_c = multiply and add
# into the three-register accumulator) - confirm against their definitions.
sub bn_mul_comba8
{
    local($name)=@_;
    local(@a,@b,$r,$c0,$c1,$c2);

    $cnt=1;
    &init_pool(3);

    $rp=&wparam(0);
    $ap=&wparam(1);
    $bp=&wparam(2);

    &function_begin($name,"");

    &comment("");

    # Two stack slots: $reg_s0 and $reg_s1 are saved there and then released
    # to the register allocator; they are reloaded just before function_end.
    &stack_push(2);
    &ld(($a[0])=&NR(1),&QWPw(0,$ap));
    &ld(($b[0])=&NR(1),&QWPw(0,$bp));
    &st($reg_s0,&swtmp(0)); &FR($reg_s0);
    &st($reg_s1,&swtmp(1)); &FR($reg_s1);
    &ld(($a[1])=&NR(1),&QWPw(1,$ap));
    &ld(($b[1])=&NR(1),&QWPw(1,$bp));
    &ld(($a[2])=&NR(1),&QWPw(2,$ap));
    &ld(($b[2])=&NR(1),&QWPw(2,$bp));
    &ld(($a[3])=&NR(1),&QWPw(3,$ap));
    &ld(($b[3])=&NR(1),&QWPw(3,$bp));
    # Words 4..7 must come from indices 4..7; loading them all from index 1
    # would make the upper half of each operand a copy of word 1.
    &ld(($a[4])=&NR(1),&QWPw(4,$ap));
    &ld(($b[4])=&NR(1),&QWPw(4,$bp));
    &ld(($a[5])=&NR(1),&QWPw(5,$ap));
    &ld(($b[5])=&NR(1),&QWPw(5,$bp));
    &ld(($a[6])=&NR(1),&QWPw(6,$ap));
    &ld(($b[6])=&NR(1),&QWPw(6,$bp));
    # The operand pointers are released as soon as their last word is loaded.
    &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
    &ld(($b[7])=&NR(1),&QWPw(7,$bp)); &FR($bp);

    # ($c0,$c1,$c2) is the three-register column accumulator.  After every
    # column: store $c0, free it, grab a fresh register, rotate the names so
    # the fresh register becomes $c2, and zero that register.
    ($c0,$c1,$c2)=&NR(3);
    &mov("zero",$c2);

    # column 0: a single product, so plain mul/muh instead of mul_add_c
    &mul($a[0],$b[0],$c0);
    &muh($a[0],$b[0],$c1);
    &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 1
    &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 2
    &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 3
    &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 4
    &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 5
    &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 6
    &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
    &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 7: from here on each operand word is freed right after its
    # final use ($a[k-7] after the first product, $b[k-7] after the last).
    &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]);
    &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]);
    &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 8
    &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]);
    &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]);
    &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 9
    &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]);
    &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]);
    &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 10
    &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]);
    &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]);
    &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 11
    &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]);
    &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]);
    &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 12
    &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]);
    &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
    &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]);
    &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 13
    &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]);
    &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]);
    &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
    ($c0,$c1,$c2)=($c1,$c2,$c0);
    &mov("zero",$c2);

    # column 14: last product; $c0 and $c1 become result words 14 and 15
    &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
    &st($c0,&QWPw(14,$rp));
    &st($c1,&QWPw(15,$rp));

    &FR($c0,$c1,$c2);

    # Reload the two registers saved at the top and drop the stack slots.
    &ld($reg_s0,&swtmp(0));
    &ld($reg_s1,&swtmp(1));
    &stack_pop(2);

    &function_end($name);

    &fin_pool;
}
1; # true value as the file's last statement (presumably so a require of this file succeeds - confirm how it is loaded)
|