1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
|
#!/usr/bin/env perl
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output";
if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
else { $arg1="%rdi"; $arg2="%rsi"; }
print<<___;
.extern OPENSSL_cpuid_setup
.section .init
call OPENSSL_cpuid_setup
.text
.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,\@abi-omnipotent
.align 16
OPENSSL_atomic_add:
movl ($arg1),%eax
.Lspin: leaq ($arg2,%rax),%r8
.byte 0xf0 # lock
cmpxchgl %r8d,($arg1)
jne .Lspin
movl %r8d,%eax
.byte 0x48,0x98 # cltq/cdqe
ret
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
.globl OPENSSL_rdtsc
.type OPENSSL_rdtsc,\@abi-omnipotent
.align 16
OPENSSL_rdtsc:
rdtsc
shl \$32,%rdx
or %rdx,%rax
ret
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
.globl OPENSSL_ia32_cpuid
.type OPENSSL_ia32_cpuid,\@abi-omnipotent
.align 16
OPENSSL_ia32_cpuid:
mov %rbx,%r8
xor %eax,%eax
cpuid
xor %eax,%eax
cmp \$0x756e6547,%ebx # "Genu"
setne %al
mov %eax,%r9d
cmp \$0x49656e69,%edx # "ineI"
setne %al
or %eax,%r9d
cmp \$0x6c65746e,%ecx # "ntel"
setne %al
or %eax,%r9d # 0 indicates Intel CPU
mov \$1,%r10d # "number of [AMD] cores"
jz .Lintel
cmp \$0x68747541,%ebx # "Auth"
setne %al
mov %eax,%r10d
cmp \$0x69746E65,%edx # "enti"
setne %al
or %eax,%r10d
cmp \$0x444D4163,%ecx # "cAMD"
setne %al
or %eax,%r10d # 0 indicates AMD CPU
jnz .Lintel
mov \$0x80000000,%eax
cpuid
cmp \$0x80000008,%eax
mov \$1,%r10d # "number of [AMD] cores"
jb .Lintel
mov \$0x80000008,%eax
cpuid
movzb %cl,%r10 # number of cores - 1
inc %r10 # number of cores
.Lintel:
mov \$1,%eax
cpuid
cmp \$0,%r9d
jne .Lnotintel
or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR
and \$15,%ah
cmp \$15,%ah # examine Family ID
je .Lnotintel
or \$0x40000000,%edx # use reserved bit to skip unrolled loop
.Lnotintel:
bt \$28,%edx # test hyper-threading bit
jnc .Ldone
shr \$16,%ebx
cmp %r10b,%bl # see if cache is shared
ja .Ldone
and \$0xefffffff,%edx # ~(1<<28)
.Ldone:
shl \$32,%rcx
mov %edx,%eax
mov %r8,%rbx
or %rcx,%rax
ret
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,\@abi-omnipotent
.align 16
OPENSSL_cleanse:
xor %rax,%rax
cmp \$15,$arg2
jae .Lot
.Little:
mov %al,($arg1)
sub \$1,$arg2
lea 1($arg1),$arg1
jnz .Little
ret
.align 16
.Lot:
test \$7,$arg1
jz .Laligned
mov %al,($arg1)
lea -1($arg2),$arg2
lea 1($arg1),$arg1
jmp .Lot
.Laligned:
mov %rax,($arg1)
lea -8($arg2),$arg2
test \$-8,$arg2
lea 8($arg1),$arg1
jnz .Laligned
cmp \$0,$arg2
jne .Little
ret
.size OPENSSL_cleanse,.-OPENSSL_cleanse
___
print<<___ if (!$win64);
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,\@abi-omnipotent
.align 16
OPENSSL_wipe_cpu:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
pxor %xmm10,%xmm10
pxor %xmm11,%xmm11
pxor %xmm12,%xmm12
pxor %xmm13,%xmm13
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
xorq %rcx,%rcx
xorq %rdx,%rdx
xorq %rsi,%rsi
xorq %rdi,%rdi
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
leaq 8(%rsp),%rax
ret
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
print<<___ if ($win64);
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,\@abi-omnipotent
.align 16
OPENSSL_wipe_cpu:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
xorq %rcx,%rcx
xorq %rdx,%rdx
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
leaq 8(%rsp),%rax
ret
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
close STDOUT; # flush
|