summaryrefslogtreecommitdiff
path: root/crypto/x86_64cpuid.pl
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2017-11-22 20:48:44 +0100
committerAndy Polyakov <appro@openssl.org>2017-11-23 21:05:44 +0100
commit88ac224cdaf65c3c3a08beddccb694706a311336 (patch)
tree4f9de52577d5c5fc2fcbbe986c3b6442646e1ddb /crypto/x86_64cpuid.pl
parent6df34091bac6f82b7d9553dc857123b2bd478c22 (diff)
downloadopenssl-new-88ac224cdaf65c3c3a08beddccb694706a311336.tar.gz
crypto/x86_64cpuid.pl: fix AVX512 capability masking.
Originally it was thought that AVX512VL+BW instructions could be used with XMM and YMM registers without the kernel enabling ZMM state support, but that turned out to be a wrong assumption. Reviewed-by: Rich Salz <rsalz@openssl.org>
Diffstat (limited to 'crypto/x86_64cpuid.pl')
-rw-r--r--crypto/x86_64cpuid.pl9
1 files changed, 5 insertions, 4 deletions
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index 7c8952e721..d30928efbd 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -197,10 +197,11 @@ OPENSSL_ia32_cpuid:
and \$0xe6,%eax # isolate XMM, YMM and ZMM state support
cmp \$0xe6,%eax
je .Ldone
- andl \$0xfffeffff,8(%rdi) # clear AVX512F, ~(1<<16)
- # note that we don't touch other AVX512
- # extensions, because they can be used
- # with YMM (without opmasking though)
+ andl \$0x3fdeffff,8(%rdi) # ~(1<<31|1<<30|1<<21|1<<16)
+ # clear AVX512F+BW+VL+IFMA, all of
+ # them are EVEX-encoded, which requires
+ # ZMM state support even if one uses
+ # only XMM and YMM :-(
and \$6,%eax # isolate XMM and YMM state support
cmp \$6,%eax
je .Ldone