summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config.h3
-rw-r--r--cpu.cpp18
-rw-r--r--cpu.h56
-rw-r--r--gcm-simd.cpp115
-rw-r--r--validat3.cpp6
5 files changed, 173 insertions, 25 deletions
diff --git a/config.h b/config.h
index 9f659c76..2a06663d 100644
--- a/config.h
+++ b/config.h
@@ -785,9 +785,10 @@ NAMESPACE_END
#if !defined(CRYPTOPP_POWER8_AES_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER8_AES) && defined(CRYPTOPP_POWER8_AVAILABLE)
# if defined(__CRYPTO__) || defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || (CRYPTOPP_GCC_VERSION >= 40800)
+//# define CRYPTOPP_POWER8_CRC_AVAILABLE 1
# define CRYPTOPP_POWER8_AES_AVAILABLE 1
+// # define CRYPTOPP_POWER8_PMULL_AVAILABLE 1
# define CRYPTOPP_POWER8_SHA_AVAILABLE 1
-//# define CRYPTOPP_POWER8_CRC_AVAILABLE 1
# endif
#endif
diff --git a/cpu.cpp b/cpu.cpp
index 6c82fdc8..8d60c4b8 100644
--- a/cpu.cpp
+++ b/cpu.cpp
@@ -804,6 +804,7 @@ bool CRYPTOPP_SECTION_INIT g_hasAltivec = false;
bool CRYPTOPP_SECTION_INIT g_hasPower7 = false;
bool CRYPTOPP_SECTION_INIT g_hasPower8 = false;
bool CRYPTOPP_SECTION_INIT g_hasAES = false;
+bool CRYPTOPP_SECTION_INIT g_hasPMULL = false;
bool CRYPTOPP_SECTION_INIT g_hasSHA256 = false;
bool CRYPTOPP_SECTION_INIT g_hasSHA512 = false;
word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
@@ -812,6 +813,7 @@ extern bool CPU_ProbeAltivec();
extern bool CPU_ProbePower7();
extern bool CPU_ProbePower8();
extern bool CPU_ProbeAES();
+extern bool CPU_ProbePMULL();
extern bool CPU_ProbeSHA256();
extern bool CPU_ProbeSHA512();
@@ -884,6 +886,20 @@ inline bool CPU_QueryAES()
return false;
}
+inline bool CPU_QueryPMULL()
+{
+    // Query the platform instead of executing the instruction: the
+    // auxiliary vector on Linux, __power_8_andup() on AIX.
+    // Power8 and ISA 2.07 provide in-core crypto. Glibc 2.24 or
+    // higher is required for PPC_FEATURE2_VEC_CRYPTO.
+#if defined(__linux__)
+    const bool hasVecCrypto = (getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) != 0;
+    return hasVecCrypto;
+#elif defined(_AIX)
+    const bool isPower8OrLater = (__power_8_andup() != 0);
+    return isPower8OrLater;
+#else
+    // No query mechanism is used on other platforms
+    return false;
+#endif
+}
+
inline bool CPU_QuerySHA256()
{
// Power8 and ISA 2.07 provide in-core crypto. Glibc
@@ -918,7 +934,7 @@ void DetectPowerpcFeatures()
g_hasAltivec = CPU_QueryAltivec() || CPU_ProbeAltivec();
g_hasPower7 = CPU_QueryPower7() || CPU_ProbePower7();
g_hasPower8 = CPU_QueryPower8() || CPU_ProbePower8();
- //g_hasPMULL = CPU_QueryPMULL() || CPU_ProbePMULL();
+ g_hasPMULL = CPU_QueryPMULL() || CPU_ProbePMULL();
g_hasAES = CPU_QueryAES() || CPU_ProbeAES();
g_hasSHA256 = CPU_QuerySHA256() || CPU_ProbeSHA256();
g_hasSHA512 = CPU_QuerySHA512() || CPU_ProbeSHA512();
diff --git a/cpu.h b/cpu.h
index 40678043..8f2f3850 100644
--- a/cpu.h
+++ b/cpu.h
@@ -342,7 +342,17 @@ inline int GetCacheLineSize()
// Hide from Doxygen
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
extern bool g_ArmDetectionDone;
-extern bool g_hasARMv7, g_hasNEON, g_hasPMULL, g_hasCRC32, g_hasAES, g_hasSHA1, g_hasSHA2, g_hasSHA512, g_hasSHA3, g_hasSM3, g_hasSM4;
+extern bool g_hasARMv7;
+extern bool g_hasNEON;
+extern bool g_hasPMULL;
+extern bool g_hasCRC32;
+extern bool g_hasAES;
+extern bool g_hasSHA1;
+extern bool g_hasSHA2;
+extern bool g_hasSHA512;
+extern bool g_hasSHA3;
+extern bool g_hasSM3;
+extern bool g_hasSM4;
void CRYPTOPP_API DetectArmFeatures();
#endif // CRYPTOPP_DOXYGEN_PROCESSING
@@ -578,7 +588,13 @@ inline bool HasSM4()
// Hide from Doxygen
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
extern bool g_PowerpcDetectionDone;
-extern bool g_hasAltivec, g_hasPower7, g_hasPower8, g_hasAES, g_hasSHA256, g_hasSHA512;
+extern bool g_hasAltivec;
+extern bool g_hasPower7;
+extern bool g_hasPower8;
+extern bool g_hasAES;
+extern bool g_hasPMULL;
+extern bool g_hasSHA256;
+extern bool g_hasSHA512;
extern word32 g_cacheLineSize;
void CRYPTOPP_API DetectPowerpcFeatures();
#endif // CRYPTOPP_DOXYGEN_PROCESSING
@@ -590,11 +606,11 @@ void CRYPTOPP_API DetectPowerpcFeatures();
/// \returns true if the hardware is capable of Altivec at runtime, false otherwise.
/// \details Altivec instructions are available under most modern PowerPCs.
/// \details Runtime support requires compile time support. When compiling with GCC, you may
-/// need to compile with <tt>-mcpu=power7</tt>; while IBM XL C/C++ compilers require
-/// <tt>-qarch=pwr7 -qaltivec</tt>. Also see PowerPC's <tt>_ALTIVEC_</tt> preprocessor macro.
-/// \details Atilvec was first available on Power4 platforms. However Crypto++ releies on unaligned
-/// loads and stores which is a Power7 feature. If the platform lacks Power7 extensions, then the
-/// GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_ALTIVEC</tt>.
+/// need to compile with <tt>-mcpu=power4</tt>; while IBM XL C/C++ compilers require
+/// <tt>-qarch=pwr6 -qaltivec</tt>. Also see PowerPC's <tt>_ALTIVEC_</tt> preprocessor macro.
+/// \details Altivec was first available on Power4 platforms. However Crypto++ relies heavily
+/// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7
+/// extensions, then the GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_POWER7</tt>.
/// \note This function is only available on PowerPC and PowerPC-64 platforms
inline bool HasAltivec()
{
@@ -609,9 +625,9 @@ inline bool HasAltivec()
/// \details Runtime support requires compile time support. When compiling with GCC, you may
/// need to compile with <tt>-mcpu=power8</tt>; while IBM XL C/C++ compilers require
/// <tt>-qarch=pwr8 -qaltivec</tt>. Also see PowerPC's <tt>_ALTIVEC_</tt> preprocessor macro.
-/// \details Atilvec was first available on Power4 platforms. However Crypto++ releies on unaligned
-/// loads and stores which is a Power7 feature. If the platform lacks Power7 extensions, then the
-/// GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_ALTIVEC</tt>.
+/// \details Altivec was first available on Power4 platforms. However Crypto++ relies heavily
+/// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7
+/// extensions, then the GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_POWER7</tt>.
/// \note This function is only available on PowerPC and PowerPC-64 platforms
inline bool HasPower7()
{
@@ -626,9 +642,9 @@ inline bool HasPower7()
/// \details Runtime support requires compile time support. When compiling with GCC, you may
/// need to compile with <tt>-mcpu=power8</tt>; while IBM XL C/C++ compilers require
/// <tt>-qarch=pwr8 -qaltivec</tt>. Also see PowerPC's <tt>_ALTIVEC_</tt> preprocessor macro.
-/// \details Atilvec was first available on Power4 platforms. However Crypto++ releies on unaligned
-/// loads and stores which is a Power7 feature. If the platform lacks Power7 extensions, then the
-/// GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_ALTIVEC</tt>.
+/// \details Altivec was first available on Power4 platforms. However Crypto++ relies heavily
+/// on unaligned loads and stores which is a Power7 feature. If the platform lacks Power7
+/// extensions, then the GNUmakefile sets <tt>-DCRYPTOPP_DISABLE_POWER7</tt>.
/// \note This function is only available on PowerPC and PowerPC-64 platforms
inline bool HasPower8()
{
@@ -651,6 +667,20 @@ inline bool HasAES()
return g_hasAES;
}
+/// \brief Determine if a PowerPC processor has Polynomial Multiply available
+/// \returns true if the hardware is capable of PMULL at runtime, false otherwise.
+/// \details PMULL is part of the in-core crypto extensions on Power8 and Power9.
+/// \details Runtime support requires compile time support. When compiling with GCC, you may
+/// need to compile with <tt>-mcpu=power8</tt>; while IBM XL C/C++ compilers require
+/// <tt>-qarch=pwr8 -qaltivec</tt>. Also see PowerPC's <tt>__CRYPTO__</tt> preprocessor macro.
+/// \note This function is only available on PowerPC and PowerPC-64 platforms
+inline bool HasPMULL()
+{
+ if (!g_PowerpcDetectionDone)
+ DetectPowerpcFeatures();
+ return g_hasPMULL;
+}
+
/// \brief Determine if a PowerPC processor has SHA256 available
/// \returns true if the hardware is capable of SHA256 at runtime, false otherwise.
/// \details SHA is part of the in-crypto extensions on Power8 and Power9.
diff --git a/gcm-simd.cpp b/gcm-simd.cpp
index d6085b3f..c7c958b4 100644
--- a/gcm-simd.cpp
+++ b/gcm-simd.cpp
@@ -39,6 +39,10 @@
# include <arm_acle.h>
#endif
+#if defined(CRYPTOPP_POWER8_PMULL_AVAILABLE)
+# include "ppc-simd.h"
+#endif
+
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
# include <signal.h>
# include <setjmp.h>
@@ -61,6 +65,8 @@ extern const char GCM_SIMD_FNAME[] = __FILE__;
ANONYMOUS_NAMESPACE_BEGIN
+// ************************* Miscellaneous ************************* //
+
// GCC 4.8 is missing PMULL gear
#if (CRYPTOPP_ARM_PMULL_AVAILABLE)
# if (CRYPTOPP_GCC_VERSION >= 40800) && (CRYPTOPP_GCC_VERSION < 49000)
@@ -182,10 +188,45 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
#endif // Microsoft and compatibles
#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
+#if CRYPTOPP_POWER8_PMULL_AVAILABLE
+using CryptoPP::uint8x16_p;
+using CryptoPP::uint64x2_p;
+using CryptoPP::VectorXor;
+using CryptoPP::VectorShiftLeft;
+using CryptoPP::VectorShiftRight;
+
+// Polynomial multiply of a and b using the Power8 vpmsumd instruction.
+// Intended to multiply the low dwords of a and b.
+// NOTE(review): per the Power ISA, vpmsumd XORs the products of both
+// doubleword pairs, so the other doubleword of at least one operand is
+// presumably expected to be zero -- confirm against callers.
+inline uint64x2_p VMULL_P64(uint64x2_p a, uint64x2_p b)
+{
+    // IBM XL C/C++ and GCC-compatible compilers spell the intrinsic differently
+#if defined(__xlc__) || defined(__xlC__)
+    const uint64x2_p product = __vpmsumd (a, b);
+#else
+    const uint64x2_p product = __builtin_crypto_vpmsumd (a, b);
+#endif
+    return product;
+}
+
+// Polynomial multiply of the other (high) dwords of a and b.
+// Each operand is first passed through VectorShiftRight<8> together with
+// an all-zero vector, then multiplied exactly as VMULL_P64 does.
+// NOTE(review): presumably the shift moves the high dword into the low
+// position and fills the vacated dword with zeros -- confirm in ppc-simd.h.
+inline uint64x2_p VMULL_HIGH_P64(uint64x2_p a, uint64x2_p b)
+{
+    // a xor a yields the zero vector without needing a literal
+    const uint64x2_p zero = VectorXor(a, a);
+    const uint64x2_p hiA = VectorShiftRight<8>(a, zero);
+    const uint64x2_p hiB = VectorShiftRight<8>(b, zero);
+
+    // VMULL_P64 already selects __vpmsumd or __builtin_crypto_vpmsumd
+    return VMULL_P64(hiA, hiB);
+}
+#endif // CRYPTOPP_POWER8_PMULL_AVAILABLE
+
ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
+// ************************* Feature Probes ************************* //
+
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
extern "C" {
typedef void (*SigHandler)(int);
@@ -209,8 +250,10 @@ bool CPU_ProbePMULL()
__try
{
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
- const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
- b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
+ const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,
+ 0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
+ b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,
+ 0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
const poly128_t r1 = vmull_p64(a1, b1);
const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
@@ -219,8 +262,10 @@ bool CPU_ProbePMULL()
const uint64x2_t t1 = (uint64x2_t)(r1); // {bignum,bignum}
const uint64x2_t t2 = (uint64x2_t)(r2); // {bignum,bignum}
- result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
- vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
+ result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 &&
+ vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
+ vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 &&
+ vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
@@ -246,8 +291,10 @@ bool CPU_ProbePMULL()
else
{
const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
- const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
- b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
+ const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,
+ 0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
+ b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,
+ 0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
const poly128_t r1 = VMULL_P64(a1, b1);
const poly128_t r2 = VMULL_HIGH_P64((poly64x2_t)(a2), (poly64x2_t)(b2));
@@ -256,8 +303,10 @@ bool CPU_ProbePMULL()
const uint64x2_t t1 = (uint64x2_t)(r1); // {bignum,bignum}
const uint64x2_t t2 = (uint64x2_t)(r2); // {bignum,bignum}
- result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
- vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
+ result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 &&
+ vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
+ vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 &&
+ vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
@@ -270,6 +319,54 @@ bool CPU_ProbePMULL()
}
#endif // ARM32 or ARM64
+#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
+// Probes for Power8 polynomial multiply by executing it under a SIGILL guard.
+// Returns true only if VMULL_P64 and VMULL_HIGH_P64 ran and produced the
+// expected products. Returns false if probes are disabled, if PMULL support
+// was not compiled in, if the signal state could not be saved, or if control
+// came back through s_jmpSIGILL. The previous SIGILL handler and signal mask
+// are restored on every path that installed the probe handler.
+bool CPU_ProbePMULL()
+{
+#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
+    return false;
+#elif (CRYPTOPP_POWER8_PMULL_AVAILABLE)
+    // longjmp and clobber warnings. Volatile is required.
+    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+    volatile bool result = true;
+
+    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    volatile sigset_t oldMask;
+    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
+    {
+        // Do not leave SigIllHandler installed when bailing out early;
+        // a later SIGILL would otherwise jump through a stale s_jmpSIGILL.
+        signal(SIGILL, oldHandler);
+        return false;
+    }
+
+    // A non-zero return means control came back via s_jmpSIGILL
+    // (presumably a longjmp from SigIllHandler after an illegal instruction).
+    if (setjmp(s_jmpSIGILL))
+        result = false;
+    else
+    {
+        const uint64x2_p a1={0x9090909090909090ull}, b1={0xb0b0b0b0b0b0b0b0ull};
+        const uint8x16_p a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,
+                             0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
+                         b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,
+                             0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
+
+        const uint64x2_p r1 = VMULL_P64(a1, b1);
+        const uint64x2_p r2 = VMULL_HIGH_P64((uint64x2_p)(a2), (uint64x2_p)(b2));
+
+        // Store the vectors to memory and compare against the known products
+        word64 w1[2], w2[2];
+        VectorStore(r1, (byte*)w1); VectorStore(r2, (byte*)w2);
+        result = !!(w1[0] == 0x5300530053005300ull && w1[1] == 0x5300530053005300ull &&
+                    w2[0] == 0x6c006c006c006c00ull && w2[1] == 0x6c006c006c006c00ull);
+    }
+
+    // Undo the probe's signal state: mask first, then the handler
+    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
+    signal(SIGILL, oldHandler);
+    return result;
+#else
+    return false;
+#endif  // CRYPTOPP_POWER8_PMULL_AVAILABLE
+}
+#endif // PPC32 or PPC64
+
+// *************************** ARM NEON *************************** //
+
#if CRYPTOPP_ARM_NEON_AVAILABLE
void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c)
{
@@ -413,6 +510,8 @@ void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer)
}
#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
+// ***************************** SSE ***************************** //
+
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
diff --git a/validat3.cpp b/validat3.cpp
index 218b08c1..4a2be166 100644
--- a/validat3.cpp
+++ b/validat3.cpp
@@ -374,14 +374,16 @@ bool TestSettings()
const bool hasAltivec = HasAltivec();
const bool hasPower7 = HasPower7();
const bool hasPower8 = HasPower8();
+ const bool hasPMULL = HasPMULL();
const bool hasAES = HasAES();
const bool hasSHA256 = HasSHA256();
const bool hasSHA512 = HasSHA512();
std::cout << "passed: ";
std::cout << "hasAltivec == " << hasAltivec << ", hasPower7 == " << hasPower7;
- std::cout << ", hasPower8 == " << hasPower8 << ", hasAES == " << hasAES;
- std::cout << ", hasSHA256 == " << hasSHA256 << ", hasSHA512 == " << hasSHA512 << "\n";
+ std::cout << ", hasPower8 == " << hasPower8 << ", hasPMULL == " << hasPMULL;
+ std::cout << ", hasAES == " << hasAES << ", hasSHA256 == " << hasSHA256;
+ std::cout << ", hasSHA512 == " << hasSHA512 << "\n";
#endif