author     Jeffrey Walton <noloader@gmail.com>    2017-12-12 08:09:31 -0500
committer  Jeffrey Walton <noloader@gmail.com>    2017-12-12 08:09:31 -0500
commit     15d637771f2f41747410dfeb177ff642d91c130d (patch)
tree       0018687735ee64e91f270039c001889672e854dc
parent     b7e636ac51f1ecfef468f358babc0cc54c281bdb (diff)
download   cryptopp-git-15d637771f2f41747410dfeb177ff642d91c130d.tar.gz
Add CRYPTOPP_POWER5_AVAILABLE
Power4 lacks 'vector long long'. Rename datatypes such as 'uint8x16_p8' to 'uint8x16_p'. Originally the p8 suffix indicated use with Power8 in-core crypto. We are now using AltiVec/Power4 for general vector operations.
-rw-r--r--  config.h    10
-rw-r--r--  ppc-simd.h  74
2 files changed, 45 insertions, 39 deletions
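As a rough sketch of what the rename means for calling code, the snippet below mirrors the example already given in the VectorShiftLeft() documentation; the buffer name and the (void) cast are illustrative, not taken from the library:

    #include "ppc-simd.h"
    using namespace CryptoPP;

    void Example(const byte buf[16])
    {
        // Formerly uint8x16_p8; the p8 suffix is dropped because these types
        // only require AltiVec/Power4, not Power8 in-core crypto.
        uint8x16_p r0 = {0};
        uint8x16_p r1 = VectorLoad(buf);
        uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
        (void)r5;
    }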
diff --git a/config.h b/config.h
index 33ff6f83..5d3ceff6 100644
--- a/config.h
+++ b/config.h
@@ -629,7 +629,6 @@ NAMESPACE_END
#endif
// An old Apple G5 with GCC 4.01 has AltiVec, but it's only Power4 or so.
-// We need Power7 or above, so the makefile defines CRYPTOPP_DISABLE_ALTIVEC.
#if !defined(CRYPTOPP_ALTIVEC_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ALTIVEC)
# if defined(_ARCH_PWR4) || defined(__ALTIVEC__) || \
(CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40001)
@@ -637,12 +636,21 @@ NAMESPACE_END
# endif
#endif
+// We need Power5 for 'vector unsigned long long'
+#if !defined(CRYPTOPP_POWER5_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER5) && defined(CRYPTOPP_ALTIVEC_AVAILABLE)
+# if defined(_ARCH_PWR5) || (CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40100)
+# define CRYPTOPP_POWER5_AVAILABLE 1
+# endif
+#endif
+
+// We need Power7 for unaligned loads and stores
#if !defined(CRYPTOPP_POWER7_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER7) && defined(CRYPTOPP_ALTIVEC_AVAILABLE)
# if defined(_ARCH_PWR7) || (CRYPTOPP_XLC_VERSION >= 100000) || (CRYPTOPP_GCC_VERSION >= 40100)
# define CRYPTOPP_POWER7_AVAILABLE 1
# endif
#endif
+// We need Power8 for in-core crypto
#if !defined(CRYPTOPP_POWER8_AVAILABLE) && !defined(CRYPTOPP_DISABLE_POWER8) && defined(CRYPTOPP_POWER7_AVAILABLE)
# if defined(_ARCH_PWR8) || (CRYPTOPP_XLC_VERSION >= 130000) || (CRYPTOPP_GCC_VERSION >= 40800)
# define CRYPTOPP_POWER8_AVAILABLE 1
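A brief, hedged sketch of how the new Power5 tier is meant to be consumed; the helper name XorWords is illustrative and not part of the library:

    #include "ppc-simd.h"
    using namespace CryptoPP;

    #if defined(CRYPTOPP_POWER5_AVAILABLE)
    // uint64x2_p ('vector unsigned long long') is only typedef'd when the
    // Power5 tier is available, so code naming it is guarded the same way.
    inline uint64x2_p XorWords(const uint64x2_p& a, const uint64x2_p& b)
    {
        return VectorXor(a, b);
    }
    #endif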
diff --git a/ppc-simd.h b/ppc-simd.h
index d98fcea3..fbd6d4b0 100644
--- a/ppc-simd.h
+++ b/ppc-simd.h
@@ -29,27 +29,25 @@ NAMESPACE_BEGIN(CryptoPP)
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
-typedef __vector unsigned char uint8x16_p8;
-typedef __vector unsigned int uint32x4_p8;
+typedef __vector unsigned char uint8x16_p;
+typedef __vector unsigned int uint32x4_p;
-#if defined(CRYPTOPP_POWER7_AVAILABLE)
-typedef __vector unsigned long long uint64x2_p8;
-#else
-typedef __vector unsigned int uint64x2_p8;
+#if defined(CRYPTOPP_POWER5_AVAILABLE)
+typedef __vector unsigned long long uint64x2_p;
#endif
// Use 8x16 for documentation because it is used frequently
#if defined(CRYPTOPP_XLC_VERSION)
-typedef uint8x16_p8 VectorType;
+typedef uint8x16_p VectorType;
#elif defined(CRYPTOPP_GCC_VERSION)
-typedef uint64x2_p8 VectorType;
+typedef uint64x2_p VectorType;
#endif
#if defined(CRYPTOPP_DOXYGEN_PROCESSING)
/// \brief Default vector typedef
/// \details IBM XL C/C++ provides equally good support for all vector types,
-/// including <tt>uint8x16_p8</tt>. GCC provides good support for
-/// <tt>uint64x2_p8</tt>. <tt>VectorType</tt> is typedef'd accordingly to
+/// including <tt>uint8x16_p</tt>. GCC provides good support for
+/// <tt>uint64x2_p</tt>. <tt>VectorType</tt> is typedef'd accordingly to
/// minimize casting to and from built-in function calls.
# define VectorType ...
#endif
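A small, hedged illustration of the typedef's intent; the variable names are not taken from the library:

    #include "ppc-simd.h"
    using namespace CryptoPP;

    // VectorType resolves to uint8x16_p under IBM XL C/C++ and to uint64x2_p
    // under GCC, so the same generic line compiles without casts on either.
    VectorType zero = {0};
    VectorType copy = VectorXor(zero, zero);  // VectorXor is templated over both typedefs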
@@ -68,8 +66,8 @@ inline void ReverseByteArrayLE(byte src[16])
#if defined(CRYPTOPP_XLC_VERSION) && defined(CRYPTOPP_LITTLE_ENDIAN)
vec_st(vec_reve(vec_ld(0, src)), 0, src);
#elif defined(CRYPTOPP_LITTLE_ENDIAN)
- const uint8x16_p8 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
- const uint8x16_p8 zero = {0};
+ const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
+ const uint8x16_p zero = {0};
vec_vsx_st(vec_perm(vec_vsx_ld(0, src), zero, mask), 0, src);
#endif
}
@@ -83,8 +81,8 @@ inline void ReverseByteArrayLE(byte src[16])
template <class T>
inline T Reverse(const T& src)
{
- const uint8x16_p8 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
- const uint8x16_p8 zero = {0};
+ const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
+ const uint8x16_p zero = {0};
return vec_perm(src, zero, mask);
}
@@ -216,12 +214,12 @@ template <class T>
inline void VectorStoreBE(const T& src, uint8_t dest[16])
{
#if defined(CRYPTOPP_XLC_VERSION)
- vec_xst_be((uint8x16_p8)src, 0, (uint8_t*)dest);
+ vec_xst_be((uint8x16_p)src, 0, (uint8_t*)dest);
#else
# if defined(CRYPTOPP_LITTLE_ENDIAN)
- vec_vsx_st(Reverse((uint8x16_p8)src), 0, (uint8_t*)dest);
+ vec_vsx_st(Reverse((uint8x16_p)src), 0, (uint8_t*)dest);
# else
- vec_vsx_st((uint8x16_p8)src, 0, (uint8_t*)dest);
+ vec_vsx_st((uint8x16_p)src, 0, (uint8_t*)dest);
# endif
#endif
}
@@ -239,12 +237,12 @@ template <class T>
inline void VectorStoreBE(const T& src, int off, uint8_t dest[16])
{
#if defined(CRYPTOPP_XLC_VERSION)
- vec_xst_be((uint8x16_p8)src, off, (uint8_t*)dest);
+ vec_xst_be((uint8x16_p)src, off, (uint8_t*)dest);
#else
# if defined(CRYPTOPP_LITTLE_ENDIAN)
- vec_vsx_st(Reverse((uint8x16_p8)src), off, (uint8_t*)dest);
+ vec_vsx_st(Reverse((uint8x16_p)src), off, (uint8_t*)dest);
# else
- vec_vsx_st((uint8x16_p8)src, off, (uint8_t*)dest);
+ vec_vsx_st((uint8x16_p)src, off, (uint8_t*)dest);
# endif
#endif
}
@@ -262,12 +260,12 @@ inline void VectorStore(const T& src, byte dest[16])
{
// Do not call VectorStoreBE. It slows us down by about 0.5 cpb on LE.
#if defined(CRYPTOPP_XLC_VERSION)
- vec_xst_be((uint8x16_p8)src, 0, (uint8_t*)dest);
+ vec_xst_be((uint8x16_p)src, 0, (uint8_t*)dest);
#else
# if defined(CRYPTOPP_LITTLE_ENDIAN)
- vec_vsx_st(Reverse((uint8x16_p8)src), 0, (uint8_t*)dest);
+ vec_vsx_st(Reverse((uint8x16_p)src), 0, (uint8_t*)dest);
# else
- vec_vsx_st((uint8x16_p8)src, 0, (uint8_t*)dest);
+ vec_vsx_st((uint8x16_p)src, 0, (uint8_t*)dest);
# endif
#endif
}
@@ -286,12 +284,12 @@ inline void VectorStore(const T& src, int off, byte dest[16])
{
// Do not call VectorStoreBE. It slows us down by about 0.5 cpb on LE.
#if defined(CRYPTOPP_XLC_VERSION)
- vec_xst_be((uint8x16_p8)src, off, (uint8_t*)dest);
+ vec_xst_be((uint8x16_p)src, off, (uint8_t*)dest);
#else
# if defined(CRYPTOPP_LITTLE_ENDIAN)
- vec_vsx_st(Reverse((uint8x16_p8)src), off, (uint8_t*)dest);
+ vec_vsx_st(Reverse((uint8x16_p)src), off, (uint8_t*)dest);
# else
- vec_vsx_st((uint8x16_p8)src, off, (uint8_t*)dest);
+ vec_vsx_st((uint8x16_p)src, off, (uint8_t*)dest);
# endif
#endif
}
@@ -303,13 +301,13 @@ inline void VectorStore(const T& src, int off, byte dest[16])
/// \param vec2 the second vector
/// \param mask vector mask
/// \details VectorPermute returns a new vector from vec1 and vec2
-/// based on mask. mask is an uint8x16_p8 type vector. The return
+/// based on mask. mask is an uint8x16_p type vector. The return
/// vector is the same type as vec1.
/// \since Crypto++ 6.0
template <class T1, class T2>
inline T1 VectorPermute(const T1& vec1, const T1& vec2, const T2& mask)
{
- return (T1)vec_perm(vec1, vec2, (uint8x16_p8)mask);
+ return (T1)vec_perm(vec1, vec2, (uint8x16_p)mask);
}
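For instance, the byte-reversal mask used by Reverse() above can be applied through VectorPermute(); this is a sketch, and the function name ReverseBytes is illustrative:

    #include "ppc-simd.h"
    using namespace CryptoPP;

    // Same effect as Reverse(value): permute 'value' against a zero vector
    // using a descending byte-index mask.
    inline uint8x16_p ReverseBytes(const uint8x16_p& value)
    {
        const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
        const uint8x16_p zero = {0};
        return VectorPermute(value, zero, mask);
    }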
/// \brief XOR two vectors
@@ -349,16 +347,16 @@ inline T1 VectorAdd(const T1& vec1, const T2& vec2)
/// \param vec2 the second vector
/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
/// new vector after shifting the concatenation by the specified number
-/// of bytes. Both vec1 and vec2 are cast to uint8x16_p8. The return
+/// of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
/// vector is the same type as vec1.
/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, b,
/// c)</tt>. On little endian machines VectorShiftLeft() is translated to
/// <tt>vec_sld(b, a, 16-c)</tt>. You should always call the function as
/// if on a big endian machine as shown below.
/// <pre>
-/// uint8x16_p8 r0 = {0};
-/// uint8x16_p8 r1 = VectorLoad(ptr);
-/// uint8x16_p8 r5 = VectorShiftLeft<12>(r0, r1);
+/// uint8x16_p r0 = {0};
+/// uint8x16_p r1 = VectorLoad(ptr);
+/// uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
/// </pre>
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
/// endian sensitive?</A> on Stack Overflow
@@ -367,9 +365,9 @@ template <unsigned int C, class T1, class T2>
inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
{
#if defined(CRYPTOPP_LITTLE_ENDIAN)
- return (T1)vec_sld((uint8x16_p8)vec2, (uint8x16_p8)vec1, 16-C);
+ return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, 16-C);
#else
- return (T1)vec_sld((uint8x16_p8)vec1, (uint8x16_p8)vec2, C);
+ return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, C);
#endif
}
@@ -469,9 +467,9 @@ template <int func, int subfunc, class T>
inline T VectorSHA256(const T& vec)
{
#if defined(CRYPTOPP_XLC_VERSION)
- return (T)__vshasigmaw((uint32x4_p8)vec, func, subfunc);
+ return (T)__vshasigmaw((uint32x4_p)vec, func, subfunc);
#elif defined(CRYPTOPP_GCC_VERSION)
- return (T)__builtin_crypto_vshasigmaw((uint32x4_p8)vec, func, subfunc);
+ return (T)__builtin_crypto_vshasigmaw((uint32x4_p)vec, func, subfunc);
#else
CRYPTOPP_ASSERT(0);
#endif
@@ -489,9 +487,9 @@ template <int func, int subfunc, class T>
inline T VectorSHA512(const T& vec)
{
#if defined(CRYPTOPP_XLC_VERSION)
- return (T)__vshasigmad((uint64x2_p8)vec, func, subfunc);
+ return (T)__vshasigmad((uint64x2_p)vec, func, subfunc);
#elif defined(CRYPTOPP_GCC_VERSION)
- return (T)__builtin_crypto_vshasigmad((uint64x2_p8)vec, func, subfunc);
+ return (T)__builtin_crypto_vshasigmad((uint64x2_p)vec, func, subfunc);
#else
CRYPTOPP_ASSERT(0);
#endif