diff options
author | noloader <noloader@57ff6487-cd31-0410-9ec3-f628ee90f5f0> | 2015-07-14 02:57:13 +0000 |
---|---|---|
committer | noloader <noloader@57ff6487-cd31-0410-9ec3-f628ee90f5f0> | 2015-07-14 02:57:13 +0000 |
commit | 51dd45a78501d37d02fea9ffa504d2e609ad8672 (patch) | |
tree | 79997d63ec3bcfc521fbc6b058d6405c90884787 | |
parent | 47e5a4d4ea89c7f236bfc55df01c052efc3059e1 (diff) | |
download | cryptopp-51dd45a78501d37d02fea9ffa504d2e609ad8672.tar.gz |
Cleared crash with GCC 4.8 and above and -O3. In a nutshell, it was due to vectorization and alignment violations against the vmovdqa instruction
git-svn-id: svn://svn.code.sf.net/p/cryptopp/code/trunk/c5@583 57ff6487-cd31-0410-9ec3-f628ee90f5f0
-rw-r--r-- | GNUmakefile | 3 | ||||
-rw-r--r-- | config.h | 8 | ||||
-rw-r--r-- | misc.cpp | 19 | ||||
-rw-r--r-- | misc.h | 35 | ||||
-rw-r--r-- | sha3.h | 2 | ||||
-rw-r--r-- | stdcpp.h | 4 |
6 files changed, 62 insertions, 9 deletions
diff --git a/GNUmakefile b/GNUmakefile index ca36725..30284ec 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -1,6 +1,6 @@ CXXFLAGS ?= -DNDEBUG SYMBOLS ?= -g2 -OPTIMIZE ?= -O2 +OPTIMIZE ?= -O3 # -fPIC is supported, and enabled by default for x86_64. # the following options reduce code size, but breaks link or makes link very slow on some systems # CXXFLAGS += -ffunction-sections -fdata-sections @@ -64,7 +64,6 @@ endif # We can do integer math using the Posix shell in a GNUmakefile # Below, we are building a boolean circuit that says "Darwin && (GCC 4.2 || Clang)" MULTIARCH_SUPPORT = $(shell echo $$(($(IS_DARWIN) * ($(GCC42_OR_LATER) + $(CLANG_COMPILER))))) - ifneq ($(MULTIARCH_SUPPORT),0) CXXFLAGS += -arch x86_64 -arch i386 else @@ -23,6 +23,11 @@ // This macro will be ignored if NO_OS_DEPENDENCE is defined. #define USE_MS_CRYPTOAPI +// Define this to ensure C/C++ standard compliance and adherence +// to aliasing rules and other alignment fodder. If you experience +// a break at -O3 with GCC, you should try this first. +// #define CRYPTOPP_NO_UNALIGNED_DATA_ACCESS + // ***************** Less Important Settings *************** // define this to retain (as much as possible) old deprecated function and class names @@ -342,7 +347,8 @@ NAMESPACE_END #define CRYPTOPP_BOOL_X86 0 #endif -#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || defined(__powerpc__) +// CRYPTOPP_NO_UNALIGNED_DATA_ACCESS can be set on the command line or in config.h above. +#if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || defined(__powerpc__)) #define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS #endif @@ -14,14 +14,20 @@ NAMESPACE_BEGIN(CryptoPP) +// Vectorization at -O3 requires IsStrictAligned<word64> for GCC 4.8 and above with xorbuf and VerifyBufsEqual. +// Problems have not been experienced for the word32 variant, but it may aoccur in the future. 
+ void xorbuf(byte *buf, const byte *mask, size_t count) { size_t i; if (IsAligned<word32>(buf) && IsAligned<word32>(mask)) { - if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsAligned<word64>(buf) && IsAligned<word64>(mask)) + if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsStrictAligned<word64>(buf) && IsStrictAligned<word64>(mask)) { + assert(IsAlignedOn(input, GetStrictAlignedOn<word64>(buf))); + assert(IsAlignedOn(mask, GetStrictAlignedOn<word64>(mask))); + for (i=0; i<count/8; i++) ((word64*)buf)[i] ^= ((word64*)mask)[i]; count -= 8*i; @@ -50,8 +56,12 @@ void xorbuf(byte *output, const byte *input, const byte *mask, size_t count) if (IsAligned<word32>(output) && IsAligned<word32>(input) && IsAligned<word32>(mask)) { - if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsAligned<word64>(output) && IsAligned<word64>(input) && IsAligned<word64>(mask)) + if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsStrictAligned<word64>(output) && IsStrictAligned<word64>(input) && IsStrictAligned<word64>(mask)) { + assert(IsAlignedOn(output, GetStrictAlignedOn<word64>(output))); + assert(IsAlignedOn(input, GetStrictAlignedOn<word64>(input))); + assert(IsAlignedOn(mask, GetStrictAlignedOn<word64>(mask))); + for (i=0; i<count/8; i++) ((word64*)output)[i] = ((word64*)input)[i] ^ ((word64*)mask)[i]; count -= 8*i; @@ -84,8 +94,11 @@ bool VerifyBufsEqual(const byte *buf, const byte *mask, size_t count) if (IsAligned<word32>(buf) && IsAligned<word32>(mask)) { word32 acc32 = 0; - if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsAligned<word64>(buf) && IsAligned<word64>(mask)) + if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsStrictAligned<word64>(buf) && IsStrictAligned<word64>(mask)) { + assert(IsAlignedOn(buf, GetStrictAlignedOn<word64>(buf))); + assert(IsAlignedOn(mask, GetStrictAlignedOn<word64>(mask))); + word64 acc64 = 0; for (i=0; i<count/8; i++) acc64 |= ((word64*)buf)[i] ^ ((word64*)mask)[i]; @@ -382,21 +382,26 @@ inline T1 RoundDownToMultipleOf(const T1 &n, const T2 &m) template <class T1, class T2> inline T1 RoundUpToMultipleOf(const T1 &n, const T2 
&m) { + // TODO: undefined behavior here... if (n+m-1 < n) throw InvalidArgument("RoundUpToMultipleOf: integer overflow"); return RoundDownToMultipleOf(n+m-1, m); } +// Influenced by CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS; may cause +// problems at -O3 and GCC vectorization. template <class T> inline unsigned int GetAlignmentOf(T *dummy=NULL) // VC60 workaround { #ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS if (sizeof(T) < 16) - return 1; + return 1; #endif - + #if (_MSC_VER >= 1300) return __alignof(T); +#elif defined(__clang__) + return __alignof(T); #elif defined(__GNUC__) return __alignof__(T); #elif CRYPTOPP_BOOL_SLOW_WORD64 @@ -406,17 +411,43 @@ inline unsigned int GetAlignmentOf(T *dummy=NULL) // VC60 workaround #endif } +// Not influenced by CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS; will not +// cause problems with -O3 and GCC vectorization. +template <class T> +inline unsigned int GetStrictAlignmentOf(T *dummy=NULL) // VC60 workaround +{ +#if (_MSC_VER >= 1300) + return __alignof(T); +#elif defined(__clang__) + return __alignof(T); +#elif defined(__GNUC__) + return __alignof__(T); +#else + return sizeof(T); +#endif +} + inline bool IsAlignedOn(const void *p, unsigned int alignment) { return alignment==1 || (IsPowerOf2(alignment) ? ModPowerOf2((size_t)p, alignment) == 0 : (size_t)p % alignment == 0); } +// Influenced by CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS; may cause +// problems at -O3 and GCC vectorization. template <class T> inline bool IsAligned(const void *p, T *dummy=NULL) // VC60 workaround { return IsAlignedOn(p, GetAlignmentOf<T>()); } +// Not influenced by CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS; will not +// cause problems with -O3 and GCC vectorization. 
+template <class T> +inline bool IsStrictAligned(const void *p, T *dummy=NULL) // VC60 workaround +{ + return IsAlignedOn(p, GetStrictAlignmentOf<T>()); +} + #ifdef IS_LITTLE_ENDIAN typedef LittleEndian NativeByteOrder; #else @@ -15,7 +15,7 @@ public: SHA3(unsigned int digestSize) : m_digestSize(digestSize) {Restart();}
unsigned int DigestSize() const {return m_digestSize;}
std::string AlgorithmName() const {return "SHA-3-" + IntToString(m_digestSize*8);}
- unsigned int OptimalDataAlignment() const {return GetAlignmentOf<word64>();}
+ unsigned int OptimalDataAlignment() const {return GetStrictAlignmentOf<word64>();}
void Update(const byte *input, size_t length);
void Restart();
@@ -19,6 +19,10 @@ #include <map> #include <vector> +#if !defined(_NDEBUG) && !defined(NDEBUG) +# include <cassert> +#endif + #ifdef CRYPTOPP_INCLUDE_VECTOR_CC // workaround needed on Sun Studio 12u1 Sun C++ 5.10 SunOS_i386 128229-02 2009/09/21 #include <vector.cc> |