summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorweidai <weidai11@users.noreply.github.com>2007-04-16 00:21:07 +0000
committerweidai <weidai11@users.noreply.github.com>2007-04-16 00:21:07 +0000
commit5cad605559f973f2ce7d5ede1e6b67151f8e1d51 (patch)
tree6cc6caeba741b13cbc0cf5498d1c957f97451bf3
parent199c7960213d94682913e72de0e3f6d04f575a8b (diff)
downloadcryptopp-git-5cad605559f973f2ce7d5ede1e6b67151f8e1d51.tar.gz
optimizations
-rw-r--r--gf2_32.cpp2
-rw-r--r--misc.h244
-rw-r--r--seal.cpp14
-rw-r--r--seal.h2
-rw-r--r--trunhash.h2
-rw-r--r--words.h3
6 files changed, 209 insertions, 58 deletions
diff --git a/gf2_32.cpp b/gf2_32.cpp
index fd2661b5..ae4874a4 100644
--- a/gf2_32.cpp
+++ b/gf2_32.cpp
@@ -22,7 +22,7 @@ GF2_32::Element GF2_32::Multiply(Element a, Element b) const
table[3] = m_modulus ^ (a<<1);
}
-#ifdef FAST_ROTATE
+#if CRYPTOPP_FAST_ROTATE(32)
b = rotrFixed(b, 30U);
word32 result = table[b&2];
diff --git a/misc.h b/misc.h
index 2c71deb4..c898040f 100644
--- a/misc.h
+++ b/misc.h
@@ -4,14 +4,39 @@
#include "cryptlib.h"
#include "smartptr.h"
-#ifdef INTEL_INTRINSICS
-#include <stdlib.h>
+#ifdef _MSC_VER
+ #include <stdlib.h>
+ #if _MSC_VER >= 1400
+ // VC2005 workaround: disable declarations that conflict with winnt.h
+ #define _interlockedbittestandset CRYPTOPP_DISABLED_INTRINSIC_1
+ #define _interlockedbittestandreset CRYPTOPP_DISABLED_INTRINSIC_2
+ #include <intrin.h>
+ #undef _interlockedbittestandset
+ #undef _interlockedbittestandreset
+ #define CRYPTOPP_FAST_ROTATE(x) 1
+ #elif _MSC_VER >= 1300
+	#define CRYPTOPP_FAST_ROTATE(x) ((x) == 32 || (x) == 64)
+ #else
+ #define CRYPTOPP_FAST_ROTATE(x) ((x) == 32)
+ #endif
+#elif (defined(__MWERKS__) && TARGET_CPU_PPC) || \
+ (defined(__GNUC__) && (defined(_ARCH_PWR2) || defined(_ARCH_PWR) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || defined(_ARCH_COM)))
+ #define CRYPTOPP_FAST_ROTATE(x) ((x) == 32)
+#elif defined(__GNUC__) && (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86) // depend on GCC's peephole optimization to generate rotate instructions
+ #define CRYPTOPP_FAST_ROTATE(x) 1
+#else
+	#define CRYPTOPP_FAST_ROTATE(x) 0
#endif
#ifdef __BORLANDC__
#include <mem.h>
#endif
+#if defined(__GNUC__) && !defined(__sun__) && !defined(__MINGW32__)
+#define CRYPTOPP_BYTESWAP_AVAILABLE
+#include <byteswap.h>
+#endif
+
NAMESPACE_BEGIN(CryptoPP)
// ************** compile-time assertion ***************
@@ -289,8 +314,13 @@ inline T1 RoundUpToMultipleOf(const T1 &n, const T2 &m)
}
template <class T>
-inline unsigned int GetAlignment(T *dummy=NULL) // VC60 workaround
+inline unsigned int GetAlignmentOf(T *dummy=NULL) // VC60 workaround
{
+#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86
+ if (sizeof(T) < 16)
+ return 1; // alignment not needed on x86 and x64
+#endif
+
#if (_MSC_VER >= 1300)
return __alignof(T);
#elif defined(__GNUC__)
@@ -304,13 +334,13 @@ inline unsigned int GetAlignment(T *dummy=NULL) // VC60 workaround
inline bool IsAlignedOn(const void *p, unsigned int alignment)
{
- return IsPowerOf2(alignment) ? ModPowerOf2((size_t)p, alignment) == 0 : (size_t)p % alignment == 0;
+ return alignment==1 || (IsPowerOf2(alignment) ? ModPowerOf2((size_t)p, alignment) == 0 : (size_t)p % alignment == 0);
}
template <class T>
inline bool IsAligned(const void *p, T *dummy=NULL) // VC60 workaround
{
- return IsAlignedOn(p, GetAlignment<T>());
+ return IsAlignedOn(p, GetAlignmentOf<T>());
}
#ifdef IS_LITTLE_ENDIAN
@@ -418,31 +448,29 @@ template <class T> inline T rotrMod(T x, unsigned int y)
return T((x>>y) | (x<<(sizeof(T)*8-y)));
}
-#ifdef INTEL_INTRINSICS
-
-#pragma intrinsic(_lrotl, _lrotr)
+#ifdef _MSC_VER
template<> inline word32 rotlFixed<word32>(word32 x, unsigned int y)
{
- assert(y < 32);
+ assert(y < 8*sizeof(x));
return y ? _lrotl(x, y) : x;
}
template<> inline word32 rotrFixed<word32>(word32 x, unsigned int y)
{
- assert(y < 32);
+ assert(y < 8*sizeof(x));
return y ? _lrotr(x, y) : x;
}
template<> inline word32 rotlVariable<word32>(word32 x, unsigned int y)
{
- assert(y < 32);
+ assert(y < 8*sizeof(x));
return _lrotl(x, y);
}
template<> inline word32 rotrVariable<word32>(word32 x, unsigned int y)
{
- assert(y < 32);
+ assert(y < 8*sizeof(x));
return _lrotr(x, y);
}
@@ -456,9 +484,119 @@ template<> inline word32 rotrMod<word32>(word32 x, unsigned int y)
return _lrotr(x, y);
}
-#endif // #ifdef INTEL_INTRINSICS
+#if _MSC_VER >= 1300
+
+template<> inline word64 rotlFixed<word64>(word64 x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return y ? _rotl64(x, y) : x;
+}
+
+template<> inline word64 rotrFixed<word64>(word64 x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return y ? _rotr64(x, y) : x;
+}
+
+template<> inline word64 rotlVariable<word64>(word64 x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return _rotl64(x, y);
+}
+
+template<> inline word64 rotrVariable<word64>(word64 x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return _rotr64(x, y);
+}
+
+template<> inline word64 rotlMod<word64>(word64 x, unsigned int y)
+{
+ return _rotl64(x, y);
+}
+
+template<> inline word64 rotrMod<word64>(word64 x, unsigned int y)
+{
+ return _rotr64(x, y);
+}
+
+#endif // #if _MSC_VER >= 1300
+
+#if _MSC_VER >= 1400 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER >= 1000)
+
+template<> inline word16 rotlFixed<word16>(word16 x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return y ? _rotl16(x, y) : x;
+}
+
+template<> inline word16 rotrFixed<word16>(word16 x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return y ? _rotr16(x, y) : x;
+}
-#ifdef PPC_INTRINSICS
+template<> inline word16 rotlVariable<word16>(word16 x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return _rotl16(x, y);
+}
+
+template<> inline word16 rotrVariable<word16>(word16 x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return _rotr16(x, y);
+}
+
+template<> inline word16 rotlMod<word16>(word16 x, unsigned int y)
+{
+ return _rotl16(x, y);
+}
+
+template<> inline word16 rotrMod<word16>(word16 x, unsigned int y)
+{
+ return _rotr16(x, y);
+}
+
+template<> inline byte rotlFixed<byte>(byte x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return y ? _rotl8(x, y) : x;
+}
+
+template<> inline byte rotrFixed<byte>(byte x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return y ? _rotr8(x, y) : x;
+}
+
+template<> inline byte rotlVariable<byte>(byte x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return _rotl8(x, y);
+}
+
+template<> inline byte rotrVariable<byte>(byte x, unsigned int y)
+{
+ assert(y < 8*sizeof(x));
+ return _rotr8(x, y);
+}
+
+template<> inline byte rotlMod<byte>(byte x, unsigned int y)
+{
+ return _rotl8(x, y);
+}
+
+template<> inline byte rotrMod<byte>(byte x, unsigned int y)
+{
+ return _rotr8(x, y);
+}
+
+#endif // #if _MSC_VER >= 1400
+
+#endif // #ifdef _MSC_VER
+
+#if (defined(__MWERKS__) && TARGET_CPU_PPC)
template<> inline word32 rotlFixed<word32>(word32 x, unsigned int y)
{
@@ -494,7 +632,7 @@ template<> inline word32 rotrMod<word32>(word32 x, unsigned int y)
return (__rlwnm(x,32-y,0,31));
}
-#endif // #ifdef PPC_INTRINSICS
+#endif // #if (defined(__MWERKS__) && TARGET_CPU_PPC)
// ************** endian reversal ***************
@@ -514,15 +652,27 @@ inline byte ByteReverse(byte value)
inline word16 ByteReverse(word16 value)
{
+#ifdef CRYPTOPP_BYTESWAP_AVAILABLE
+ return bswap_16(value);
+#elif defined(_MSC_VER) && _MSC_VER >= 1300
+ return _byteswap_ushort(value);
+#else
return rotlFixed(value, 8U);
+#endif
}
inline word32 ByteReverse(word32 value)
{
-#ifdef PPC_INTRINSICS
- // PPC: load reverse indexed instruction
+#if defined(__GNUC__) && defined(CRYPTOPP_X86_ASM_AVAILABLE)
+ __asm__ ("bswap %0" : "=r" (value) : "0" (value));
+ return value;
+#elif defined(CRYPTOPP_BYTESWAP_AVAILABLE)
+ return bswap_32(value);
+#elif defined(__MWERKS__) && TARGET_CPU_PPC
return (word32)__lwbrx(&value,0);
-#elif defined(FAST_ROTATE)
+#elif defined(_MSC_VER) && _MSC_VER >= 1300
+ return _byteswap_ulong(value);
+#elif CRYPTOPP_FAST_ROTATE(32)
// 5 instructions with rotate instruction, 9 without
return (rotrFixed(value, 8U) & 0xff00ff00) | (rotlFixed(value, 8U) & 0x00ff00ff);
#else
@@ -535,7 +685,14 @@ inline word32 ByteReverse(word32 value)
#ifdef WORD64_AVAILABLE
inline word64 ByteReverse(word64 value)
{
-#ifdef CRYPTOPP_SLOW_WORD64
+#if defined(__GNUC__) && defined(CRYPTOPP_X86_ASM_AVAILABLE) && defined(__x86_64__)
+ __asm__ ("bswap %0" : "=r" (value) : "0" (value));
+ return value;
+#elif defined(CRYPTOPP_BYTESWAP_AVAILABLE)
+ return bswap_64(value);
+#elif defined(_MSC_VER) && _MSC_VER >= 1300
+ return _byteswap_uint64(value);
+#elif defined(CRYPTOPP_SLOW_WORD64)
return (word64(ByteReverse(word32(value))) << 32) | ByteReverse(word32(value>>32));
#else
value = ((value & W64LIT(0xFF00FF00FF00FF00)) >> 8) | ((value & W64LIT(0x00FF00FF00FF00FF)) << 8);
@@ -637,6 +794,7 @@ inline void GetUserKey(ByteOrder order, T *out, size_t outlen, const byte *in, s
ConditionalByteReverse(order, out, out, RoundUpToMultipleOf(inlen, U));
}
+#ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
inline byte UnalignedGetWordNonTemplate(ByteOrder order, const byte *block, byte*)
{
return block[0];
@@ -681,18 +839,12 @@ inline word64 UnalignedGetWordNonTemplate(ByteOrder order, const byte *block, wo
}
#endif
-template <class T>
-inline T UnalignedGetWord(ByteOrder order, const byte *block, T*dummy=NULL)
-{
- return UnalignedGetWordNonTemplate(order, block, dummy);
-}
-
-inline void UnalignedPutWord(ByteOrder order, byte *block, byte value, const byte *xorBlock = NULL)
+inline void UnalignedPutWordNonTemplate(ByteOrder order, byte *block, byte value, const byte *xorBlock)
{
block[0] = xorBlock ? (value ^ xorBlock[0]) : value;
}
-inline void UnalignedPutWord(ByteOrder order, byte *block, word16 value, const byte *xorBlock = NULL)
+inline void UnalignedPutWordNonTemplate(ByteOrder order, byte *block, word16 value, const byte *xorBlock)
{
if (order == BIG_ENDIAN_ORDER)
{
@@ -712,7 +864,7 @@ inline void UnalignedPutWord(ByteOrder order, byte *block, word16 value, const b
}
}
-inline void UnalignedPutWord(ByteOrder order, byte *block, word32 value, const byte *xorBlock = NULL)
+inline void UnalignedPutWordNonTemplate(ByteOrder order, byte *block, word32 value, const byte *xorBlock)
{
if (order == BIG_ENDIAN_ORDER)
{
@@ -739,7 +891,7 @@ inline void UnalignedPutWord(ByteOrder order, byte *block, word32 value, const b
}
#ifdef WORD64_AVAILABLE
-inline void UnalignedPutWord(ByteOrder order, byte *block, word64 value, const byte *xorBlock = NULL)
+inline void UnalignedPutWordNonTemplate(ByteOrder order, byte *block, word64 value, const byte *xorBlock)
{
if (order == BIG_ENDIAN_ORDER)
{
@@ -777,17 +929,17 @@ inline void UnalignedPutWord(ByteOrder order, byte *block, word64 value, const b
}
}
#endif
+#endif // #ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
template <class T>
inline T GetWord(bool assumeAligned, ByteOrder order, const byte *block)
{
- if (assumeAligned)
- {
- assert(IsAligned<T>(block));
- return ConditionalByteReverse(order, *reinterpret_cast<const T *>(block));
- }
- else
- return UnalignedGetWord<T>(order, block);
+#ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
+ if (!assumeAligned)
+		return UnalignedGetWordNonTemplate(order, block, (T*)NULL);
+ assert(IsAligned<T>(block));
+#endif
+ return ConditionalByteReverse(order, *reinterpret_cast<const T *>(block));
}
template <class T>
@@ -799,17 +951,13 @@ inline void GetWord(bool assumeAligned, ByteOrder order, T &result, const byte *
template <class T>
inline void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock = NULL)
{
- if (assumeAligned)
- {
- assert(IsAligned<T>(block));
- assert(IsAligned<T>(xorBlock));
- if (xorBlock)
- *reinterpret_cast<T *>(block) = ConditionalByteReverse(order, value) ^ *reinterpret_cast<const T *>(xorBlock);
- else
- *reinterpret_cast<T *>(block) = ConditionalByteReverse(order, value);
- }
- else
- UnalignedPutWord(order, block, value, xorBlock);
+#ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
+ if (!assumeAligned)
+		return UnalignedPutWordNonTemplate(order, block, value, xorBlock);
+ assert(IsAligned<T>(block));
+ assert(IsAligned<T>(xorBlock));
+#endif
+ *reinterpret_cast<T *>(block) = ConditionalByteReverse(order, value) ^ (xorBlock ? *reinterpret_cast<const T *>(xorBlock) : 0);
}
template <class T, class B, bool A=true>
@@ -927,4 +1075,4 @@ inline T SafeLeftShift(T value)
NAMESPACE_END
-#endif // MISC_H
+#endif
diff --git a/seal.cpp b/seal.cpp
index f240b50a..196965f8 100644
--- a/seal.cpp
+++ b/seal.cpp
@@ -71,7 +71,7 @@ void SEAL_Policy<B>::CipherSetKey(const NameValuePairs &params, const byte *key,
template <class B>
void SEAL_Policy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *IV)
{
- m_outsideCounter = IV ? UnalignedGetWord<word32>(BIG_ENDIAN_ORDER, IV) : 0;
+ m_outsideCounter = IV ? GetWord<word32>(false, BIG_ENDIAN_ORDER, IV) : 0;
m_startCount = m_outsideCounter;
m_insideCounter = 0;
}
@@ -86,7 +86,6 @@ void SEAL_Policy<B>::SeekToIteration(lword iterationCount)
template <class B>
void SEAL_Policy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
{
- KeystreamOutput<B> keystreamOutput(operation, output, input);
word32 a, b, c, d, n1, n2, n3, n4;
unsigned int p, q;
@@ -175,10 +174,13 @@ void SEAL_Policy<B>::OperateKeystream(KeystreamOperation operation, byte *output
d = rotrFixed(d, 9U);
a += Ttab(q);
- keystreamOutput (b + m_S[4*i+0])
- (c ^ m_S[4*i+1])
- (d + m_S[4*i+2])
- (a ^ m_S[4*i+3]);
+#define SEAL_OUTPUT(x) \
+ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 0, b + m_S[4*i+0]);\
+ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 1, c ^ m_S[4*i+1]);\
+ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 2, d + m_S[4*i+2]);\
+ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 3, a ^ m_S[4*i+3]);
+
+ CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SEAL_OUTPUT, 4*4);
if (i & 1)
{
diff --git a/seal.h b/seal.h
index fc53c2f0..551a3382 100644
--- a/seal.h
+++ b/seal.h
@@ -18,7 +18,7 @@ class CRYPTOPP_NO_VTABLE SEAL_Policy : public AdditiveCipherConcretePolicy<word3
protected:
void CipherSetKey(const NameValuePairs &params, const byte *key, size_t length);
void OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount);
- void CipherGetNextIV(byte *IV) {UnalignedPutWord(BIG_ENDIAN_ORDER, IV, m_outsideCounter+1);}
+ void CipherGetNextIV(byte *IV) {PutWord(false, BIG_ENDIAN_ORDER, IV, m_outsideCounter+1);}
void CipherResynchronize(byte *keystreamBuffer, const byte *IV);
bool IsRandomAccess() const {return true;}
void SeekToIteration(lword iterationCount);
diff --git a/trunhash.h b/trunhash.h
index df733a6d..c1c4e9b6 100644
--- a/trunhash.h
+++ b/trunhash.h
@@ -26,6 +26,8 @@ public:
TruncatedHashTemplate(size_t digestSize)
: m_digestSize(digestSize) {}
+ void Restart()
+ {m_hm.Restart();}
void Update(const byte *input, size_t length)
{m_hm.Update(input, length);}
unsigned int DigestSize() const {return m_digestSize;}
diff --git a/words.h b/words.h
index ad76f1d8..fc835591 100644
--- a/words.h
+++ b/words.h
@@ -20,8 +20,7 @@ inline void SetWords(word *r, word a, size_t n)
inline void CopyWords(word *r, const word *a, size_t n)
{
- for (size_t i=0; i<n; i++)
- r[i] = a[i];
+ memcpy(r, a, n*WORD_SIZE);
}
inline void XorWords(word *r, const word *a, const word *b, size_t n)