summary | refs | log | tree | commit | diff
path: root/GNUmakefile
diff options
context:
space:
mode:
author: Jeffrey Walton <noloader@gmail.com>, 2018-10-24 11:00:35 -0400
committer: Jeffrey Walton <noloader@gmail.com>, 2018-10-24 11:00:35 -0400
commit: 18dcbdf514298d7097934d9a1d2e9032f14b54b7 (patch)
tree: 9e420d118ad0cde18ddec44457e6c4cd720232a0 /GNUmakefile
parent: d230999b408740d604b136bdfa28f1a60b0211fe (diff)
download: cryptopp-git-18dcbdf514298d7097934d9a1d2e9032f14b54b7.tar.gz
Move input xor to ChaCha_OperateKeystream_SSE2
This picks up about 0.2 cycles per byte (cpb) in ChaCha::OperateKeystream. It may not sound like much, but it puts the SSE2 intrinsics version on par with the ASM version of Salsa20. Salsa20 leads ChaCha by 0.1 to 0.15 cpb, which equates to about 50 MB/s.
Diffstat (limited to 'GNUmakefile')
-rwxr-xr-xGNUmakefile11
1 files changed, 8 insertions, 3 deletions
diff --git a/GNUmakefile b/GNUmakefile
index 592b59dc..60cd8a23 100755
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -238,9 +238,8 @@ endif # CXXFLAGS
# SSE2 is a core feature of x86_64
ifeq ($(findstring -DCRYPTOPP_DISABLE_ASM,$(CXXFLAGS)),)
- ifeq ($(IS_X86),1)
- SSE_FLAG = -msse2
- endif
+ SSE_FLAG = -msse2
+ CHACHA_FLAG = -msse2
endif
ifeq ($(findstring -DCRYPTOPP_DISABLE_SSSE3,$(CXXFLAGS)),)
HAVE_SSSE3 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mssse3 -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c __SSSE3__)
@@ -379,6 +378,7 @@ ifeq ($(IS_NEON),1)
CRC_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
GCM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
BLAKE2_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
+ CHACHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
CHAM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
LEA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
SHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
@@ -396,6 +396,7 @@ ifeq ($(IS_ARMV8),1)
ifeq ($(HAVE_NEON),1)
ARIA_FLAG = -march=armv8-a
BLAKE2_FLAG = -march=armv8-a
+ CHACHA_FLAG = -march=armv8-a
CHAM_FLAG = -march=armv8-a
LEA_FLAG = -march=armv8-a
NEON_FLAG = -march=armv8-a
@@ -1176,6 +1177,10 @@ aria-simd.o : aria-simd.cpp
blake2-simd.o : blake2-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(BLAKE2_FLAG) -c) $<
+# SSE2 or NEON available
+chacha-simd.o : chacha-simd.cpp
+ $(CXX) $(strip $(CXXFLAGS) $(CHACHA_FLAG) -c) $<
+
# SSSE3 available
cham-simd.o : cham-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(CHAM_FLAG) -c) $<