author     Jeffrey Walton <noloader@gmail.com>    2019-02-12 23:12:35 -0500
committer  Jeffrey Walton <noloader@gmail.com>    2019-02-12 23:12:35 -0500
commit     9defd0afcd776b7d3232ff28624c478ec013f8cb (patch)
tree       9438b2300eda47f2e8a31546729021fcc88fb120 /keccak_simd.cpp
parent     c6e8a61b8b7dac8ac33bf12a4b9a0b510232da83 (diff)
download   cryptopp-git-9defd0afcd776b7d3232ff28624c478ec013f8cb.tar.gz
Add SHAKE-128 and SHAKE-256 (GH #805, PR #806)
This should have been added during the original commit
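
For context, the SHAKE-128 and SHAKE-256 objects referenced above are ordinary
Crypto++ hash objects. A minimal usage sketch, assuming the SHAKE128/SHAKE256
classes in shake.h follow the usual HashTransformation interface (Update/Final);
the message and output handling below are illustrative only:

    // Illustrative sketch; not part of this commit.
    #include "shake.h"     // SHAKE128 / SHAKE256 (GH #805)
    #include <cstdio>

    int main()
    {
        using namespace CryptoPP;

        const byte msg[] = "Do or do not. There is no try.";

        SHAKE256 shake;                      // default output length
        byte digest[SHAKE256::DIGESTSIZE];

        shake.Update(msg, sizeof(msg)-1);
        shake.Final(digest);

        // Print the extendable-output digest as hex
        for (unsigned int i = 0; i < sizeof(digest); ++i)
            std::printf("%02x", static_cast<unsigned int>(digest[i]));
        std::printf("\n");
        return 0;
    }
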
Diffstat (limited to 'keccak_simd.cpp')
-rw-r--r--  keccak_simd.cpp  2678
1 file changed, 2678 insertions, 0 deletions
diff --git a/keccak_simd.cpp b/keccak_simd.cpp
new file mode 100644
index 00000000..863bca58
--- /dev/null
+++ b/keccak_simd.cpp
@@ -0,0 +1,2678 @@
+// keccak_simd.cpp - written and placed in the public domain by Jeffrey Walton.
+//
+// This source file uses intrinsics to gain access to SSE and
+// NEON instructions. A separate source file is needed because
+// additional CXXFLAGS are required to enable the appropriate
+// instruction sets in some build configurations.
+
+// The XKCP package is provided by Guido Bertoni, Joan Daemen, Seth Hoffert,
+// Michael Peeters, Gilles Van Assche, and Ronny Van Keer. The code was
+// placed in the public domain by the authors.
+
+// KeccakF1600x2_SSE is the ParallelHash128 core function. The SSE2
+// implementation was extracted from XKCP using the following command.
+//
+// gcc -I lib/common -I lib/low/KeccakP-1600/Optimized \
+// -I lib/low/KeccakP-1600-times2/SIMD128/SSE2ufull \
+// lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c -E
+
+#include "pch.h"
+#include "config.h"
+#include "keccak.h"
+#include "misc.h"
+
+#if (CRYPTOPP_SSSE3_AVAILABLE)
+# include <emmintrin.h>
+# include <immintrin.h>
+#endif
+
+// Squash MS LNK4221 and libtool warnings
+extern const char KECCAK_SIMD_FNAME[] = __FILE__;
+
+NAMESPACE_BEGIN(CryptoPP)
+
+#if (CRYPTOPP_SSSE3_AVAILABLE)
+
+// The Keccak ParallelHash128 core function
+extern void KeccakF1600x2_SSE(word64 *state);
+
+// The F1600 round constants
+extern const word64 KeccakF1600Constants[24];
+
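+// Byte-shuffle masks for _mm_shuffle_epi8. They rotate each 64-bit lane
+// left by 8 and 56 bits, respectively, without a shift/or pair.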
+const word64 rho8[2] = {0x0605040302010007, 0x0E0D0C0B0A09080F};
+const word64 rho56[2] = {0x0007060504030201, 0x080F0E0D0C0B0A09};
+
+#define V128 __m128i
+#define CV128 const __m128i
+
+#define CONST128(a) _mm_load_si128((CV128 *)&(a))
+#define XOREQ128(a, b) a = _mm_xor_si128((a), (b))
+#define UNPACKL(a, b) _mm_unpacklo_epi64((a), (b))
+#define UNPACKH(a, b) _mm_unpackhi_epi64((a), (b))
+
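+// ROL64in128 rotates each 64-bit lane of an XMM word left by 'o' bits.
+// The generic form uses a shift/or pair; rotations by 8 and 56 bits use
+// a byte shuffle instead, and XOP provides a native rotate.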
+#if defined(__XOP__)
+# define ROL64in128(a, o) _mm_roti_epi64((a), (o))
+# define ROL64in128_8(a) ROL64in128((a), 8)
+# define ROL64in128_56(a) ROL64in128((a), 56)
+#else
+# define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64((a), (o)), _mm_srli_epi64(a, 64-(o)))
+# define ROL64in128_8(a) _mm_shuffle_epi8((a), CONST128(rho8))
+# define ROL64in128_56(a) _mm_shuffle_epi8((a), CONST128(rho56))
+#endif
+
+// Visual Studio is missing too many intrinsics. In particular, some versions
+// lack _mm_set1_epi64x, so broadcast the 64-bit value through _mm_loaddup_pd.
+inline __m128i SPLAT64(const word64 a)
+{
+#if defined(_MSC_VER)
+ double x; std::memcpy(&x, &a, 8);
+ return _mm_castpd_si128(_mm_loaddup_pd(&x));
+#else
+ return _mm_set1_epi64x(a);
+#endif
+}
+
+// The Keccak ParallelHash128 core function
+void KeccakF1600x2_SSE(word64 *state)
+{
+ V128 *statesAsLanes = (V128 *)state;
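+    // The two Keccak states are interleaved by lane: the low 64 bits of
+    // statesAsLanes[i] hold lane i of the first state and the high 64 bits
+    // hold lane i of the second, so each operation below advances both
+    // states in lockstep.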
+
+ V128 Aba, Abe, Abi, Abo, Abu;
+ V128 Aga, Age, Agi, Ago, Agu;
+ V128 Aka, Ake, Aki, Ako, Aku;
+ V128 Ama, Ame, Ami, Amo, Amu;
+ V128 Asa, Ase, Asi, Aso, Asu;
+ V128 Bba, Bbe, Bbi, Bbo, Bbu;
+ V128 Bga, Bge, Bgi, Bgo, Bgu;
+ V128 Bka, Bke, Bki, Bko, Bku;
+ V128 Bma, Bme, Bmi, Bmo, Bmu;
+ V128 Bsa, Bse, Bsi, Bso, Bsu;
+ V128 Ca, Ce, Ci, Co, Cu;
+ V128 Da, De, Di, Do, Du;
+ V128 Eba, Ebe, Ebi, Ebo, Ebu;
+ V128 Ega, Ege, Egi, Ego, Egu;
+ V128 Eka, Eke, Eki, Eko, Eku;
+ V128 Ema, Eme, Emi, Emo, Emu;
+ V128 Esa, Ese, Esi, Eso, Esu;
+
+ Aba = _mm_loadu_si128((CV128 *)&(statesAsLanes[ 0]));
+ Abe = _mm_loadu_si128((CV128 *)&(statesAsLanes[ 1]));
+ Abi = _mm_loadu_si128((CV128 *)&(statesAsLanes[ 2]));
+ Abo = _mm_loadu_si128((CV128 *)&(statesAsLanes[ 3]));
+ Abu = _mm_loadu_si128((CV128 *)&(statesAsLanes[ 4]));
+ Aga = _mm_loadu_si128((CV128 *)&(statesAsLanes[ 5]));
+ Age = _mm_loadu_si128((CV128 *)&(statesAsLanes[ 6]));
+ Agi = _mm_loadu_si128((CV128 *)&(statesAsLanes[ 7]));
+ Ago = _mm_loadu_si128((CV128 *)&(statesAsLanes[ 8]));
+ Agu = _mm_loadu_si128((CV128 *)&(statesAsLanes[ 9]));
+ Aka = _mm_loadu_si128((CV128 *)&(statesAsLanes[10]));
+ Ake = _mm_loadu_si128((CV128 *)&(statesAsLanes[11]));
+ Aki = _mm_loadu_si128((CV128 *)&(statesAsLanes[12]));
+ Ako = _mm_loadu_si128((CV128 *)&(statesAsLanes[13]));
+ Aku = _mm_loadu_si128((CV128 *)&(statesAsLanes[14]));
+ Ama = _mm_loadu_si128((CV128 *)&(statesAsLanes[15]));
+ Ame = _mm_loadu_si128((CV128 *)&(statesAsLanes[16]));
+ Ami = _mm_loadu_si128((CV128 *)&(statesAsLanes[17]));
+ Amo = _mm_loadu_si128((CV128 *)&(statesAsLanes[18]));
+ Amu = _mm_loadu_si128((CV128 *)&(statesAsLanes[19]));
+ Asa = _mm_loadu_si128((CV128 *)&(statesAsLanes[20]));
+ Ase = _mm_loadu_si128((CV128 *)&(statesAsLanes[21]));
+ Asi = _mm_loadu_si128((CV128 *)&(statesAsLanes[22]));
+ Aso = _mm_loadu_si128((CV128 *)&(statesAsLanes[23]));
+ Asu = _mm_loadu_si128((CV128 *)&(statesAsLanes[24]));
+
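+    // Theta for round 0: Ca..Cu are the five column parities, Da..Du the
+    // values XORed into each column.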
+ Ca = _mm_xor_si128(Aba, _mm_xor_si128(Aga, _mm_xor_si128(Aka, _mm_xor_si128(Ama, Asa))));
+ Ce = _mm_xor_si128(Abe, _mm_xor_si128(Age, _mm_xor_si128(Ake, _mm_xor_si128(Ame, Ase))));
+ Ci = _mm_xor_si128(Abi, _mm_xor_si128(Agi, _mm_xor_si128(Aki, _mm_xor_si128(Ami, Asi))));
+ Co = _mm_xor_si128(Abo, _mm_xor_si128(Ago, _mm_xor_si128(Ako, _mm_xor_si128(Amo, Aso))));
+ Cu = _mm_xor_si128(Abu, _mm_xor_si128(Agu, _mm_xor_si128(Aku, _mm_xor_si128(Amu, Asu))));
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+
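+    // Round 0: theta is applied lane by lane, followed by the rho rotations,
+    // the pi permutation (the B* temporaries), chi, and the iota round
+    // constant. The next round's column parities are accumulated in Ca..Cu
+    // as the E* outputs are produced. The remaining rounds below follow the
+    // same fully unrolled pattern, alternating between the A* and E* sets.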
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[0]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
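+    // Round 1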
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[1]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
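+    // Round 2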
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[2]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
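+    // Round 3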
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[3]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
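+    // Round 4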
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[4]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
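+    // Round 5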
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[5]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
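+    // Round 6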
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[6]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
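+    // Round 7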
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[7]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
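+    // Round 8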
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[8]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
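+    // Round 9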
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[9]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
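+    // Round 10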
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[10]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
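+    // Round 11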
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[11]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
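+    // Round 12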
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[12]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
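+    // Round 13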
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[13]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
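+ // Next round: theta (D lanes), then rho+pi, chi, and iota with KeccakF1600Constants[14]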
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[14]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
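+ // Next round: theta (D lanes), then rho+pi, chi, and iota with KeccakF1600Constants[15]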
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[15]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
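+ // Next round: theta (D lanes), then rho+pi, chi, and iota with KeccakF1600Constants[16]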
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[16]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
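+ // Next round: theta (D lanes), then rho+pi, chi, and iota with KeccakF1600Constants[17]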
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[17]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
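+ // Next round: theta (D lanes), then rho+pi, chi, and iota with KeccakF1600Constants[18]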
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[18]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
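+ // Next round: theta (D lanes), then rho+pi, chi, and iota with KeccakF1600Constants[19]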
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[19]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
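+ // Next round: theta (D lanes), then rho+pi, chi, and iota with KeccakF1600Constants[20]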
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[20]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
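+ // Next round: theta (D lanes), then rho+pi, chi, and iota with KeccakF1600Constants[21]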
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[21]));
+ Ca = Aba;
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Abe;
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Abi;
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Abo;
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Abu;
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Aga);
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Age);
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Agi);
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ago);
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Agu);
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Aka);
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Ake);
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Aki);
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Ako);
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Aku);
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ama);
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Ame);
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Ami);
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Amo);
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Amu);
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Asa);
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ase);
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Asi);
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Aso);
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Asu);
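+ // Next round: theta (D lanes), then rho+pi, chi, and iota with KeccakF1600Constants[22]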
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Aba = _mm_xor_si128(Aba, Da);
+ Bba = Aba;
+ Age = _mm_xor_si128(Age, De);
+ Bbe = ROL64in128(Age, 44);
+ Aki = _mm_xor_si128(Aki, Di);
+ Bbi = ROL64in128(Aki, 43);
+ Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[22]));
+ Ca = Eba;
+ Amo = _mm_xor_si128(Amo, Do);
+ Bbo = ROL64in128(Amo, 21);
+ Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Ce = Ebe;
+ Asu = _mm_xor_si128(Asu, Du);
+ Bbu = ROL64in128(Asu, 14);
+ Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Ci = Ebi;
+ Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Co = Ebo;
+ Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Cu = Ebu;
+ Abo = _mm_xor_si128(Abo, Do);
+ Bga = ROL64in128(Abo, 28);
+ Agu = _mm_xor_si128(Agu, Du);
+ Bge = ROL64in128(Agu, 20);
+ Aka = _mm_xor_si128(Aka, Da);
+ Bgi = ROL64in128(Aka, 3);
+ Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Ca = _mm_xor_si128(Ca, Ega);
+ Ame = _mm_xor_si128(Ame, De);
+ Bgo = ROL64in128(Ame, 45);
+ Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Ce = _mm_xor_si128(Ce, Ege);
+ Asi = _mm_xor_si128(Asi, Di);
+ Bgu = ROL64in128(Asi, 61);
+ Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ci = _mm_xor_si128(Ci, Egi);
+ Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Co = _mm_xor_si128(Co, Ego);
+ Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Cu = _mm_xor_si128(Cu, Egu);
+ Abe = _mm_xor_si128(Abe, De);
+ Bka = ROL64in128(Abe, 1);
+ Agi = _mm_xor_si128(Agi, Di);
+ Bke = ROL64in128(Agi, 6);
+ Ako = _mm_xor_si128(Ako, Do);
+ Bki = ROL64in128(Ako, 25);
+ Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Ca = _mm_xor_si128(Ca, Eka);
+ Amu = _mm_xor_si128(Amu, Du);
+ Bko = ROL64in128_8(Amu);
+ Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Ce = _mm_xor_si128(Ce, Eke);
+ Asa = _mm_xor_si128(Asa, Da);
+ Bku = ROL64in128(Asa, 18);
+ Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ci = _mm_xor_si128(Ci, Eki);
+ Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Co = _mm_xor_si128(Co, Eko);
+ Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Cu = _mm_xor_si128(Cu, Eku);
+ Abu = _mm_xor_si128(Abu, Du);
+ Bma = ROL64in128(Abu, 27);
+ Aga = _mm_xor_si128(Aga, Da);
+ Bme = ROL64in128(Aga, 36);
+ Ake = _mm_xor_si128(Ake, De);
+ Bmi = ROL64in128(Ake, 10);
+ Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Ca = _mm_xor_si128(Ca, Ema);
+ Ami = _mm_xor_si128(Ami, Di);
+ Bmo = ROL64in128(Ami, 15);
+ Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Ce = _mm_xor_si128(Ce, Eme);
+ Aso = _mm_xor_si128(Aso, Do);
+ Bmu = ROL64in128_56(Aso);
+ Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Ci = _mm_xor_si128(Ci, Emi);
+ Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Co = _mm_xor_si128(Co, Emo);
+ Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Cu = _mm_xor_si128(Cu, Emu);
+ Abi = _mm_xor_si128(Abi, Di);
+ Bsa = ROL64in128(Abi, 62);
+ Ago = _mm_xor_si128(Ago, Do);
+ Bse = ROL64in128(Ago, 55);
+ Aku = _mm_xor_si128(Aku, Du);
+ Bsi = ROL64in128(Aku, 39);
+ Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ca = _mm_xor_si128(Ca, Esa);
+ Ama = _mm_xor_si128(Ama, Da);
+ Bso = ROL64in128(Ama, 41);
+ Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ce = _mm_xor_si128(Ce, Ese);
+ Ase = _mm_xor_si128(Ase, De);
+ Bsu = ROL64in128(Ase, 2);
+ Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Ci = _mm_xor_si128(Ci, Esi);
+ Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Co = _mm_xor_si128(Co, Eso);
+ Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+ Cu = _mm_xor_si128(Cu, Esu);
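+ // Final round: theta (D lanes), then rho+pi, chi, and iota with KeccakF1600Constants[23];
+ // the column-parity C accumulators are no longer needed after this round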
+ Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
+ De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
+ Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
+ Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
+ Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
+ Eba = _mm_xor_si128(Eba, Da);
+ Bba = Eba;
+ Ege = _mm_xor_si128(Ege, De);
+ Bbe = ROL64in128(Ege, 44);
+ Eki = _mm_xor_si128(Eki, Di);
+ Bbi = ROL64in128(Eki, 43);
+ Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
+ Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[23]));
+ Emo = _mm_xor_si128(Emo, Do);
+ Bbo = ROL64in128(Emo, 21);
+ Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
+ Esu = _mm_xor_si128(Esu, Du);
+ Bbu = ROL64in128(Esu, 14);
+ Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
+ Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
+ Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
+ Ebo = _mm_xor_si128(Ebo, Do);
+ Bga = ROL64in128(Ebo, 28);
+ Egu = _mm_xor_si128(Egu, Du);
+ Bge = ROL64in128(Egu, 20);
+ Eka = _mm_xor_si128(Eka, Da);
+ Bgi = ROL64in128(Eka, 3);
+ Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
+ Eme = _mm_xor_si128(Eme, De);
+ Bgo = ROL64in128(Eme, 45);
+ Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
+ Esi = _mm_xor_si128(Esi, Di);
+ Bgu = ROL64in128(Esi, 61);
+ Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
+ Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
+ Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
+ Ebe = _mm_xor_si128(Ebe, De);
+ Bka = ROL64in128(Ebe, 1);
+ Egi = _mm_xor_si128(Egi, Di);
+ Bke = ROL64in128(Egi, 6);
+ Eko = _mm_xor_si128(Eko, Do);
+ Bki = ROL64in128(Eko, 25);
+ Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
+ Emu = _mm_xor_si128(Emu, Du);
+ Bko = ROL64in128_8(Emu);
+ Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
+ Esa = _mm_xor_si128(Esa, Da);
+ Bku = ROL64in128(Esa, 18);
+ Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
+ Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
+ Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
+ Ebu = _mm_xor_si128(Ebu, Du);
+ Bma = ROL64in128(Ebu, 27);
+ Ega = _mm_xor_si128(Ega, Da);
+ Bme = ROL64in128(Ega, 36);
+ Eke = _mm_xor_si128(Eke, De);
+ Bmi = ROL64in128(Eke, 10);
+ Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
+ Emi = _mm_xor_si128(Emi, Di);
+ Bmo = ROL64in128(Emi, 15);
+ Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
+ Eso = _mm_xor_si128(Eso, Do);
+ Bmu = ROL64in128_56(Eso);
+ Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
+ Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
+ Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
+ Ebi = _mm_xor_si128(Ebi, Di);
+ Bsa = ROL64in128(Ebi, 62);
+ Ego = _mm_xor_si128(Ego, Do);
+ Bse = ROL64in128(Ego, 55);
+ Eku = _mm_xor_si128(Eku, Du);
+ Bsi = ROL64in128(Eku, 39);
+ Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
+ Ema = _mm_xor_si128(Ema, Da);
+ Bso = ROL64in128(Ema, 41);
+ Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
+ Ese = _mm_xor_si128(Ese, De);
+ Bsu = ROL64in128(Ese, 2);
+ Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
+ Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
+ Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
+
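+ // Write the permuted A lanes back to the caller's interleaved state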
+ _mm_storeu_si128((V128 *)&(statesAsLanes[ 0]), Aba);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[ 1]), Abe);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[ 2]), Abi);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[ 3]), Abo);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[ 4]), Abu);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[ 5]), Aga);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[ 6]), Age);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[ 7]), Agi);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[ 8]), Ago);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[ 9]), Agu);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[10]), Aka);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[11]), Ake);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[12]), Aki);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[13]), Ako);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[14]), Aku);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[15]), Ama);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[16]), Ame);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[17]), Ami);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[18]), Amo);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[19]), Amu);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[20]), Asa);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[21]), Ase);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[22]), Asi);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[23]), Aso);
+ _mm_storeu_si128((V128 *)&(statesAsLanes[24]), Asu);
+}
+
+#endif
+
+NAMESPACE_END
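
A minimal usage sketch (not part of this commit; the wrapper name below is hypothetical): KeccakF1600x2_SSE operates on two Keccak-1600 states interleaved lane by lane, so the buffer spans 25 x 128 bits = 50 word64 values, matching the 25 V128 stores above.

    #include "config.h"
    #include "keccak.h"

    NAMESPACE_BEGIN(CryptoPP)
    // Declared and defined in keccak_simd.cpp when SSSE3 is available.
    extern void KeccakF1600x2_SSE(word64 *state);
    NAMESPACE_END

    // Hypothetical wrapper: run all 24 rounds of Keccak-f[1600] on two
    // interleaved states at once. 'state' holds lane i of both tracks
    // side by side for i = 0..24, i.e. 50 word64 values in total.
    inline void PermuteTwoInterleavedStates(CryptoPP::word64 state[50])
    {
    #if (CRYPTOPP_SSSE3_AVAILABLE)
        CryptoPP::KeccakF1600x2_SSE(state);
    #else
        (void)state;  // a scalar fallback would go here
    #endif
    }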