// keccak_simd.cpp - written and placed in the public domain by Jeffrey Walton. // // This source file uses intrinsics to gain access to SSE and // NEON instructions. A separate source file is needed because // additional CXXFLAGS are required to enable the appropriate // instructions sets in some build configurations. // The XKCP package is provided by Guido Bertoni, Joan Daemen, Seth Hoffert, // Michael Peeters, Gilles Van Assche, and Ronny Van Keer. The code was // placed public domain by the authors. // KeccakF1600x2_SSE is ParallelHash128. The SSE2 ParallelHash128 // implementation was extracted from XKCP using the following command. // // gcc -I lib/common -I lib/low/KeccakP-1600/Optimized // -I lib/low/KeccakP-1600-times2/SIMD128/SSE2ufull // lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c -E #include "pch.h" #include "config.h" #include "keccak.h" #include "misc.h" #if (CRYPTOPP_SSSE3_AVAILABLE) # include # include #endif #if defined(__XOP__) # if defined(CRYPTOPP_GCC_COMPATIBLE) # include # endif # include #endif // XOP // Squash MS LNK4221 and libtool warnings extern const char KECCAK_SIMD_FNAME[] = __FILE__; NAMESPACE_BEGIN(CryptoPP) #if (CRYPTOPP_SSSE3_AVAILABLE) // The Keccak ParallelHash128 core function extern void KeccakF1600x2_SSE(word64 *state); // The F1600 round constants extern const word64 KeccakF1600Constants[24]; CRYPTOPP_ALIGN_DATA(16) const word64 rho8[2] = {W64LIT(0x0605040302010007), W64LIT(0x0E0D0C0B0A09080F)}; CRYPTOPP_ALIGN_DATA(16) const word64 rho56[2] = {W64LIT(0x0007060504030201), W64LIT(0x080F0E0D0C0B0A09)}; #if defined(__XOP__) # define ROL64in128(a, o) _mm_roti_epi64((a), (o)) # define ROL64in128_8(a) ROL64in128((a), 8) # define ROL64in128_56(a) ROL64in128((a), 56) #else # define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64((a), (o)), _mm_srli_epi64(a, 64-(o))) # define ROL64in128_8(a) _mm_shuffle_epi8((a), _mm_load_si128(CONST_M128_CAST(rho8))) # define ROL64in128_56(a) _mm_shuffle_epi8((a), _mm_load_si128(CONST_M128_CAST(rho56))) #endif // Damn Visual Studio is missing too many intrinsics... inline __m128i SPLAT64(const word64 a) { #if defined(_MSC_VER) double x; std::memcpy(&x, &a, 8); return _mm_castpd_si128(_mm_loaddup_pd(&x)); #else return _mm_set1_epi64x(a); #endif } // The Keccak ParallelHash128 core function void KeccakF1600x2_SSE(word64 *state) { __m128i Aba, Abe, Abi, Abo, Abu; __m128i Aga, Age, Agi, Ago, Agu; __m128i Aka, Ake, Aki, Ako, Aku; __m128i Ama, Ame, Ami, Amo, Amu; __m128i Asa, Ase, Asi, Aso, Asu; __m128i Bba, Bbe, Bbi, Bbo, Bbu; __m128i Bga, Bge, Bgi, Bgo, Bgu; __m128i Bka, Bke, Bki, Bko, Bku; __m128i Bma, Bme, Bmi, Bmo, Bmu; __m128i Bsa, Bse, Bsi, Bso, Bsu; __m128i Ca, Ce, Ci, Co, Cu; __m128i Da, De, Di, Do, Du; __m128i Eba, Ebe, Ebi, Ebo, Ebu; __m128i Ega, Ege, Egi, Ego, Egu; __m128i Eka, Eke, Eki, Eko, Eku; __m128i Ema, Eme, Emi, Emo, Emu; __m128i Esa, Ese, Esi, Eso, Esu; __m128i* lanes = reinterpret_cast<__m128i*>(state); Aba = _mm_loadu_si128(CONST_M128_CAST(lanes+ 0)); Abe = _mm_loadu_si128(CONST_M128_CAST(lanes+ 1)); Abi = _mm_loadu_si128(CONST_M128_CAST(lanes+ 2)); Abo = _mm_loadu_si128(CONST_M128_CAST(lanes+ 3)); Abu = _mm_loadu_si128(CONST_M128_CAST(lanes+ 4)); Aga = _mm_loadu_si128(CONST_M128_CAST(lanes+ 5)); Age = _mm_loadu_si128(CONST_M128_CAST(lanes+ 6)); Agi = _mm_loadu_si128(CONST_M128_CAST(lanes+ 7)); Ago = _mm_loadu_si128(CONST_M128_CAST(lanes+ 8)); Agu = _mm_loadu_si128(CONST_M128_CAST(lanes+ 9)); Aka = _mm_loadu_si128(CONST_M128_CAST(lanes+10)); Ake = _mm_loadu_si128(CONST_M128_CAST(lanes+11)); Aki = _mm_loadu_si128(CONST_M128_CAST(lanes+12)); Ako = _mm_loadu_si128(CONST_M128_CAST(lanes+13)); Aku = _mm_loadu_si128(CONST_M128_CAST(lanes+14)); Ama = _mm_loadu_si128(CONST_M128_CAST(lanes+15)); Ame = _mm_loadu_si128(CONST_M128_CAST(lanes+16)); Ami = _mm_loadu_si128(CONST_M128_CAST(lanes+17)); Amo = _mm_loadu_si128(CONST_M128_CAST(lanes+18)); Amu = _mm_loadu_si128(CONST_M128_CAST(lanes+19)); Asa = _mm_loadu_si128(CONST_M128_CAST(lanes+20)); Ase = _mm_loadu_si128(CONST_M128_CAST(lanes+21)); Asi = _mm_loadu_si128(CONST_M128_CAST(lanes+22)); Aso = _mm_loadu_si128(CONST_M128_CAST(lanes+23)); Asu = _mm_loadu_si128(CONST_M128_CAST(lanes+24)); Ca = _mm_xor_si128(Aba, _mm_xor_si128(Aga, _mm_xor_si128(Aka, _mm_xor_si128(Ama, Asa)))); Ce = _mm_xor_si128(Abe, _mm_xor_si128(Age, _mm_xor_si128(Ake, _mm_xor_si128(Ame, Ase)))); Ci = _mm_xor_si128(Abi, _mm_xor_si128(Agi, _mm_xor_si128(Aki, _mm_xor_si128(Ami, Asi)))); Co = _mm_xor_si128(Abo, _mm_xor_si128(Ago, _mm_xor_si128(Ako, _mm_xor_si128(Amo, Aso)))); Cu = _mm_xor_si128(Abu, _mm_xor_si128(Agu, _mm_xor_si128(Aku, _mm_xor_si128(Amu, Asu)))); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[0])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[1])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[2])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[3])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[4])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[5])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[6])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[7])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[8])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[9])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[10])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[11])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[12])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[13])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[14])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[15])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[16])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[17])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[18])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[19])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[20])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[21])); Ca = Aba; Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Abe; Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Abi; Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Abo; Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Abu; Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Aga); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Age); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Agi); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ago); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Agu); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Aka); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Ake); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Aki); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Ako); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Aku); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ama); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Ame); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Ami); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Amo); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Amu); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Asa); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ase); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Asi); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Aso); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Asu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Aba = _mm_xor_si128(Aba, Da); Bba = Aba; Age = _mm_xor_si128(Age, De); Bbe = ROL64in128(Age, 44); Aki = _mm_xor_si128(Aki, Di); Bbi = ROL64in128(Aki, 43); Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[22])); Ca = Eba; Amo = _mm_xor_si128(Amo, Do); Bbo = ROL64in128(Amo, 21); Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Ce = Ebe; Asu = _mm_xor_si128(Asu, Du); Bbu = ROL64in128(Asu, 14); Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Ci = Ebi; Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Co = Ebo; Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Cu = Ebu; Abo = _mm_xor_si128(Abo, Do); Bga = ROL64in128(Abo, 28); Agu = _mm_xor_si128(Agu, Du); Bge = ROL64in128(Agu, 20); Aka = _mm_xor_si128(Aka, Da); Bgi = ROL64in128(Aka, 3); Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Ca = _mm_xor_si128(Ca, Ega); Ame = _mm_xor_si128(Ame, De); Bgo = ROL64in128(Ame, 45); Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Ce = _mm_xor_si128(Ce, Ege); Asi = _mm_xor_si128(Asi, Di); Bgu = ROL64in128(Asi, 61); Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ci = _mm_xor_si128(Ci, Egi); Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Co = _mm_xor_si128(Co, Ego); Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Cu = _mm_xor_si128(Cu, Egu); Abe = _mm_xor_si128(Abe, De); Bka = ROL64in128(Abe, 1); Agi = _mm_xor_si128(Agi, Di); Bke = ROL64in128(Agi, 6); Ako = _mm_xor_si128(Ako, Do); Bki = ROL64in128(Ako, 25); Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Ca = _mm_xor_si128(Ca, Eka); Amu = _mm_xor_si128(Amu, Du); Bko = ROL64in128_8(Amu); Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Ce = _mm_xor_si128(Ce, Eke); Asa = _mm_xor_si128(Asa, Da); Bku = ROL64in128(Asa, 18); Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ci = _mm_xor_si128(Ci, Eki); Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Co = _mm_xor_si128(Co, Eko); Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Cu = _mm_xor_si128(Cu, Eku); Abu = _mm_xor_si128(Abu, Du); Bma = ROL64in128(Abu, 27); Aga = _mm_xor_si128(Aga, Da); Bme = ROL64in128(Aga, 36); Ake = _mm_xor_si128(Ake, De); Bmi = ROL64in128(Ake, 10); Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Ca = _mm_xor_si128(Ca, Ema); Ami = _mm_xor_si128(Ami, Di); Bmo = ROL64in128(Ami, 15); Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Ce = _mm_xor_si128(Ce, Eme); Aso = _mm_xor_si128(Aso, Do); Bmu = ROL64in128_56(Aso); Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Ci = _mm_xor_si128(Ci, Emi); Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Co = _mm_xor_si128(Co, Emo); Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Cu = _mm_xor_si128(Cu, Emu); Abi = _mm_xor_si128(Abi, Di); Bsa = ROL64in128(Abi, 62); Ago = _mm_xor_si128(Ago, Do); Bse = ROL64in128(Ago, 55); Aku = _mm_xor_si128(Aku, Du); Bsi = ROL64in128(Aku, 39); Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ca = _mm_xor_si128(Ca, Esa); Ama = _mm_xor_si128(Ama, Da); Bso = ROL64in128(Ama, 41); Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ce = _mm_xor_si128(Ce, Ese); Ase = _mm_xor_si128(Ase, De); Bsu = ROL64in128(Ase, 2); Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Ci = _mm_xor_si128(Ci, Esi); Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Co = _mm_xor_si128(Co, Eso); Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); Cu = _mm_xor_si128(Cu, Esu); Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1)); De = _mm_xor_si128(Ca, ROL64in128(Ci, 1)); Di = _mm_xor_si128(Ce, ROL64in128(Co, 1)); Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1)); Du = _mm_xor_si128(Co, ROL64in128(Ca, 1)); Eba = _mm_xor_si128(Eba, Da); Bba = Eba; Ege = _mm_xor_si128(Ege, De); Bbe = ROL64in128(Ege, 44); Eki = _mm_xor_si128(Eki, Di); Bbi = ROL64in128(Eki, 43); Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi)); Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[23])); Emo = _mm_xor_si128(Emo, Do); Bbo = ROL64in128(Emo, 21); Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo)); Esu = _mm_xor_si128(Esu, Du); Bbu = ROL64in128(Esu, 14); Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu)); Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba)); Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe)); Ebo = _mm_xor_si128(Ebo, Do); Bga = ROL64in128(Ebo, 28); Egu = _mm_xor_si128(Egu, Du); Bge = ROL64in128(Egu, 20); Eka = _mm_xor_si128(Eka, Da); Bgi = ROL64in128(Eka, 3); Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi)); Eme = _mm_xor_si128(Eme, De); Bgo = ROL64in128(Eme, 45); Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo)); Esi = _mm_xor_si128(Esi, Di); Bgu = ROL64in128(Esi, 61); Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu)); Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga)); Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge)); Ebe = _mm_xor_si128(Ebe, De); Bka = ROL64in128(Ebe, 1); Egi = _mm_xor_si128(Egi, Di); Bke = ROL64in128(Egi, 6); Eko = _mm_xor_si128(Eko, Do); Bki = ROL64in128(Eko, 25); Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki)); Emu = _mm_xor_si128(Emu, Du); Bko = ROL64in128_8(Emu); Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko)); Esa = _mm_xor_si128(Esa, Da); Bku = ROL64in128(Esa, 18); Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku)); Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka)); Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke)); Ebu = _mm_xor_si128(Ebu, Du); Bma = ROL64in128(Ebu, 27); Ega = _mm_xor_si128(Ega, Da); Bme = ROL64in128(Ega, 36); Eke = _mm_xor_si128(Eke, De); Bmi = ROL64in128(Eke, 10); Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi)); Emi = _mm_xor_si128(Emi, Di); Bmo = ROL64in128(Emi, 15); Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo)); Eso = _mm_xor_si128(Eso, Do); Bmu = ROL64in128_56(Eso); Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu)); Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma)); Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme)); Ebi = _mm_xor_si128(Ebi, Di); Bsa = ROL64in128(Ebi, 62); Ego = _mm_xor_si128(Ego, Do); Bse = ROL64in128(Ego, 55); Eku = _mm_xor_si128(Eku, Du); Bsi = ROL64in128(Eku, 39); Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi)); Ema = _mm_xor_si128(Ema, Da); Bso = ROL64in128(Ema, 41); Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso)); Ese = _mm_xor_si128(Ese, De); Bsu = ROL64in128(Ese, 2); Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu)); Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa)); Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse)); _mm_storeu_si128(M128_CAST(lanes+ 0), Aba); _mm_storeu_si128(M128_CAST(lanes+ 1), Abe); _mm_storeu_si128(M128_CAST(lanes+ 2), Abi); _mm_storeu_si128(M128_CAST(lanes+ 3), Abo); _mm_storeu_si128(M128_CAST(lanes+ 4), Abu); _mm_storeu_si128(M128_CAST(lanes+ 5), Aga); _mm_storeu_si128(M128_CAST(lanes+ 6), Age); _mm_storeu_si128(M128_CAST(lanes+ 7), Agi); _mm_storeu_si128(M128_CAST(lanes+ 8), Ago); _mm_storeu_si128(M128_CAST(lanes+ 9), Agu); _mm_storeu_si128(M128_CAST(lanes+10), Aka); _mm_storeu_si128(M128_CAST(lanes+11), Ake); _mm_storeu_si128(M128_CAST(lanes+12), Aki); _mm_storeu_si128(M128_CAST(lanes+13), Ako); _mm_storeu_si128(M128_CAST(lanes+14), Aku); _mm_storeu_si128(M128_CAST(lanes+15), Ama); _mm_storeu_si128(M128_CAST(lanes+16), Ame); _mm_storeu_si128(M128_CAST(lanes+17), Ami); _mm_storeu_si128(M128_CAST(lanes+18), Amo); _mm_storeu_si128(M128_CAST(lanes+19), Amu); _mm_storeu_si128(M128_CAST(lanes+20), Asa); _mm_storeu_si128(M128_CAST(lanes+21), Ase); _mm_storeu_si128(M128_CAST(lanes+22), Asi); _mm_storeu_si128(M128_CAST(lanes+23), Aso); _mm_storeu_si128(M128_CAST(lanes+24), Asu); } #endif NAMESPACE_END