summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Emilia Kasper <emilia@openssl.org> 2015-03-13 21:10:13 -0700
committer Emilia Kasper <emilia@openssl.org> 2015-03-13 21:14:56 -0700
commit a2fcab9978a0905c4286051993da63329fda8a19 (patch)
tree 1ba8152acb8171fde5e8c0b14be933b1efd8329f
parent 1a098164354e8e14e0237993cca7a0bffe820ed6 (diff)
download openssl-new-a2fcab9978a0905c4286051993da63329fda8a19.tar.gz
Fix undefined behaviour in shifts.

Td4 and Te4 are arrays of u8. In an expression of the form u8 << int, the u8 operand is first promoted to int and only then shifted. If the mathematical result of a shift (as modelled by lhs * 2^{rhs}) is not representable in an int, the behaviour is undefined. In other words, you can't shift into the sign bit of a signed integer. Fix this by casting to u32 whenever we're shifting left by 24.

(For consistency, cast other shifts, too.)

Caught by -fsanitize=shift

Submitted by Nick Lewycky (Google)

Reviewed-by: Andy Polyakov <appro@openssl.org>
(cherry picked from commit 8b37e5c14f0eddb10c7f91ef91004622d90ef361)
-rw-r--r-- crypto/aes/aes_core.c 64
-rw-r--r-- crypto/aes/aes_x86core.c 224
2 files changed, 144 insertions, 144 deletions
diff --git a/crypto/aes/aes_core.c b/crypto/aes/aes_core.c
index ff0d1643fd..2ddb0860d7 100644
--- a/crypto/aes/aes_core.c
+++ b/crypto/aes/aes_core.c
@@ -1130,31 +1130,31 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
* map cipher state to byte array block:
*/
s0 =
- (Td4[(t0 >> 24) ] << 24) ^
- (Td4[(t3 >> 16) & 0xff] << 16) ^
- (Td4[(t2 >> 8) & 0xff] << 8) ^
- (Td4[(t1 ) & 0xff]) ^
+ ((u32)Td4[(t0 >> 24) ] << 24) ^
+ ((u32)Td4[(t3 >> 16) & 0xff] << 16) ^
+ ((u32)Td4[(t2 >> 8) & 0xff] << 8) ^
+ ((u32)Td4[(t1 ) & 0xff]) ^
rk[0];
PUTU32(out , s0);
s1 =
- (Td4[(t1 >> 24) ] << 24) ^
- (Td4[(t0 >> 16) & 0xff] << 16) ^
- (Td4[(t3 >> 8) & 0xff] << 8) ^
- (Td4[(t2 ) & 0xff]) ^
+ ((u32)Td4[(t1 >> 24) ] << 24) ^
+ ((u32)Td4[(t0 >> 16) & 0xff] << 16) ^
+ ((u32)Td4[(t3 >> 8) & 0xff] << 8) ^
+ ((u32)Td4[(t2 ) & 0xff]) ^
rk[1];
PUTU32(out + 4, s1);
s2 =
- (Td4[(t2 >> 24) ] << 24) ^
- (Td4[(t1 >> 16) & 0xff] << 16) ^
- (Td4[(t0 >> 8) & 0xff] << 8) ^
- (Td4[(t3 ) & 0xff]) ^
+ ((u32)Td4[(t2 >> 24) ] << 24) ^
+ ((u32)Td4[(t1 >> 16) & 0xff] << 16) ^
+ ((u32)Td4[(t0 >> 8) & 0xff] << 8) ^
+ ((u32)Td4[(t3 ) & 0xff]) ^
rk[2];
PUTU32(out + 8, s2);
s3 =
- (Td4[(t3 >> 24) ] << 24) ^
- (Td4[(t2 >> 16) & 0xff] << 16) ^
- (Td4[(t1 >> 8) & 0xff] << 8) ^
- (Td4[(t0 ) & 0xff]) ^
+ ((u32)Td4[(t3 >> 24) ] << 24) ^
+ ((u32)Td4[(t2 >> 16) & 0xff] << 16) ^
+ ((u32)Td4[(t1 >> 8) & 0xff] << 8) ^
+ ((u32)Td4[(t0 ) & 0xff]) ^
rk[3];
PUTU32(out + 12, s3);
}
@@ -1233,10 +1233,10 @@ int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
while (1) {
temp = rk[3];
rk[4] = rk[0] ^
- (Te4[(temp >> 16) & 0xff] << 24) ^
- (Te4[(temp >> 8) & 0xff] << 16) ^
- (Te4[(temp ) & 0xff] << 8) ^
- (Te4[(temp >> 24) ]) ^
+ ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
+ ((u32)Te4[(temp >> 8) & 0xff] << 16) ^
+ ((u32)Te4[(temp ) & 0xff] << 8) ^
+ ((u32)Te4[(temp >> 24) ]) ^
rcon[i];
rk[5] = rk[1] ^ rk[4];
rk[6] = rk[2] ^ rk[5];
@@ -1253,10 +1253,10 @@ int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
while (1) {
temp = rk[ 5];
rk[ 6] = rk[ 0] ^
- (Te4[(temp >> 16) & 0xff] << 24) ^
- (Te4[(temp >> 8) & 0xff] << 16) ^
- (Te4[(temp ) & 0xff] << 8) ^
- (Te4[(temp >> 24) ]) ^
+ ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
+ ((u32)Te4[(temp >> 8) & 0xff] << 16) ^
+ ((u32)Te4[(temp ) & 0xff] << 8) ^
+ ((u32)Te4[(temp >> 24) ]) ^
rcon[i];
rk[ 7] = rk[ 1] ^ rk[ 6];
rk[ 8] = rk[ 2] ^ rk[ 7];
@@ -1275,10 +1275,10 @@ int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
while (1) {
temp = rk[ 7];
rk[ 8] = rk[ 0] ^
- (Te4[(temp >> 16) & 0xff] << 24) ^
- (Te4[(temp >> 8) & 0xff] << 16) ^
- (Te4[(temp ) & 0xff] << 8) ^
- (Te4[(temp >> 24) ]) ^
+ ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
+ ((u32)Te4[(temp >> 8) & 0xff] << 16) ^
+ ((u32)Te4[(temp ) & 0xff] << 8) ^
+ ((u32)Te4[(temp >> 24) ]) ^
rcon[i];
rk[ 9] = rk[ 1] ^ rk[ 8];
rk[10] = rk[ 2] ^ rk[ 9];
@@ -1288,10 +1288,10 @@ int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
}
temp = rk[11];
rk[12] = rk[ 4] ^
- (Te4[(temp >> 24) ] << 24) ^
- (Te4[(temp >> 16) & 0xff] << 16) ^
- (Te4[(temp >> 8) & 0xff] << 8) ^
- (Te4[(temp ) & 0xff]);
+ ((u32)Te4[(temp >> 24) ] << 24) ^
+ ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
+ ((u32)Te4[(temp >> 8) & 0xff] << 8) ^
+ ((u32)Te4[(temp ) & 0xff]);
rk[13] = rk[ 5] ^ rk[12];
rk[14] = rk[ 6] ^ rk[13];
rk[15] = rk[ 7] ^ rk[14];
diff --git a/crypto/aes/aes_x86core.c b/crypto/aes/aes_x86core.c
index f651059054..1defbb1abf 100644
--- a/crypto/aes/aes_x86core.c
+++ b/crypto/aes/aes_x86core.c
@@ -497,10 +497,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
while (1) {
temp = rk[3];
rk[4] = rk[0] ^
- (Te4[(temp >> 8) & 0xff] ) ^
- (Te4[(temp >> 16) & 0xff] << 8) ^
- (Te4[(temp >> 24) ] << 16) ^
- (Te4[(temp ) & 0xff] << 24) ^
+ ((u32)Te4[(temp >> 8) & 0xff] ) ^
+ ((u32)Te4[(temp >> 16) & 0xff] << 8) ^
+ ((u32)Te4[(temp >> 24) ] << 16) ^
+ ((u32)Te4[(temp ) & 0xff] << 24) ^
rcon[i];
rk[5] = rk[1] ^ rk[4];
rk[6] = rk[2] ^ rk[5];
@@ -517,10 +517,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
while (1) {
temp = rk[ 5];
rk[ 6] = rk[ 0] ^
- (Te4[(temp >> 8) & 0xff] ) ^
- (Te4[(temp >> 16) & 0xff] << 8) ^
- (Te4[(temp >> 24) ] << 16) ^
- (Te4[(temp ) & 0xff] << 24) ^
+ ((u32)Te4[(temp >> 8) & 0xff] ) ^
+ ((u32)Te4[(temp >> 16) & 0xff] << 8) ^
+ ((u32)Te4[(temp >> 24) ] << 16) ^
+ ((u32)Te4[(temp ) & 0xff] << 24) ^
rcon[i];
rk[ 7] = rk[ 1] ^ rk[ 6];
rk[ 8] = rk[ 2] ^ rk[ 7];
@@ -539,10 +539,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
while (1) {
temp = rk[ 7];
rk[ 8] = rk[ 0] ^
- (Te4[(temp >> 8) & 0xff] ) ^
- (Te4[(temp >> 16) & 0xff] << 8) ^
- (Te4[(temp >> 24) ] << 16) ^
- (Te4[(temp ) & 0xff] << 24) ^
+ ((u32)Te4[(temp >> 8) & 0xff] ) ^
+ ((u32)Te4[(temp >> 16) & 0xff] << 8) ^
+ ((u32)Te4[(temp >> 24) ] << 16) ^
+ ((u32)Te4[(temp ) & 0xff] << 24) ^
rcon[i];
rk[ 9] = rk[ 1] ^ rk[ 8];
rk[10] = rk[ 2] ^ rk[ 9];
@@ -552,10 +552,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
}
temp = rk[11];
rk[12] = rk[ 4] ^
- (Te4[(temp ) & 0xff] ) ^
- (Te4[(temp >> 8) & 0xff] << 8) ^
- (Te4[(temp >> 16) & 0xff] << 16) ^
- (Te4[(temp >> 24) ] << 24);
+ ((u32)Te4[(temp ) & 0xff] ) ^
+ ((u32)Te4[(temp >> 8) & 0xff] << 8) ^
+ ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
+ ((u32)Te4[(temp >> 24) ] << 24);
rk[13] = rk[ 5] ^ rk[12];
rk[14] = rk[ 6] ^ rk[13];
rk[15] = rk[ 7] ^ rk[14];
@@ -674,22 +674,22 @@ void AES_encrypt(const unsigned char *in, unsigned char *out,
#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
prefetch256(Te4);
- t[0] = Te4[(s0 ) & 0xff] ^
- Te4[(s1 >> 8) & 0xff] << 8 ^
- Te4[(s2 >> 16) & 0xff] << 16 ^
- Te4[(s3 >> 24) ] << 24;
- t[1] = Te4[(s1 ) & 0xff] ^
- Te4[(s2 >> 8) & 0xff] << 8 ^
- Te4[(s3 >> 16) & 0xff] << 16 ^
- Te4[(s0 >> 24) ] << 24;
- t[2] = Te4[(s2 ) & 0xff] ^
- Te4[(s3 >> 8) & 0xff] << 8 ^
- Te4[(s0 >> 16) & 0xff] << 16 ^
- Te4[(s1 >> 24) ] << 24;
- t[3] = Te4[(s3 ) & 0xff] ^
- Te4[(s0 >> 8) & 0xff] << 8 ^
- Te4[(s1 >> 16) & 0xff] << 16 ^
- Te4[(s2 >> 24) ] << 24;
+ t[0] = (u32)Te4[(s0 ) & 0xff] ^
+ (u32)Te4[(s1 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s3 >> 24) ] << 24;
+ t[1] = (u32)Te4[(s1 ) & 0xff] ^
+ (u32)Te4[(s2 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s0 >> 24) ] << 24;
+ t[2] = (u32)Te4[(s2 ) & 0xff] ^
+ (u32)Te4[(s3 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s1 >> 24) ] << 24;
+ t[3] = (u32)Te4[(s3 ) & 0xff] ^
+ (u32)Te4[(s0 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s2 >> 24) ] << 24;
/* now do the linear transform using words */
{ int i;
@@ -740,22 +740,22 @@ void AES_encrypt(const unsigned char *in, unsigned char *out,
*/
for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
#if defined(AES_COMPACT_IN_INNER_ROUNDS)
- t[0] = Te4[(s0 ) & 0xff] ^
- Te4[(s1 >> 8) & 0xff] << 8 ^
- Te4[(s2 >> 16) & 0xff] << 16 ^
- Te4[(s3 >> 24) ] << 24;
- t[1] = Te4[(s1 ) & 0xff] ^
- Te4[(s2 >> 8) & 0xff] << 8 ^
- Te4[(s3 >> 16) & 0xff] << 16 ^
- Te4[(s0 >> 24) ] << 24;
- t[2] = Te4[(s2 ) & 0xff] ^
- Te4[(s3 >> 8) & 0xff] << 8 ^
- Te4[(s0 >> 16) & 0xff] << 16 ^
- Te4[(s1 >> 24) ] << 24;
- t[3] = Te4[(s3 ) & 0xff] ^
- Te4[(s0 >> 8) & 0xff] << 8 ^
- Te4[(s1 >> 16) & 0xff] << 16 ^
- Te4[(s2 >> 24) ] << 24;
+ t[0] = (u32)Te4[(s0 ) & 0xff] ^
+ (u32)Te4[(s1 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s3 >> 24) ] << 24;
+ t[1] = (u32)Te4[(s1 ) & 0xff] ^
+ (u32)Te4[(s2 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s0 >> 24) ] << 24;
+ t[2] = (u32)Te4[(s2 ) & 0xff] ^
+ (u32)Te4[(s3 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s1 >> 24) ] << 24;
+ t[3] = (u32)Te4[(s3 ) & 0xff] ^
+ (u32)Te4[(s0 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s2 >> 24) ] << 24;
/* now do the linear transform using words */
{
@@ -810,28 +810,28 @@ void AES_encrypt(const unsigned char *in, unsigned char *out,
prefetch256(Te4);
*(u32*)(out+0) =
- Te4[(s0 ) & 0xff] ^
- Te4[(s1 >> 8) & 0xff] << 8 ^
- Te4[(s2 >> 16) & 0xff] << 16 ^
- Te4[(s3 >> 24) ] << 24 ^
+ (u32)Te4[(s0 ) & 0xff] ^
+ (u32)Te4[(s1 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s3 >> 24) ] << 24 ^
rk[0];
*(u32*)(out+4) =
- Te4[(s1 ) & 0xff] ^
- Te4[(s2 >> 8) & 0xff] << 8 ^
- Te4[(s3 >> 16) & 0xff] << 16 ^
- Te4[(s0 >> 24) ] << 24 ^
+ (u32)Te4[(s1 ) & 0xff] ^
+ (u32)Te4[(s2 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s0 >> 24) ] << 24 ^
rk[1];
*(u32*)(out+8) =
- Te4[(s2 ) & 0xff] ^
- Te4[(s3 >> 8) & 0xff] << 8 ^
- Te4[(s0 >> 16) & 0xff] << 16 ^
- Te4[(s1 >> 24) ] << 24 ^
+ (u32)Te4[(s2 ) & 0xff] ^
+ (u32)Te4[(s3 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s1 >> 24) ] << 24 ^
rk[2];
*(u32*)(out+12) =
- Te4[(s3 ) & 0xff] ^
- Te4[(s0 >> 8) & 0xff] << 8 ^
- Te4[(s1 >> 16) & 0xff] << 16 ^
- Te4[(s2 >> 24) ] << 24 ^
+ (u32)Te4[(s3 ) & 0xff] ^
+ (u32)Te4[(s0 >> 8) & 0xff] << 8 ^
+ (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
+ (u32)Te4[(s2 >> 24) ] << 24 ^
rk[3];
#else
*(u32*)(out+0) =
@@ -888,22 +888,22 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
prefetch256(Td4);
- t[0] = Td4[(s0 ) & 0xff] ^
- Td4[(s3 >> 8) & 0xff] << 8 ^
- Td4[(s2 >> 16) & 0xff] << 16 ^
- Td4[(s1 >> 24) ] << 24;
- t[1] = Td4[(s1 ) & 0xff] ^
- Td4[(s0 >> 8) & 0xff] << 8 ^
- Td4[(s3 >> 16) & 0xff] << 16 ^
- Td4[(s2 >> 24) ] << 24;
- t[2] = Td4[(s2 ) & 0xff] ^
- Td4[(s1 >> 8) & 0xff] << 8 ^
- Td4[(s0 >> 16) & 0xff] << 16 ^
- Td4[(s3 >> 24) ] << 24;
- t[3] = Td4[(s3 ) & 0xff] ^
- Td4[(s2 >> 8) & 0xff] << 8 ^
- Td4[(s1 >> 16) & 0xff] << 16 ^
- Td4[(s0 >> 24) ] << 24;
+ t[0] = (u32)Td4[(s0 ) & 0xff] ^
+ (u32)Td4[(s3 >> 8) & 0xff] << 8 ^
+ (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
+ (u32)Td4[(s1 >> 24) ] << 24;
+ t[1] = (u32)Td4[(s1 ) & 0xff] ^
+ (u32)Td4[(s0 >> 8) & 0xff] << 8 ^
+ (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
+ (u32)Td4[(s2 >> 24) ] << 24;
+ t[2] = (u32)Td4[(s2 ) & 0xff] ^
+ (u32)Td4[(s1 >> 8) & 0xff] << 8 ^
+ (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
+ (u32)Td4[(s3 >> 24) ] << 24;
+ t[3] = (u32)Td4[(s3 ) & 0xff] ^
+ (u32)Td4[(s2 >> 8) & 0xff] << 8 ^
+ (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
+ (u32)Td4[(s0 >> 24) ] << 24;
/* now do the linear transform using words */
{
@@ -965,22 +965,22 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
*/
for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
#if defined(AES_COMPACT_IN_INNER_ROUNDS)
- t[0] = Td4[(s0 ) & 0xff] ^
- Td4[(s3 >> 8) & 0xff] << 8 ^
- Td4[(s2 >> 16) & 0xff] << 16 ^
- Td4[(s1 >> 24) ] << 24;
- t[1] = Td4[(s1 ) & 0xff] ^
- Td4[(s0 >> 8) & 0xff] << 8 ^
- Td4[(s3 >> 16) & 0xff] << 16 ^
- Td4[(s2 >> 24) ] << 24;
- t[2] = Td4[(s2 ) & 0xff] ^
- Td4[(s1 >> 8) & 0xff] << 8 ^
- Td4[(s0 >> 16) & 0xff] << 16 ^
- Td4[(s3 >> 24) ] << 24;
- t[3] = Td4[(s3 ) & 0xff] ^
- Td4[(s2 >> 8) & 0xff] << 8 ^
- Td4[(s1 >> 16) & 0xff] << 16 ^
- Td4[(s0 >> 24) ] << 24;
+ t[0] = (u32)Td4[(s0 ) & 0xff] ^
+ (u32)Td4[(s3 >> 8) & 0xff] << 8 ^
+ (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
+ (u32)Td4[(s1 >> 24) ] << 24;
+ t[1] = (u32)Td4[(s1 ) & 0xff] ^
+ (u32)Td4[(s0 >> 8) & 0xff] << 8 ^
+ (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
+ (u32)Td4[(s2 >> 24) ] << 24;
+ t[2] = (u32)Td4[(s2 ) & 0xff] ^
+ (u32)Td4[(s1 >> 8) & 0xff] << 8 ^
+ (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
+ (u32)Td4[(s3 >> 24) ] << 24;
+ t[3] = (u32)Td4[(s3 ) & 0xff] ^
+ (u32)Td4[(s2 >> 8) & 0xff] << 8 ^
+ (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
+ (u32)Td4[(s0 >> 24) ] << 24;
/* now do the linear transform using words */
{
@@ -1044,27 +1044,27 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
prefetch256(Td4);
*(u32*)(out+0) =
- (Td4[(s0 ) & 0xff]) ^
- (Td4[(s3 >> 8) & 0xff] << 8) ^
- (Td4[(s2 >> 16) & 0xff] << 16) ^
- (Td4[(s1 >> 24) ] << 24) ^
+ ((u32)Td4[(s0 ) & 0xff]) ^
+ ((u32)Td4[(s3 >> 8) & 0xff] << 8) ^
+ ((u32)Td4[(s2 >> 16) & 0xff] << 16) ^
+ ((u32)Td4[(s1 >> 24) ] << 24) ^
rk[0];
*(u32*)(out+4) =
- (Td4[(s1 ) & 0xff]) ^
- (Td4[(s0 >> 8) & 0xff] << 8) ^
- (Td4[(s3 >> 16) & 0xff] << 16) ^
- (Td4[(s2 >> 24) ] << 24) ^
+ ((u32)Td4[(s1 ) & 0xff]) ^
+ ((u32)Td4[(s0 >> 8) & 0xff] << 8) ^
+ ((u32)Td4[(s3 >> 16) & 0xff] << 16) ^
+ ((u32)Td4[(s2 >> 24) ] << 24) ^
rk[1];
*(u32*)(out+8) =
- (Td4[(s2 ) & 0xff]) ^
- (Td4[(s1 >> 8) & 0xff] << 8) ^
- (Td4[(s0 >> 16) & 0xff] << 16) ^
- (Td4[(s3 >> 24) ] << 24) ^
+ ((u32)Td4[(s2 ) & 0xff]) ^
+ ((u32)Td4[(s1 >> 8) & 0xff] << 8) ^
+ ((u32)Td4[(s0 >> 16) & 0xff] << 16) ^
+ ((u32)Td4[(s3 >> 24) ] << 24) ^
rk[2];
*(u32*)(out+12) =
- (Td4[(s3 ) & 0xff]) ^
- (Td4[(s2 >> 8) & 0xff] << 8) ^
- (Td4[(s1 >> 16) & 0xff] << 16) ^
- (Td4[(s0 >> 24) ] << 24) ^
+ ((u32)Td4[(s3 ) & 0xff]) ^
+ ((u32)Td4[(s2 >> 8) & 0xff] << 8) ^
+ ((u32)Td4[(s1 >> 16) & 0xff] << 16) ^
+ ((u32)Td4[(s0 >> 24) ] << 24) ^
rk[3];
}