summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLegrandin <helderijs@gmail.com>2013-06-29 18:35:49 +0200
committerDwayne Litzenberger <dlitz@dlitz.net>2013-10-20 13:30:22 -0700
commit965871a72773457d73fda6a1a2970a4279dcbe6f (patch)
treebe1042cfe897159ae7eefba94e51cb9feb23c596
parentc5787d70f52dc9e78b8e859bd4cae8e75ce2cf41 (diff)
downloadpycrypto-965871a72773457d73fda6a1a2970a4279dcbe6f.tar.gz
GCM mode: Optimize key setup for GCM mode.
GCM mode requires GHASH for 2 different operations: one for the data (AD + ciphertext) and one for the IV. Constructing the tables that speed up GHASH is very expensive, and it is worth doing only for the data, not for the IV. This patch ensures that the GHASH for the IV does not use tables, which makes key setup ~40% faster. [dlitz@dlitz.net: Whitespace fixed with "git rebase --whitespace=fix"]
-rw-r--r--lib/Crypto/Cipher/blockalgo.py13
-rw-r--r--src/galois.c95
2 files changed, 87 insertions, 21 deletions
diff --git a/lib/Crypto/Cipher/blockalgo.py b/lib/Crypto/Cipher/blockalgo.py
index 9ac8710..0d21f44 100644
--- a/lib/Crypto/Cipher/blockalgo.py
+++ b/lib/Crypto/Cipher/blockalgo.py
@@ -329,14 +329,17 @@ class _GHASH(_SmoothMAC):
(x^128 + x^7 + x^2 + x + 1).
"""
- def __init__(self, hash_subkey, block_size):
+ def __init__(self, hash_subkey, block_size, table_size='64K'):
_SmoothMAC.__init__(self, block_size, None, 0)
- self._hash_subkey = galois._ghash_expand(hash_subkey)
+ if table_size == '64K':
+ self._hash_subkey = galois._ghash_expand(hash_subkey)
+ else:
+ self._hash_subkey = hash_subkey
self._last_y = bchr(0) * 16
self._mac = galois._ghash
def copy(self):
- clone = _GHASH(self._hash_subkey, self._bs)
+ clone = _GHASH(self._hash_subkey, self._bs, table_size='0K')
_SmoothMAC._deep_copy(self, clone)
clone._last_y = self._last_y
return clone
@@ -433,7 +436,7 @@ class BlockAlgo:
bchr(0) * fill +
long_to_bytes(8 * len(self.nonce), 8))
- mac = _GHASH(hash_subkey, factory.block_size)
+ mac = _GHASH(hash_subkey, factory.block_size, '0K')
mac.update(ghash_in)
self._j0 = bytes_to_long(mac.digest())
@@ -443,7 +446,7 @@ class BlockAlgo:
self._cipher = self._factory.new(key, MODE_CTR, counter=ctr)
# Step 5 - Bootstrat GHASH
- self._cipherMAC = _GHASH(hash_subkey, factory.block_size)
+ self._cipherMAC = _GHASH(hash_subkey, factory.block_size, '64K')
# Step 6 - Prepare GCTR cipher for GMAC
ctr = Counter.new(128, initial_value=self._j0, allow_wraparound=True)
diff --git a/src/galois.c b/src/galois.c
index 3c76c99..2660044 100644
--- a/src/galois.c
+++ b/src/galois.c
@@ -93,6 +93,42 @@ static const t_v_tables* make_v_tables(const uint8_t y[16])
}
/**
+ * Multiply two elements of GF(2**128) using the reducing polynomial
+ * (x^128 + x^7 + x^2 + x + 1).
+ */
+static void gcm_mult(uint8_t out[16], const uint8_t x[16], const uint8_t y[16])
+{
+ uint64_t z[2], v[2];
+ int i;
+
+ /** z, v = 0, y **/
+ z[0] = z[1] = 0;
+ v[0] = be_to_word(&y[0]);
+ v[1] = be_to_word(&y[8]);
+
+ for (i=0; i<16; i++) {
+ uint8_t j;
+
+ for (j=0x80; j>0; j>>=1) {
+ uint64_t c;
+
+ /** z ^= (x>>i&1)*v **/
+ if (x[i] & j) {
+
+ z[0] ^= v[0];
+ z[1] ^= v[1];
+ }
+ /** v = (v&1)*0xE1000000000000000000000000000000L ^ (v>>1) **/
+ c = v[1]&1 ? 0xE100000000000000 : 0;
+ v[1] = v[1]>>1 | (v[0] << 63);
+ v[0] = v[0]>>1 ^ c;
+ }
+ }
+ word_to_be(out, z[0]);
+ word_to_be(out+8, z[1]);
+}
+
+/**
* Multiply two elements of GF(2**128) using the reducing polynomial
* (x^128 + x^7 + x^2 + x + 1).
*
@@ -177,31 +213,58 @@ static int ghash_expand(t_key_tables *key_tables, const uint8_t h[16])
* Compute the GHASH of a piece of an arbitrary data given an
* arbitrary Y_0, as specified in NIST SP 800 38D.
*
- * \param y_out The resulting GHASH (16 bytes).
- * \param block_data Pointer to the data to hash.
- * \param len Length of the data to hash (multiple of 16).
- * \param y_in The initial Y (Y_0, 16 bytes).
- * \param key_tables The expanded hash key (16*256*16 bytes).
+ * \param y_out The resulting GHASH (16 bytes).
+ * \param block_data Pointer to the data to hash.
+ * \param len Length of the data to hash (multiple of 16).
+ * \param y_in The initial Y (Y_0, 16 bytes).
+ * \param key_tables The hash key, possibly expanded to 16*256*16 bytes.
+ * \param key_tables_len The length of the data pointed to by key_tables.
*/
static void ghash(
uint8_t y_out[16],
const uint8_t block_data[],
int len,
const uint8_t y_in[16],
- const t_key_tables *key_tables
+ const void *key_tables,
+ int key_tables_len
)
{
- int i;
+ int i, j;
+ uint8_t x[16];
+ const t_key_tables *key_tables_64 = NULL;
+ const uint8_t (*key)[16] = NULL;
+
+ switch (key_tables_len) {
+ case sizeof(t_key_tables):
+ {
+ key_tables_64 = (const t_key_tables*) key_tables;
+ break;
+ }
+ case 16:
+ {
+ key = (const uint8_t (*)[16]) key_tables;
+ break;
+ }
+ default:
+ return;
+ }
memcpy(y_out, y_in, 16);
- for (i=0; i<len; i+=16) {
- int j;
- uint8_t x[16];
- for (j=0; j<16; j++) {
- x[j] = y_out[j] ^ block_data[i+j];
+ if (key_tables_64) {
+ for (i=0; i<len; i+=16) {
+ for (j=0; j<16; j++) {
+ x[j] = y_out[j] ^ block_data[i+j];
+ }
+ gcm_mult2(y_out, key_tables_64, x);
+ }
+ } else {
+ for (i=0; i<len; i+=16) {
+ for (j=0; j<16; j++) {
+ x[j] = y_out[j] ^ block_data[i+j];
+ }
+ gcm_mult(y_out, *key, x);
}
- gcm_mult2(y_out, key_tables, x);
}
}
@@ -284,8 +347,8 @@ ghash_function(PyObject *self, PyObject *args)
goto out;
}
- if (len_exp_h!=sizeof(t_key_tables)) {
- PyErr_SetString(PyExc_ValueError, "Length of expanded h is incorrect.");
+ if (len_exp_h!=sizeof(t_key_tables) && len_exp_h!=16) {
+ PyErr_SetString(PyExc_ValueError, "Length of expanded key is incorrect.");
goto out;
}
@@ -301,7 +364,7 @@ ghash_function(PyObject *self, PyObject *args)
ghash( PyBytes_Buffer(retval), PyBytes_Buffer(data), len_data,
PyBytes_Buffer(y),
- (const t_key_tables*)PyBytes_AS_STRING(exp_h));
+ PyBytes_Buffer(exp_h), len_exp_h );
#undef PyBytes_Buffer