diff options
author | Sebastian Ramacher <sebastian@ramacher.at> | 2013-02-04 14:44:29 +0100 |
---|---|---|
committer | Dwayne Litzenberger <dlitz@dlitz.net> | 2013-04-21 20:41:18 -0700 |
commit | e1ce77b1673db76fb46d87effa7b1a1dc083d9b7 (patch) | |
tree | 3c999461384918aa9b1c2f10813db7211e2534b1 /src | |
parent | 1dd8353cc490f954677285415ec01e253f84b93d (diff) | |
download | pycrypto-e1ce77b1673db76fb46d87effa7b1a1dc083d9b7.tar.gz |
Initial AES-NI support
Diffstat (limited to 'src')
-rw-r--r-- | src/AESNI.c | 227 | ||||
-rw-r--r-- | src/config.h.in | 9 | ||||
-rw-r--r-- | src/cpuid.c | 100 |
3 files changed, 336 insertions, 0 deletions
diff --git a/src/AESNI.c b/src/AESNI.c new file mode 100644 index 0000000..bd80581 --- /dev/null +++ b/src/AESNI.c @@ -0,0 +1,227 @@ +/* + * AESNI.c: AES using AES-NI instructions + * + * Written in 2013 by Sebastian Ramacher <sebastian@ramacher.at> + * + * =================================================================== + * The contents of this file are dedicated to the public domain. To + * the extent that dedication to the public domain is not available, + * everyone is granted a worldwide, perpetual, royalty-free, + * non-exclusive license to exercise all rights associated with the + * contents of this file for any purpose whatsoever. + * No rights are reserved. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * =================================================================== + */ + +#include <assert.h> +#include <stdlib.h> +#include "Python.h" +#include <wmmintrin.h> + +#define MODULE_NAME _AESNI +#define BLOCK_SIZE 16 +#define KEY_SIZE 0 + +#define MAXKC (256/32) +#define MAXKB (256/8) +#define MAXNR 14 + +typedef unsigned char u8; + +typedef struct { + __m128i ek[MAXNR + 1]; + __m128i dk[MAXNR + 1]; + int rounds; +} block_state; + +/* Helper functions to expand keys */ + +static __m128i aes128_keyexpand(__m128i key, __m128i keygened, int shuf) +{ + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + keygened = _mm_shuffle_epi32(keygened, shuf); + return _mm_xor_si128(key, keygened); +} + +static __m128i aes192_keyexpand_2(__m128i key, __m128i key2) +{ + key = _mm_shuffle_epi32(key, 0xff); + key2 = _mm_xor_si128(key2, _mm_slli_si128(key2, 4)); + return _mm_xor_si128(key, key2); +} + +#define KEYEXP128(K, I) aes128_keyexpand(K, _mm_aeskeygenassist_si128(K, I), 0xff) +#define KEYEXP192(K1, K2, I) aes128_keyexpand(K1, _mm_aeskeygenassist_si128(K2, I), 0x55) +#define KEYEXP192_2(K1, K2) aes192_keyexpand_2(K1, K2) +#define KEYEXP256(K1, K2, I) aes128_keyexpand(K1, _mm_aeskeygenassist_si128(K2, I), 0xff) +#define KEYEXP256_2(K1, K2) aes128_keyexpand(K1, _mm_aeskeygenassist_si128(K2, 0x00), 0xaa) + +/* Encryption key setup */ +static void aes_key_setup_enc(__m128i rk[], const u8* cipherKey, int keylen) +{ + switch (keylen) { + case 16: + { + /* 128 bit key setup */ + rk[0] = _mm_loadu_si128((const __m128i*) cipherKey); + rk[1] = KEYEXP128(rk[0], 0x01); + rk[2] = KEYEXP128(rk[1], 0x02); + rk[3] = KEYEXP128(rk[2], 0x04); + rk[4] = KEYEXP128(rk[3], 0x08); + rk[5] = KEYEXP128(rk[4], 0x10); + rk[6] = KEYEXP128(rk[5], 0x20); + rk[7] = KEYEXP128(rk[6], 0x40); + rk[8] = KEYEXP128(rk[7], 0x80); + rk[9] = KEYEXP128(rk[8], 0x1B); + rk[10] = KEYEXP128(rk[9], 0x36); + break; + } + case 24: + { + /* 192 bit key setup */ + __m128i temp[2]; + rk[0] = _mm_loadu_si128((const __m128i*) cipherKey); + rk[1] = _mm_loadu_si128((const __m128i*) (cipherKey+16)); + temp[0] = KEYEXP192(rk[0], rk[1], 0x01); + temp[1] = KEYEXP192_2(temp[0], rk[1]); + rk[1] = (__m128i)_mm_shuffle_pd((__m128d)rk[1], (__m128d)temp[0], 0); + rk[2] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1); + rk[3] = KEYEXP192(temp[0], temp[1], 0x02); + rk[4] = KEYEXP192_2(rk[3], temp[1]); + temp[0] = KEYEXP192(rk[3], rk[4], 0x04); + temp[1] = KEYEXP192_2(temp[0], rk[4]); + rk[4] = (__m128i)_mm_shuffle_pd((__m128d)rk[4], (__m128d)temp[0], 0); + rk[5] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1); + rk[6] = KEYEXP192(temp[0], temp[1], 0x08); + rk[7] = KEYEXP192_2(rk[6], temp[1]); + temp[0] = KEYEXP192(rk[6], rk[7], 0x10); + temp[1] = KEYEXP192_2(temp[0], rk[7]); + rk[7] = (__m128i)_mm_shuffle_pd((__m128d)rk[7], (__m128d)temp[0], 0); + rk[8] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1); + rk[9] = KEYEXP192(temp[0], temp[1], 0x20); + rk[10] = KEYEXP192_2(rk[9], temp[1]); + temp[0] = KEYEXP192(rk[9], rk[10], 0x40); + temp[1] = KEYEXP192_2(temp[0], rk[10]); + rk[10] = (__m128i)_mm_shuffle_pd((__m128d)rk[10], (__m128d) temp[0], 0); + rk[11] = (__m128i)_mm_shuffle_pd((__m128d)temp[0],(__m128d) temp[1], 1); + rk[12] = KEYEXP192(temp[0], temp[1], 0x80); + break; + } + case 32: + { + /* 256 bit key setup */ + rk[0] = _mm_loadu_si128((const __m128i*) cipherKey); + rk[1] = _mm_loadu_si128((const __m128i*) (cipherKey+16)); + rk[2] = KEYEXP256(rk[0], rk[1], 0x01); + rk[3] = KEYEXP256_2(rk[1], rk[2]); + rk[4] = KEYEXP256(rk[2], rk[3], 0x02); + rk[5] = KEYEXP256_2(rk[3], rk[4]); + rk[6] = KEYEXP256(rk[4], rk[5], 0x04); + rk[7] = KEYEXP256_2(rk[5], rk[6]); + rk[8] = KEYEXP256(rk[6], rk[7], 0x08); + rk[9] = KEYEXP256_2(rk[7], rk[8]); + rk[10] = KEYEXP256(rk[8], rk[9], 0x10); + rk[11] = KEYEXP256_2(rk[9], rk[10]); + rk[12] = KEYEXP256(rk[10], rk[11], 0x20); + rk[13] = KEYEXP256_2(rk[11], rk[12]); + rk[14] = KEYEXP256(rk[12], rk[13], 0x40); + break; + } + } +} + +/* Decryption key setup */ +static void aes_key_setup_dec(__m128i dk[], const __m128i ek[], int rounds) +{ + dk[rounds] = ek[0]; + for (int i = 1; i < rounds; ++i) { + dk[rounds - i] = _mm_aesimc_si128(ek[i]); + } + dk[0] = ek[rounds]; +} + +static void block_init(block_state* self, unsigned char* key, int keylen) +{ + int nr = 0; + switch (keylen) { + case 16: nr = 10; break; + case 24: nr = 12; break; + case 32: nr = 14; break; + default: + PyErr_SetString(PyExc_ValueError, + "AES key must be either 16, 24, or 32 bytes long"); + return; + } + self->rounds = nr; + aes_key_setup_enc(self->ek, key, keylen); + aes_key_setup_dec(self->dk, self->ek, nr); +} + +static void block_encrypt(block_state* self, const u8* in, u8* out) +{ + __m128i m = _mm_loadu_si128((const __m128i*) in); + /* first 9 rounds */ + m = _mm_xor_si128(m, self->ek[0]); + m = _mm_aesenc_si128(m, self->ek[1]); + m = _mm_aesenc_si128(m, self->ek[2]); + m = _mm_aesenc_si128(m, self->ek[3]); + m = _mm_aesenc_si128(m, self->ek[4]); + m = _mm_aesenc_si128(m, self->ek[5]); + m = _mm_aesenc_si128(m, self->ek[6]); + m = _mm_aesenc_si128(m, self->ek[7]); + m = _mm_aesenc_si128(m, self->ek[8]); + m = _mm_aesenc_si128(m, self->ek[9]); + if (self->rounds != 10) { + /* two additional rounds for AES-192/256 */ + m = _mm_aesenc_si128(m, self->ek[10]); + m = _mm_aesenc_si128(m, self->ek[11]); + if (self->rounds == 14) { + /* another two additional rounds for AES-256 */ + m = _mm_aesenc_si128(m, self->ek[12]); + m = _mm_aesenc_si128(m, self->ek[13]); + } + } + m = _mm_aesenclast_si128(m, self->ek[self->rounds]); + _mm_storeu_si128((__m128i*) out, m); +} + +static void block_decrypt(block_state* self, const u8* in, u8* out) +{ + __m128i m = _mm_loadu_si128((const __m128i*) in); + /* first 9 rounds */ + m = _mm_xor_si128(m, self->dk[0]); + m = _mm_aesdec_si128(m, self->dk[1]); + m = _mm_aesdec_si128(m, self->dk[2]); + m = _mm_aesdec_si128(m, self->dk[3]); + m = _mm_aesdec_si128(m, self->dk[4]); + m = _mm_aesdec_si128(m, self->dk[5]); + m = _mm_aesdec_si128(m, self->dk[6]); + m = _mm_aesdec_si128(m, self->dk[7]); + m = _mm_aesdec_si128(m, self->dk[8]); + m = _mm_aesdec_si128(m, self->dk[9]); + if (self->rounds != 10) { + /* two additional rounds for AES-192/256 */ + m = _mm_aesdec_si128(m, self->dk[10]); + m = _mm_aesdec_si128(m, self->dk[11]); + if (self->rounds == 14) { + /* another two additional rounds for AES-256 */ + m = _mm_aesdec_si128(m, self->dk[12]); + m = _mm_aesdec_si128(m, self->dk[13]); + } + } + m = _mm_aesdeclast_si128(m, self->dk[self->rounds]); + _mm_storeu_si128((__m128i*) out, m); +} + +#include "block_template.c" diff --git a/src/config.h.in b/src/config.h.in index d89930c..b520f4e 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -1,5 +1,8 @@ /* src/config.h.in. Generated from configure.ac by autoheader. */ +/* Define to 1 if you have the <cpuid.h> header file. */ +#undef HAVE_CPUID_H + /* Define to 1 if you have the declaration of `mpz_powm', and to 0 if you don't. */ #undef HAVE_DECL_MPZ_POWM @@ -20,6 +23,9 @@ /* Define to 1 if you have the <limits.h> header file. */ #undef HAVE_LIMITS_H +/* Define if CC supports -maes */ +#undef HAVE_MAES + /* Define to 1 if your system has a GNU libc compatible `malloc' function, and to 0 otherwise. */ #undef HAVE_MALLOC @@ -63,6 +69,9 @@ /* Define to 1 if you have the <wchar.h> header file. */ #undef HAVE_WCHAR_H +/* Define to 1 if you have the <wmmintrin.h> header file. */ +#undef HAVE_WMMINTRIN_H + /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT diff --git a/src/cpuid.c b/src/cpuid.c new file mode 100644 index 0000000..e4d8b27 --- /dev/null +++ b/src/cpuid.c @@ -0,0 +1,100 @@ +/* + * cpuid.c: check CPU capabilities + * + * Written in 2013 by Sebastian Ramacher <sebastian@ramacher.at> + * + * =================================================================== + * The contents of this file are dedicated to the public domain. To + * the extent that dedication to the public domain is not available, + * everyone is granted a worldwide, perpetual, royalty-free, + * non-exclusive license to exercise all rights associated with the + * contents of this file for any purpose whatsoever. + * No rights are reserved. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * =================================================================== + */ +#include "Python.h" +#include <stdint.h> +#include "config.h" +#ifdef HAVE_CPUID_H +#include <cpuid.h> +#endif + +#include "pycrypto_compat.h" + +/* + * The have_aes_ni Python function + */ + +static char have_aes_ni__doc__[] = +"have_aes_ni() -> bool\n" +"\n" +"Return whether AES-NI instructions are available.\n"; + +static PyObject * +have_aes_ni(PyObject *self, PyObject *args) +{ +#ifndef HAVE_CPUID_H + Py_INCREF(Py_False); + return Py_False; +#else + uint32_t eax, ebx, ecx, edx; + /* call cpuid to check if AES-NI instructions are available */ + if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { + if (ecx & bit_AES) { + Py_INCREF(Py_True); + return Py_True; + } + } + Py_INCREF(Py_False); + return Py_False; +#endif +} + +/* + * Module-level method table and module initialization function + */ + +static PyMethodDef cpuid_methods[] = { + {"have_aes_ni", have_aes_ni, METH_NOARGS, have_aes_ni__doc__}, + {NULL, NULL, 0, NULL} /* end-of-list sentinel value */ +}; + +#ifdef IS_PY3K +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "cpuid", + NULL, + -1, + cpuid_methods, + NULL, + NULL, + NULL, + NULL +}; +#endif + +PyMODINIT_FUNC +#ifdef IS_PY3K +PyInit_cpuid(void) +#else +initcpuid(void) +#endif +{ + /* Initialize the module */ +#ifdef IS_PY3K + return PyModule_Create(&moduledef); +#else + Py_InitModule("cpuid", cpuid_methods); +#endif +} + +/* vim:set ts=4 sw=4 sts=4 expandtab: */ |