summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Sebastian Ramacher <sebastian@ramacher.at> 2013-02-04 14:44:29 +0100
committer Dwayne Litzenberger <dlitz@dlitz.net> 2013-04-21 20:41:18 -0700
commit e1ce77b1673db76fb46d87effa7b1a1dc083d9b7 (patch)
tree 3c999461384918aa9b1c2f10813db7211e2534b1 /src
parent 1dd8353cc490f954677285415ec01e253f84b93d (diff)
download pycrypto-e1ce77b1673db76fb46d87effa7b1a1dc083d9b7.tar.gz
Initial AES-NI support
Diffstat (limited to 'src')
-rw-r--r-- src/AESNI.c 227
-rw-r--r-- src/config.h.in 9
-rw-r--r-- src/cpuid.c 100
3 files changed, 336 insertions, 0 deletions
diff --git a/src/AESNI.c b/src/AESNI.c
new file mode 100644
index 0000000..bd80581
--- /dev/null
+++ b/src/AESNI.c
@@ -0,0 +1,227 @@
+/*
+ * AESNI.c: AES using AES-NI instructions
+ *
+ * Written in 2013 by Sebastian Ramacher <sebastian@ramacher.at>
+ *
+ * ===================================================================
+ * The contents of this file are dedicated to the public domain. To
+ * the extent that dedication to the public domain is not available,
+ * everyone is granted a worldwide, perpetual, royalty-free,
+ * non-exclusive license to exercise all rights associated with the
+ * contents of this file for any purpose whatsoever.
+ * No rights are reserved.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * ===================================================================
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include "Python.h"
+#include <wmmintrin.h>
+
+#define MODULE_NAME _AESNI
+#define BLOCK_SIZE 16
+#define KEY_SIZE 0
+
+#define MAXKC (256/32)
+#define MAXKB (256/8)
+#define MAXNR 14
+
+typedef unsigned char u8; /* one raw byte of key or block data */
+
+typedef struct { /* per-cipher AES state, filled in by block_init() */
+ __m128i ek[MAXNR + 1]; /* encryption round keys written by aes_key_setup_enc(); entries 0..rounds are used */
+ __m128i dk[MAXNR + 1]; /* decryption round keys derived from ek by aes_key_setup_dec() */
+ int rounds; /* 10, 12 or 14, selected from the key length in block_init() */
+} block_state; /* NOTE(review): __m128i members need 16-byte alignment -- confirm the allocator used by block_template.c guarantees it */
+
+/* Helper functions to expand keys */
+
+/*
+ * Word-mixing half of one key-schedule step.
+ *
+ * After the three shift-and-XOR passes every 32-bit word of the result is the
+ * XOR of the corresponding word of `key` and all lower words of `key`.  The
+ * transformed word produced by AESKEYGENASSIST is XORed in by the KEYEXP128_H
+ * macro below, not here.
+ *
+ * This helper is file-private and is only meant to be reached through the
+ * KEYEXP* macros.
+ */
+static __m128i aes128_keyexpand(__m128i key)
+{
+    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+    return _mm_xor_si128(key, _mm_slli_si128(key, 4));
+}
+
+/*
+ * Second half of an AES-192 schedule step: broadcast the top word of `key`
+ * and XOR it with `key2 ^ (key2 << 32 bits)`.
+ */
+static __m128i aes192_keyexpand_2(__m128i key, __m128i key2)
+{
+    key = _mm_shuffle_epi32(key, 0xff);
+    key2 = _mm_xor_si128(key2, _mm_slli_si128(key2, 4));
+    return _mm_xor_si128(key, key2);
+}
+
+/*
+ * KEYEXP128_H(K1, K2, I, S): mix K1 with aes128_keyexpand() and XOR in the
+ * AESKEYGENASSIST output for (K2, round constant I), shuffled by selector S.
+ *
+ * Both I and S are encoded as instruction immediates, so they have to reach
+ * the intrinsics as literal constants.  Doing the shuffle here in a macro,
+ * instead of passing the selector through a function parameter, keeps that
+ * true even when the compiler does not inline the helper (e.g. at -O0).
+ */
+#define KEYEXP128_H(K1, K2, I, S) _mm_xor_si128(aes128_keyexpand(K1), \
+    _mm_shuffle_epi32(_mm_aeskeygenassist_si128(K2, I), S))
+
+#define KEYEXP128(K, I) KEYEXP128_H(K, K, I, 0xff)
+#define KEYEXP192(K1, K2, I) KEYEXP128_H(K1, K2, I, 0x55)
+#define KEYEXP192_2(K1, K2) aes192_keyexpand_2(K1, K2)
+#define KEYEXP256(K1, K2, I) KEYEXP128_H(K1, K2, I, 0xff)
+#define KEYEXP256_2(K1, K2) KEYEXP128_H(K1, K2, 0x00, 0xaa)
+
+/*
+ * Encryption key setup
+ *
+ * Expands the `keylen`-byte key at `cipherKey` into AES round keys:
+ *   keylen 16 -> rk[0..10]
+ *   keylen 24 -> rk[0..12]
+ *   keylen 32 -> rk[0..14]
+ * Any other `keylen` leaves `rk` untouched.
+ */
+static void aes_key_setup_enc(__m128i rk[], const u8* cipherKey, int keylen)
+{
+    switch (keylen) {
+        case 16:
+        {
+            /* 128 bit key setup */
+            rk[0] = _mm_loadu_si128((const __m128i*) cipherKey);
+            rk[1] = KEYEXP128(rk[0], 0x01);
+            rk[2] = KEYEXP128(rk[1], 0x02);
+            rk[3] = KEYEXP128(rk[2], 0x04);
+            rk[4] = KEYEXP128(rk[3], 0x08);
+            rk[5] = KEYEXP128(rk[4], 0x10);
+            rk[6] = KEYEXP128(rk[5], 0x20);
+            rk[7] = KEYEXP128(rk[6], 0x40);
+            rk[8] = KEYEXP128(rk[7], 0x80);
+            rk[9] = KEYEXP128(rk[8], 0x1B);
+            rk[10] = KEYEXP128(rk[9], 0x36);
+            break;
+        }
+        case 24:
+        {
+            /* 192 bit key setup */
+            __m128i temp[2];
+            rk[0] = _mm_loadu_si128((const __m128i*) cipherKey);
+            /* Only 8 key bytes remain after the first 16.  Load exactly
+             * those (upper half zeroed) rather than a full 16 bytes, which
+             * would read 8 bytes past the end of the key buffer.  The upper
+             * half of rk[1] is replaced by the shuffle below before rk[1] is
+             * used as a round key, so the schedule is unchanged. */
+            rk[1] = _mm_loadl_epi64((const __m128i*) (cipherKey+16));
+            temp[0] = KEYEXP192(rk[0], rk[1], 0x01);
+            temp[1] = KEYEXP192_2(temp[0], rk[1]);
+            /* 192-bit steps yield 1.5 round keys each; the shuffles
+             * re-pack the 64-bit halves into full 128-bit round keys. */
+            rk[1] = (__m128i)_mm_shuffle_pd((__m128d)rk[1], (__m128d)temp[0], 0);
+            rk[2] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
+            rk[3] = KEYEXP192(temp[0], temp[1], 0x02);
+            rk[4] = KEYEXP192_2(rk[3], temp[1]);
+            temp[0] = KEYEXP192(rk[3], rk[4], 0x04);
+            temp[1] = KEYEXP192_2(temp[0], rk[4]);
+            rk[4] = (__m128i)_mm_shuffle_pd((__m128d)rk[4], (__m128d)temp[0], 0);
+            rk[5] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
+            rk[6] = KEYEXP192(temp[0], temp[1], 0x08);
+            rk[7] = KEYEXP192_2(rk[6], temp[1]);
+            temp[0] = KEYEXP192(rk[6], rk[7], 0x10);
+            temp[1] = KEYEXP192_2(temp[0], rk[7]);
+            rk[7] = (__m128i)_mm_shuffle_pd((__m128d)rk[7], (__m128d)temp[0], 0);
+            rk[8] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
+            rk[9] = KEYEXP192(temp[0], temp[1], 0x20);
+            rk[10] = KEYEXP192_2(rk[9], temp[1]);
+            temp[0] = KEYEXP192(rk[9], rk[10], 0x40);
+            temp[1] = KEYEXP192_2(temp[0], rk[10]);
+            rk[10] = (__m128i)_mm_shuffle_pd((__m128d)rk[10], (__m128d) temp[0], 0);
+            rk[11] = (__m128i)_mm_shuffle_pd((__m128d)temp[0],(__m128d) temp[1], 1);
+            rk[12] = KEYEXP192(temp[0], temp[1], 0x80);
+            break;
+        }
+        case 32:
+        {
+            /* 256 bit key setup */
+            rk[0] = _mm_loadu_si128((const __m128i*) cipherKey);
+            rk[1] = _mm_loadu_si128((const __m128i*) (cipherKey+16));
+            rk[2] = KEYEXP256(rk[0], rk[1], 0x01);
+            rk[3] = KEYEXP256_2(rk[1], rk[2]);
+            rk[4] = KEYEXP256(rk[2], rk[3], 0x02);
+            rk[5] = KEYEXP256_2(rk[3], rk[4]);
+            rk[6] = KEYEXP256(rk[4], rk[5], 0x04);
+            rk[7] = KEYEXP256_2(rk[5], rk[6]);
+            rk[8] = KEYEXP256(rk[6], rk[7], 0x08);
+            rk[9] = KEYEXP256_2(rk[7], rk[8]);
+            rk[10] = KEYEXP256(rk[8], rk[9], 0x10);
+            rk[11] = KEYEXP256_2(rk[9], rk[10]);
+            rk[12] = KEYEXP256(rk[10], rk[11], 0x20);
+            rk[13] = KEYEXP256_2(rk[11], rk[12]);
+            rk[14] = KEYEXP256(rk[12], rk[13], 0x40);
+            break;
+        }
+        default:
+            /* unsupported key length: nothing to expand */
+            break;
+    }
+}
+
+/*
+ * Decryption key setup
+ *
+ * Builds the decryption schedule dk[0..rounds] from the encryption schedule
+ * ek[0..rounds]: the order of the keys is reversed, and every key except the
+ * first and the last is passed through AESIMC.
+ */
+static void aes_key_setup_dec(__m128i dk[], const __m128i ek[], int rounds)
+{
+    int src;
+    int dst;
+
+    dk[rounds] = ek[0];
+    /* dst walks down from rounds-1 to 1 while src walks up from 1 */
+    for (src = 1, dst = rounds - 1; dst > 0; ++src, --dst) {
+        dk[dst] = _mm_aesimc_si128(ek[src]);
+    }
+    dk[0] = ek[rounds];
+}
+
+/*
+ * Initialise `self` from a raw AES key.
+ *
+ * Picks the round count from `keylen` (16 -> 10, 24 -> 12, 32 -> 14) and
+ * fills both key schedules.  For any other length a Python ValueError is set
+ * and `self` is left unmodified.
+ */
+static void block_init(block_state* self, unsigned char* key, int keylen)
+{
+    int nr;
+
+    if (keylen == 16) {
+        nr = 10;
+    } else if (keylen == 24) {
+        nr = 12;
+    } else if (keylen == 32) {
+        nr = 14;
+    } else {
+        PyErr_SetString(PyExc_ValueError,
+                        "AES key must be either 16, 24, or 32 bytes long");
+        return;
+    }
+
+    self->rounds = nr;
+    aes_key_setup_enc(self->ek, key, keylen);
+    aes_key_setup_dec(self->dk, self->ek, nr);
+}
+
+/*
+ * Encrypt one 16-byte block from `in` into `out` using the schedule in
+ * self->ek.  Unaligned loads/stores are used for `in` and `out`.
+ */
+static void block_encrypt(block_state* self, const u8* in, u8* out)
+{
+    const __m128i *rk = self->ek;
+    __m128i state;
+    int r;
+
+    /* initial key whitening */
+    state = _mm_xor_si128(_mm_loadu_si128((const __m128i*) in), rk[0]);
+
+    /* rounds 1..9 are common to every key size */
+    for (r = 1; r <= 9; ++r) {
+        state = _mm_aesenc_si128(state, rk[r]);
+    }
+
+    if (self->rounds != 10) {
+        /* AES-192/256: rounds 10 and 11 */
+        state = _mm_aesenc_si128(state, rk[10]);
+        state = _mm_aesenc_si128(state, rk[11]);
+        if (self->rounds == 14) {
+            /* AES-256 only: rounds 12 and 13 */
+            state = _mm_aesenc_si128(state, rk[12]);
+            state = _mm_aesenc_si128(state, rk[13]);
+        }
+    }
+
+    /* final round uses the last key of the schedule */
+    state = _mm_aesenclast_si128(state, rk[self->rounds]);
+    _mm_storeu_si128((__m128i*) out, state);
+}
+
+/*
+ * Decrypt one 16-byte block from `in` into `out` using the schedule in
+ * self->dk.  Unaligned loads/stores are used for `in` and `out`.
+ */
+static void block_decrypt(block_state* self, const u8* in, u8* out)
+{
+    const __m128i *rk = self->dk;
+    __m128i state;
+    int r;
+
+    /* initial key whitening */
+    state = _mm_xor_si128(_mm_loadu_si128((const __m128i*) in), rk[0]);
+
+    /* rounds 1..9 are common to every key size */
+    for (r = 1; r <= 9; ++r) {
+        state = _mm_aesdec_si128(state, rk[r]);
+    }
+
+    if (self->rounds != 10) {
+        /* AES-192/256: rounds 10 and 11 */
+        state = _mm_aesdec_si128(state, rk[10]);
+        state = _mm_aesdec_si128(state, rk[11]);
+        if (self->rounds == 14) {
+            /* AES-256 only: rounds 12 and 13 */
+            state = _mm_aesdec_si128(state, rk[12]);
+            state = _mm_aesdec_si128(state, rk[13]);
+        }
+    }
+
+    /* final round uses the last key of the schedule */
+    state = _mm_aesdeclast_si128(state, rk[self->rounds]);
+    _mm_storeu_si128((__m128i*) out, state);
+}
+
+#include "block_template.c"
diff --git a/src/config.h.in b/src/config.h.in
index d89930c..b520f4e 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -1,5 +1,8 @@
/* src/config.h.in. Generated from configure.ac by autoheader. */
+/* Define to 1 if you have the <cpuid.h> header file. */
+#undef HAVE_CPUID_H
+
/* Define to 1 if you have the declaration of `mpz_powm', and to 0 if you
don't. */
#undef HAVE_DECL_MPZ_POWM
@@ -20,6 +23,9 @@
/* Define to 1 if you have the <limits.h> header file. */
#undef HAVE_LIMITS_H
+/* Define if CC supports -maes */
+#undef HAVE_MAES
+
/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
to 0 otherwise. */
#undef HAVE_MALLOC
@@ -63,6 +69,9 @@
/* Define to 1 if you have the <wchar.h> header file. */
#undef HAVE_WCHAR_H
+/* Define to 1 if you have the <wmmintrin.h> header file. */
+#undef HAVE_WMMINTRIN_H
+
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
diff --git a/src/cpuid.c b/src/cpuid.c
new file mode 100644
index 0000000..e4d8b27
--- /dev/null
+++ b/src/cpuid.c
@@ -0,0 +1,100 @@
+/*
+ * cpuid.c: check CPU capabilities
+ *
+ * Written in 2013 by Sebastian Ramacher <sebastian@ramacher.at>
+ *
+ * ===================================================================
+ * The contents of this file are dedicated to the public domain. To
+ * the extent that dedication to the public domain is not available,
+ * everyone is granted a worldwide, perpetual, royalty-free,
+ * non-exclusive license to exercise all rights associated with the
+ * contents of this file for any purpose whatsoever.
+ * No rights are reserved.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * ===================================================================
+ */
+#include "Python.h"
+#include <stdint.h>
+#include "config.h"
+#ifdef HAVE_CPUID_H
+#include <cpuid.h>
+#endif
+
+#include "pycrypto_compat.h"
+
+/*
+ * The have_aes_ni Python function
+ */
+
+static char have_aes_ni__doc__[] =
+"have_aes_ni() -> bool\n"
+"\n"
+"Return whether AES-NI instructions are available.\n";
+
+/*
+ * Return a new reference to Py_True when CPUID leaf 1 reports the AES bit in
+ * ECX, and to Py_False otherwise.  Without <cpuid.h> the answer is always
+ * Py_False.
+ */
+static PyObject *
+have_aes_ni(PyObject *self, PyObject *args)
+{
+    PyObject *answer = Py_False;
+#ifdef HAVE_CPUID_H
+    uint32_t eax, ebx, ecx, edx;
+
+    /* query CPUID leaf 1; the AES bit is only examined if the call succeeded */
+    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (ecx & bit_AES)) {
+        answer = Py_True;
+    }
+#endif
+    Py_INCREF(answer);
+    return answer;
+}
+
+/*
+ * Module-level method table and module initialization function
+ */
+
+static PyMethodDef cpuid_methods[] = { /* functions exported by the cpuid module */
+ {"have_aes_ni", have_aes_ni, METH_NOARGS, have_aes_ni__doc__}, /* takes no arguments */
+ {NULL, NULL, 0, NULL} /* end-of-list sentinel value */
+};
+
+#ifdef IS_PY3K
+static struct PyModuleDef moduledef = { /* Python 3 module definition handed to PyModule_Create() */
+ PyModuleDef_HEAD_INIT,
+ "cpuid", /* m_name */
+ NULL, /* m_doc: no module docstring */
+ -1, /* m_size: no per-module state block is allocated */
+ cpuid_methods, /* m_methods */
+ NULL, /* m_reload (named m_slots in newer CPython) -- unused */
+ NULL, /* m_traverse -- unused */
+ NULL, /* m_clear -- unused */
+ NULL /* m_free -- unused */
+};
+#endif
+
+/*
+ * Module initialisation entry point.  Python 3 looks up PyInit_cpuid and
+ * expects the new module object back; Python 2 looks up initcpuid, which
+ * registers the module and returns nothing.
+ */
+#ifdef IS_PY3K
+PyMODINIT_FUNC
+PyInit_cpuid(void)
+{
+    return PyModule_Create(&moduledef);
+}
+#else
+PyMODINIT_FUNC
+initcpuid(void)
+{
+    Py_InitModule("cpuid", cpuid_methods);
+}
+#endif
+
+/* vim:set ts=4 sw=4 sts=4 expandtab: */