Imported Andy Polyakov's implementation of AES-GCM in aarch64

author: Nikos Mavrogiannopoulos <nmav@gnutls.org> 2016-09-27 21:43:05 +0200
committer: Nikos Mavrogiannopoulos <nmav@redhat.com> 2016-10-03 13:53:05 +0200
commit: c5032beda17213eea05e842739dafae50e87f39b (patch)
tree: eb3db582e6c269192141a2329986c5b67564b013 /lib
parent: edce87be74a578eb3b2bc85483130ddd62a4f38f (diff)
download: gnutls-c5032beda17213eea05e842739dafae50e87f39b.tar.gz
4 files changed, 459 insertions, 67 deletions
diff --git a/lib/accelerated/aarch64/Makefile.am b/lib/accelerated/aarch64/Makefile.am
index f34507853b..edc1edd495 100644
--- a/lib/accelerated/aarch64/Makefile.am
+++ b/lib/accelerated/aarch64/Makefile.am
@@ -33,6 +33,9 @@ if ENABLE_MINITASN1
 AM_CPPFLAGS += -I$(srcdir)/../../minitasn1
 endif
 
+# Ensure that all aarch64 instruction set extensions are enabled for the assembler.
+AM_CCASFLAGS = -Wa,-march=all
+
 EXTRA_DIST = README
 
 noinst_LTLIBRARIES = libaarch64.la
@@ -42,6 +45,6 @@ libaarch64_la_SOURCES = aarch64-common.c aarch64-common.h sha-aarch64.h sha-aarc
 
 if ASM_AARCH64
 libaarch64_la_SOURCES += elf/sha1-armv8.s elf/sha512-armv8.s elf/sha256-armv8.s \
-	elf/aes-aarch64.s
+	elf/aes-aarch64.s elf/ghash-aarch64.s
 
 endif #ASM_AARCH64
diff --git a/lib/accelerated/aarch64/aarch64-common.c b/lib/accelerated/aarch64/aarch64-common.c
index 9866132848..310ea5508c 100644
--- a/lib/accelerated/aarch64/aarch64-common.c
+++ b/lib/accelerated/aarch64/aarch64-common.c
@@ -193,20 +193,24 @@ void _register_aarch64_crypto(unsigned capabilities)
 	if (_gnutls_arm_cpuid_s & ARMV8_AES) {
 		_gnutls_debug_log("Aarch64 AES was detected\n");
 
-		ret =
-		    gnutls_crypto_single_cipher_register
-		    (GNUTLS_CIPHER_AES_128_GCM, 90,
-		     &_gnutls_aes_gcm_aarch64, 0);
+		if (_gnutls_arm_cpuid_s & ARMV8_PMULL) {
+			_gnutls_debug_log("Aarch64 PMULL was detected\n");
+
+			ret =
+			    gnutls_crypto_single_cipher_register
+			    (GNUTLS_CIPHER_AES_128_GCM, 90,
+			     &_gnutls_aes_gcm_aarch64, 0);
+			if (ret < 0) {
+					gnutls_assert();
+				}
+
+			ret =
+			    gnutls_crypto_single_cipher_register
+			    (GNUTLS_CIPHER_AES_256_GCM, 90,
+			     &_gnutls_aes_gcm_aarch64, 0);
 			if (ret < 0) {
 				gnutls_assert();
 			}
-
-		ret =
-		    gnutls_crypto_single_cipher_register
-		    (GNUTLS_CIPHER_AES_256_GCM, 90,
-		     &_gnutls_aes_gcm_aarch64, 0);
-		if (ret < 0) {
-			gnutls_assert();
 		}
 
 		ret =
diff --git a/lib/accelerated/aarch64/aes-gcm-aarch64.c b/lib/accelerated/aarch64/aes-gcm-aarch64.c
index 37412363f3..c571d0294f 100644
--- a/lib/accelerated/aarch64/aes-gcm-aarch64.c
+++ b/lib/accelerated/aarch64/aes-gcm-aarch64.c
@@ -22,50 +22,56 @@
  */
 
 /*
- * The following code is an implementation of the AES-128-GCM cipher
- * using the vpaes aarch64 code.
+ * The following code is an implementation of the AES-GCM cipher
+ * using the ARMv8 AES and PMULL (polynomial multiply) instructions.
  */
 
 #include "errors.h"
 #include "gnutls_int.h"
-
-#ifdef HAVE_LIBNETTLE
-
 #include <gnutls/crypto.h>
 #include "errors.h"
 #include <aes-aarch64.h>
 #include <aarch64-common.h>
+#include <nettle/memxor.h>
+#include <nettle/macros.h>
 #include <byteswap.h>
-#include <nettle/gcm.h>
 
-/* GCM mode 
- * It is used when the CPU doesn't include the PCLMUL instructions.
- */
-struct gcm_aarch64_aes_ctx GCM_CTX(AES_KEY);
+#define GCM_BLOCK_SIZE 16
 
-static void aarch64_aes_encrypt(const void *_ctx,
-				size_t length, uint8_t * dst,
-				const uint8_t * src)
-{
-	AES_KEY *ctx = (void*)_ctx;
+/* GCM mode */
 
-	aes_v8_encrypt(src, dst, ctx);
-}
+typedef struct {
+	uint64_t hi, lo;	/* the two 64-bit halves of one 128-bit table entry */
+} u128;
 
-static void aarch64_aes_128_set_encrypt_key(void *_ctx,
-				    const uint8_t * key)
-{
-	AES_KEY *ctx = _ctx;
+/* This is the gcm128 context structure used in OpenSSL. Its layout
+ * is compatible with the included assembly code.
+ */
+struct gcm128_context {
+	union {	/* one 16-byte block, addressable at three granularities */
+		uint64_t u[2];	/* as two 64-bit words */
+		uint32_t d[4];	/* as four 32-bit words */
+		uint8_t c[16];	/* as raw bytes */
+	} Yi, EKi, EK0, len, Xi, H;	/* counter block, (EKi: not referenced in this file), E(K, Y0), AAD/text byte counts, GHASH accumulator, hash key */
+	u128 Htable[16];	/* key-dependent table filled in by gcm_init_v8() */
+};
 
-	aes_v8_set_encrypt_key(key, 16*8, ctx);
-}
+struct aes_gcm_ctx {
+	AES_KEY expanded_key;	/* AES key schedule; every use in this file goes through ALIGN16() */
+	struct gcm128_context gcm;	/* GCM counter block, hash key and GHASH state */
+};
 
-static void aarch64_aes_256_set_encrypt_key(void *_ctx,
-				    const uint8_t * key)
+void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);	/* fills Htable from the 16-byte hash key (passed in the parameter named Xi) */
+void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16],	/* folds len bytes at inp into Xi; */
+		     const uint8_t * inp, size_t len);	/* callers in this file only pass multiples of 16 bytes */
+void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);	/* one GHASH multiplication of Xi by the hash key, in place */
+
+static void aes_gcm_deinit(void *_ctx)	/* destroys a context allocated by aes_gcm_cipher_init() */
 {
-	AES_KEY *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
 
-	aes_v8_set_encrypt_key(key, 32*8, ctx);
+	zeroize_temp_key(ctx, sizeof(*ctx));	/* presumably wipes the key schedule and GCM state before the memory is released */
+	gnutls_free(ctx);
 }
 
 static int
@@ -77,7 +83,7 @@ aes_gcm_cipher_init(gnutls_cipher_algorithm_t algorithm, void **_ctx,
 	    algorithm != GNUTLS_CIPHER_AES_256_GCM)
 		return GNUTLS_E_INVALID_REQUEST;
 
-	*_ctx = gnutls_calloc(1, sizeof(struct gcm_aarch64_aes_ctx));
+	*_ctx = gnutls_calloc(1, sizeof(struct aes_gcm_ctx));
 	if (*_ctx == NULL) {
 		gnutls_assert();
 		return GNUTLS_E_MEMORY_ERROR;
@@ -87,40 +93,127 @@ aes_gcm_cipher_init(gnutls_cipher_algorithm_t algorithm, void **_ctx,
 }
 
 static int
-aes_gcm_cipher_setkey(void *_ctx, const void *key, size_t keysize)
+aes_gcm_cipher_setkey(void *_ctx, const void *userkey, size_t keysize)	/* expands the AES key (keysize in bytes) and derives the GHASH key; returns 0 or a negative error code */
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
+	int ret;
+
+	ret =
+	    aes_v8_set_encrypt_key(userkey, keysize * 8,
+				  ALIGN16(&ctx->expanded_key));
+	if (ret != 0)
+		return gnutls_assert_val(GNUTLS_E_ENCRYPTION_FAILED);
+	memset(ctx->gcm.H.c, 0, sizeof(ctx->gcm.H.c));	/* H = E(K, 0^128): start from zeros even if this context is keyed more than once */
+	aes_v8_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, ALIGN16(&ctx->expanded_key));
 
-	if (keysize == 16) {
-		GCM_SET_KEY(ctx, aarch64_aes_128_set_encrypt_key, aarch64_aes_encrypt,
-			    key);
-	} else if (keysize == 32) {
-		GCM_SET_KEY(ctx, aarch64_aes_256_set_encrypt_key, aarch64_aes_encrypt,
-			    key);
-	} else abort();
+	ctx->gcm.H.u[0] = bswap_64(ctx->gcm.H.u[0]);	/* byte-swap each 64-bit half; presumably the word order gcm_init_v8() expects */
+	ctx->gcm.H.u[1] = bswap_64(ctx->gcm.H.u[1]);
+
+	gcm_init_v8(ctx->gcm.Htable, ctx->gcm.H.u);	/* precompute the table used by gcm_ghash_v8()/gcm_gmult_v8() */
 
 	return 0;
 }
 
 static int aes_gcm_setiv(void *_ctx, const void *iv, size_t iv_size)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;	/* starts a new message; only 12-byte (96-bit) IVs pass the check below */
 
 	if (iv_size != GCM_BLOCK_SIZE - 4)
 		return gnutls_assert_val(GNUTLS_E_INVALID_REQUEST);
 
-	GCM_SET_IV(ctx, iv_size, iv);
+	memset(ctx->gcm.Xi.c, 0, sizeof(ctx->gcm.Xi.c));	/* reset the GHASH accumulator */
+	memset(ctx->gcm.len.c, 0, sizeof(ctx->gcm.len.c));	/* reset the AAD and text byte counters */
+
+	memcpy(ctx->gcm.Yi.c, iv, GCM_BLOCK_SIZE - 4);	/* Y0 = IV || 00 00 00 01 */
+	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 4] = 0;
+	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 3] = 0;
+	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 2] = 0;
+	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 1;
 
+	aes_v8_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c,	/* EK0 = E(K, Y0); aes_gcm_tag() XORs it into the tag */
+			ALIGN16(&ctx->expanded_key));
+	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 2;	/* the first data block is encrypted with counter value 2 */
 	return 0;
 }
 
+static void	/* absorbs src_size bytes at src into the running GHASH value ctx->gcm.Xi */
+gcm_ghash(struct aes_gcm_ctx *ctx, const uint8_t * src, size_t src_size)
+{
+	size_t rest = src_size % GCM_BLOCK_SIZE;	/* bytes in a trailing partial block */
+	size_t aligned_size = src_size - rest;	/* length of the whole-block prefix */
+
+	if (aligned_size > 0)
+		gcm_ghash_v8(ctx->gcm.Xi.u, ctx->gcm.Htable, src,
+				aligned_size);
+
+	if (rest > 0) {	/* partial block: XOR it into Xi (equivalent to zero-padding it), then do one multiplication */
+		memxor(ctx->gcm.Xi.c, src + aligned_size, rest);
+		gcm_gmult_v8(ctx->gcm.Xi.u, ctx->gcm.Htable);
+	}
+}
+
+static void	/* CTR-encrypts `blocks` whole 16-byte blocks from in to out, starting at counter block ivec */
+ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+		     size_t blocks, const AES_KEY *key,
+		     const unsigned char ivec[16])
+{
+	size_t i;	/* same width as blocks, so the loop index cannot wrap before the bound is reached */
+	uint8_t ctr[16], ks[16];	/* working counter (ivec itself is left untouched) and one keystream block */
+
+	memcpy(ctr, ivec, 16);
+
+	for (i=0;i<blocks;i++) {
+		aes_v8_encrypt(ctr, ks, key);
+		memxor3(out, ks, in, 16);	/* keystream goes through ks so that in == out (in-place use) still yields in XOR keystream */
+
+		out += 16;
+		in += 16;
+		INCREMENT(16, ctr);
+	}
+}
+
+static inline void	/* CTR-encrypts the trailing partial block: length bytes from src[pos] to dst[pos]; callers pass length < 16 */
+ctr_encrypt_last(struct aes_gcm_ctx *ctx, const uint8_t * src,
+		 uint8_t * dst, size_t pos, size_t length)
+{
+	uint8_t tmp[GCM_BLOCK_SIZE] = { 0 };	/* zero padding, so the 16-byte XOR never reads indeterminate bytes */
+	uint8_t out[GCM_BLOCK_SIZE];
+
+	memcpy(tmp, &src[pos], length);
+	ctr32_encrypt_blocks(tmp, out, 1,
+			     ALIGN16(&ctx->expanded_key),
+			     ctx->gcm.Yi.c);
+
+	memcpy(&dst[pos], out, length);	/* only the first length bytes are real output */
+	/* ctx->gcm.Yi is not advanced here, so this has to be the last chunk of the message. */
+}
+
 static int
 aes_gcm_encrypt(void *_ctx, const void *src, size_t src_size,
 		void *dst, size_t length)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
+	int blocks = src_size / GCM_BLOCK_SIZE;
+	int exp_blocks = blocks * GCM_BLOCK_SIZE;
+	int rest = src_size - (exp_blocks);
+	uint32_t counter;
+
+	if (blocks > 0) {
+		ctr32_encrypt_blocks(src, dst,
+				     blocks,
+				     ALIGN16(&ctx->expanded_key),
+				     ctx->gcm.Yi.c);
+
+		counter = _gnutls_read_uint32(ctx->gcm.Yi.c + 12);
+		counter += blocks;
+		_gnutls_write_uint32(counter, ctx->gcm.Yi.c + 12);
+	}
 
-	GCM_ENCRYPT(ctx, aarch64_aes_encrypt, src_size, dst, src);
+	if (rest > 0)		/* last incomplete block */
+		ctr_encrypt_last(ctx, src, dst, exp_blocks, rest);
+
+	gcm_ghash(ctx, dst, src_size);
+	ctx->gcm.len.u[1] += src_size;
 
 	return 0;
 }
@@ -129,34 +222,62 @@ static int
 aes_gcm_decrypt(void *_ctx, const void *src, size_t src_size,
 		void *dst, size_t dst_size)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
+	int blocks = src_size / GCM_BLOCK_SIZE;
+	int exp_blocks = blocks * GCM_BLOCK_SIZE;
+	int rest = src_size - (exp_blocks);
+	uint32_t counter;
+
+	gcm_ghash(ctx, src, src_size);
+	ctx->gcm.len.u[1] += src_size;
+
+	if (blocks > 0) {
+		ctr32_encrypt_blocks(src, dst,
+				     blocks,
+				     ALIGN16(&ctx->expanded_key),
+				     ctx->gcm.Yi.c);
+
+		counter = _gnutls_read_uint32(ctx->gcm.Yi.c + 12);
+		counter += blocks;
+		_gnutls_write_uint32(counter, ctx->gcm.Yi.c + 12);
+	}
+
+	if (rest > 0)		/* last incomplete block */
+		ctr_encrypt_last(ctx, src, dst, exp_blocks, rest);
 
-	GCM_DECRYPT(ctx, aarch64_aes_encrypt, src_size, dst, src);
 	return 0;
 }
 
 static int aes_gcm_auth(void *_ctx, const void *src, size_t src_size)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
 
-	GCM_UPDATE(ctx, src_size, src);
+	gcm_ghash(ctx, src, src_size);	/* fold the additional authenticated data into the GHASH state */
+	ctx->gcm.len.u[0] += src_size;	/* AAD byte count; converted to bits in aes_gcm_tag() */
 
 	return 0;
 }
 
+
 static void aes_gcm_tag(void *_ctx, void *tag, size_t tagsize)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;	/* finishes GHASH with the length block and writes at most 16 tag bytes to tag */
+	uint8_t buffer[GCM_BLOCK_SIZE];	/* final GHASH block: AAD bit length followed by text bit length */
+	uint64_t alen, clen;
 
-	GCM_DIGEST(ctx, aarch64_aes_encrypt, tagsize, tag);
-}
+	alen = ctx->gcm.len.u[0] * 8;	/* byte counts -> bit counts */
+	clen = ctx->gcm.len.u[1] * 8;
 
-static void aes_gcm_deinit(void *_ctx)
-{
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	_gnutls_write_uint64(alen, buffer);	/* presumably stored big-endian, as GCM requires - confirm in the helper */
+	_gnutls_write_uint64(clen, &buffer[8]);
 
-	zeroize_temp_key(ctx, sizeof(*ctx));
-	gnutls_free(ctx);
+	gcm_ghash_v8(ctx->gcm.Xi.u, ctx->gcm.Htable, buffer,
+			GCM_BLOCK_SIZE);
+
+	ctx->gcm.Xi.u[0] ^= ctx->gcm.EK0.u[0];	/* tag = GHASH result XOR EK0 (EK0 is computed in aes_gcm_setiv) */
+	ctx->gcm.Xi.u[1] ^= ctx->gcm.EK0.u[1];
+
+	memcpy(tag, ctx->gcm.Xi.c, MIN(GCM_BLOCK_SIZE, tagsize));	/* truncate to the requested tag size */
 }
 
 #include "../x86/aes-gcm-aead.h"
@@ -173,5 +294,3 @@ const gnutls_crypto_cipher_st _gnutls_aes_gcm_aarch64 = {
 	.tag = aes_gcm_tag,
 	.auth = aes_gcm_auth,
 };
-
-#endif
diff --git a/lib/accelerated/aarch64/elf/ghash-aarch64.s b/lib/accelerated/aarch64/elf/ghash-aarch64.s
new file mode 100644
index 0000000000..13faf91f40
--- /dev/null
+++ b/lib/accelerated/aarch64/elf/ghash-aarch64.s
@@ -0,0 +1,266 @@
+# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 
+#     * Redistributions of source code must retain copyright notices,
+#      this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+#
+#     * Neither the name of the Andy Polyakov nor the names of its
+#      copyright holder and contributors may be used to endorse or
+#      promote products derived from this software without specific
+#      prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+# 1 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+# 1 "<built-in>"
+# 1 "<command-line>"
+# 1 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+# 1 "lib/accelerated/aarch64/aarch64-common.h" 1
+# 2 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S" 2
+
+.text
+.arch armv8-a+crypto
+.globl gcm_init_v8
+.type gcm_init_v8,%function
+.align 4
+gcm_init_v8: // x0 = table to fill (written), x1 = 16-byte hash key (read)
+ ld1 {v17.2d},[x1] // load the 16-byte key
+ movi v19.16b,#0xe1
+ shl v19.2d,v19.2d,#57 // v19 = 0xc200000000000000 in both 64-bit lanes
+ ext v3.16b,v17.16b,v17.16b,#8 // v3 = key with its two 64-bit halves swapped
+ ushr v18.2d,v19.2d,#63
+ dup v17.4s,v17.s[1]
+ ext v16.16b,v18.16b,v19.16b,#8
+ ushr v18.2d,v3.2d,#63 // top bit of each half, i.e. the bits shifted out below
+ sshr v17.4s,v17.4s,#31 // all-ones or all-zeros mask from a sign bit
+ and v18.16b,v18.16b,v16.16b
+ shl v3.2d,v3.2d,#1 // shift each half left by one bit
+ ext v18.16b,v18.16b,v18.16b,#8
+ and v16.16b,v16.16b,v17.16b
+ orr v3.16b,v3.16b,v18.16b // re-insert the bit carried between the halves
+ eor v20.16b,v3.16b,v16.16b // conditionally XOR in the masked constant
+ st1 {v20.2d},[x0],#16 // store the first 16-byte table entry and advance x0
+// Square v20 using three 64x64 carry-less multiplies (Karatsuba split).
+
+ ext v16.16b,v20.16b,v20.16b,#8
+ pmull v0.1q,v20.1d,v20.1d
+ eor v16.16b,v16.16b,v20.16b // v16 = low half XOR high half, in both lanes
+ pmull2 v2.1q,v20.2d,v20.2d
+ pmull v1.1q,v16.1d,v16.1d
+// Combine the partial products; presumably first step of the modular reduction.
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+// Presumably the second reduction step; the 128-bit result ends up in v22.
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v22.16b,v0.16b,v18.16b
+// v21 packs (low XOR high) of v20 and of v22 side by side.
+ ext v17.16b,v22.16b,v22.16b,#8
+ eor v17.16b,v17.16b,v22.16b
+ ext v21.16b,v16.16b,v17.16b,#8
+ st1 {v21.2d,v22.2d},[x0] // store two more 16-byte entries (48 bytes written in total)
+
+ ret
+.size gcm_init_v8,.-gcm_init_v8
+.globl gcm_gmult_v8
+.type gcm_gmult_v8,%function
+.align 4
+gcm_gmult_v8: // x0 = 16-byte Xi (read and written), x1 = table from gcm_init_v8 (read)
+ ld1 {v17.2d},[x0] // load Xi
+ movi v19.16b,#0xe1
+ ld1 {v20.2d,v21.2d},[x1] // load the first 32 bytes of the table
+ shl v19.2d,v19.2d,#57 // v19 = 0xc200000000000000 in both 64-bit lanes
+
+ rev64 v17.16b,v17.16b // byte-reverse each 64-bit half of Xi
+
+ ext v3.16b,v17.16b,v17.16b,#8 // v3 = v17 with its halves swapped
+// Multiply v3 by v20 with three 64x64 carry-less multiplies (Karatsuba split).
+ pmull v0.1q,v20.1d,v3.1d
+ eor v17.16b,v17.16b,v3.16b // low half XOR high half
+ pmull2 v2.1q,v20.2d,v3.2d
+ pmull v1.1q,v21.1d,v17.1d // middle term, using the low half of v21
+// Combine the partial products; presumably first step of the modular reduction.
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+// Presumably the second reduction step; the 128-bit result ends up in v0.
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v0.16b,v0.16b,v18.16b
+
+// Undo the byte reversal and the half swap, then write Xi back.
+ rev64 v0.16b,v0.16b
+
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.2d},[x0]
+
+ ret
+.size gcm_gmult_v8,.-gcm_gmult_v8
+.globl gcm_ghash_v8
+.type gcm_ghash_v8,%function
+.align 4
+gcm_ghash_v8: // x0 = 16-byte Xi (read and written), x1 = table, x2 = input, x3 = input length in bytes
+ ld1 {v0.2d},[x0] // load Xi
+// The main loop below consumes 32 bytes (two blocks) per iteration;
+// a single left-over block is handled at .Lodd_tail_v8.
+
+
+
+ subs x3,x3,#32 // flags: "lo" if fewer than 32 bytes, "eq" if exactly 32
+ mov x12,#16 // x12 = post-increment applied to x2 by later loads
+# 116 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+ ld1 {v20.2d,v21.2d},[x1],#32 // first two table entries
+ movi v19.16b,#0xe1
+ ld1 {v22.2d},[x1] // third table entry
+ csel x12,xzr,x12,eq // exactly 32 bytes: stop advancing x2 after the next load
+ ext v0.16b,v0.16b,v0.16b,#8
+ ld1 {v16.2d},[x2],#16 // first input block
+ shl v19.2d,v19.2d,#57 // v19 = 0xc200000000000000 in both 64-bit lanes
+
+ rev64 v16.16b,v16.16b // byte-reverse each 64-bit half
+ rev64 v0.16b,v0.16b
+
+ ext v3.16b,v16.16b,v16.16b,#8
+ b.lo .Lodd_tail_v8 // fewer than 32 bytes: only one block to process
+ ld1 {v17.2d},[x2],x12 // second input block
+
+ rev64 v17.16b,v17.16b
+
+ ext v7.16b,v17.16b,v17.16b,#8
+ eor v3.16b,v3.16b,v0.16b // first block XOR current Xi
+ pmull v4.1q,v20.1d,v7.1d // start the products of the second block with v20
+ eor v17.16b,v17.16b,v7.16b
+ pmull2 v6.1q,v20.2d,v7.2d
+ b .Loop_mod2x_v8
+
+.align 4
+.Loop_mod2x_v8: // one iteration per 32 input bytes
+ ext v18.16b,v3.16b,v3.16b,#8
+ subs x3,x3,#32 // remaining length minus the next pair of blocks
+ pmull v0.1q,v22.1d,v3.1d // products of (Xi XOR first block) with v22
+ csel x12,xzr,x12,lo // input exhausted: keep x2 from advancing further
+
+ pmull v5.1q,v21.1d,v17.1d
+ eor v18.16b,v18.16b,v3.16b
+ pmull2 v2.1q,v22.2d,v3.2d
+ eor v0.16b,v0.16b,v4.16b // accumulate the low products of both blocks
+ pmull2 v1.1q,v21.2d,v18.2d
+ ld1 {v16.2d},[x2],x12 // prefetch the next first block
+
+ eor v2.16b,v2.16b,v6.16b // accumulate the high products
+ csel x12,xzr,x12,eq
+ eor v1.16b,v1.16b,v5.16b // accumulate the middle products
+
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ ld1 {v17.2d},[x2],x12 // prefetch the next second block
+
+ rev64 v16.16b,v16.16b
+
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d // multiply by the constant; presumably first reduction step
+
+
+ rev64 v17.16b,v17.16b
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ ext v7.16b,v17.16b,v17.16b,#8
+ ext v3.16b,v16.16b,v16.16b,#8
+ eor v0.16b,v1.16b,v18.16b
+ pmull v4.1q,v20.1d,v7.1d
+ eor v3.16b,v3.16b,v2.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d // presumably second reduction step
+ eor v3.16b,v3.16b,v18.16b
+ eor v17.16b,v17.16b,v7.16b
+ eor v3.16b,v3.16b,v0.16b
+ pmull2 v6.1q,v20.2d,v7.2d
+ b.hs .Loop_mod2x_v8 // loop while the subs above did not borrow
+
+ eor v2.16b,v2.16b,v18.16b
+ ext v3.16b,v16.16b,v16.16b,#8
+ adds x3,x3,#32 // undo the last subtraction
+ eor v0.16b,v0.16b,v2.16b
+ b.eq .Ldone_v8 // nothing left: skip the single-block tail
+.Lodd_tail_v8: // process one remaining 16-byte block
+ ext v18.16b,v0.16b,v0.16b,#8
+ eor v3.16b,v3.16b,v0.16b // block XOR current Xi
+ eor v17.16b,v16.16b,v18.16b
+
+ pmull v0.1q,v20.1d,v3.1d // three carry-less multiplies with v20/v21 (Karatsuba split)
+ eor v17.16b,v17.16b,v3.16b
+ pmull2 v2.1q,v20.2d,v3.2d
+ pmull v1.1q,v21.1d,v17.1d
+
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v0.16b,v0.16b,v18.16b
+
+.Ldone_v8:
+// Undo the byte reversal and the half swap, then write Xi back.
+ rev64 v0.16b,v0.16b
+
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.2d},[x0]
+
+ ret
+.size gcm_ghash_v8,.-gcm_ghash_v8
+.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
+.align 2
+.section .note.GNU-stack,"",%progbits
author	Nikos Mavrogiannopoulos <nmav@gnutls.org>	2016-09-27 21:43:05 +0200
committer	Nikos Mavrogiannopoulos <nmav@redhat.com>	2016-10-03 13:53:05 +0200
commit	c5032beda17213eea05e842739dafae50e87f39b (patch)
tree	eb3db582e6c269192141a2329986c5b67564b013 /lib
parent	edce87be74a578eb3b2bc85483130ddd62a4f38f (diff)
download	gnutls-c5032beda17213eea05e842739dafae50e87f39b.tar.gz