summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Nikos Mavrogiannopoulos <nmav@gnutls.org> 2016-09-27 21:43:05 +0200
committer Nikos Mavrogiannopoulos <nmav@redhat.com> 2016-10-03 13:53:05 +0200
commit c5032beda17213eea05e842739dafae50e87f39b (patch)
tree eb3db582e6c269192141a2329986c5b67564b013 /lib
parent edce87be74a578eb3b2bc85483130ddd62a4f38f (diff)
download gnutls-c5032beda17213eea05e842739dafae50e87f39b.tar.gz
Imported Andy Polyakov's implementation of AES-GCM in aarch64
Diffstat (limited to 'lib')
-rw-r--r-- lib/accelerated/aarch64/Makefile.am 5
-rw-r--r-- lib/accelerated/aarch64/aarch64-common.c 26
-rw-r--r-- lib/accelerated/aarch64/aes-gcm-aarch64.c 229
-rw-r--r-- lib/accelerated/aarch64/elf/ghash-aarch64.s 266
4 files changed, 459 insertions, 67 deletions
diff --git a/lib/accelerated/aarch64/Makefile.am b/lib/accelerated/aarch64/Makefile.am
index f34507853b..edc1edd495 100644
--- a/lib/accelerated/aarch64/Makefile.am
+++ b/lib/accelerated/aarch64/Makefile.am
@@ -33,6 +33,9 @@ if ENABLE_MINITASN1
AM_CPPFLAGS += -I$(srcdir)/../../minitasn1
endif
+#ensure that we have all aarch64 instruction sets enabled for the assembler
+AM_CCASFLAGS = -Wa,-march=all
+
EXTRA_DIST = README
noinst_LTLIBRARIES = libaarch64.la
@@ -42,6 +45,6 @@ libaarch64_la_SOURCES = aarch64-common.c aarch64-common.h sha-aarch64.h sha-aarc
if ASM_AARCH64
libaarch64_la_SOURCES += elf/sha1-armv8.s elf/sha512-armv8.s elf/sha256-armv8.s \
- elf/aes-aarch64.s
+ elf/aes-aarch64.s elf/ghash-aarch64.s
endif #ASM_AARCH64
diff --git a/lib/accelerated/aarch64/aarch64-common.c b/lib/accelerated/aarch64/aarch64-common.c
index 9866132848..310ea5508c 100644
--- a/lib/accelerated/aarch64/aarch64-common.c
+++ b/lib/accelerated/aarch64/aarch64-common.c
@@ -193,20 +193,24 @@ void _register_aarch64_crypto(unsigned capabilities)
if (_gnutls_arm_cpuid_s & ARMV8_AES) {
_gnutls_debug_log("Aarch64 AES was detected\n");
- ret =
- gnutls_crypto_single_cipher_register
- (GNUTLS_CIPHER_AES_128_GCM, 90,
- &_gnutls_aes_gcm_aarch64, 0);
+ if (_gnutls_arm_cpuid_s & ARMV8_PMULL) {
+ _gnutls_debug_log("Aarch64 PMULL was detected\n");
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_GCM, 90,
+ &_gnutls_aes_gcm_aarch64, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_GCM, 90,
+ &_gnutls_aes_gcm_aarch64, 0);
if (ret < 0) {
gnutls_assert();
}
-
- ret =
- gnutls_crypto_single_cipher_register
- (GNUTLS_CIPHER_AES_256_GCM, 90,
- &_gnutls_aes_gcm_aarch64, 0);
- if (ret < 0) {
- gnutls_assert();
}
ret =
diff --git a/lib/accelerated/aarch64/aes-gcm-aarch64.c b/lib/accelerated/aarch64/aes-gcm-aarch64.c
index 37412363f3..c571d0294f 100644
--- a/lib/accelerated/aarch64/aes-gcm-aarch64.c
+++ b/lib/accelerated/aarch64/aes-gcm-aarch64.c
@@ -22,50 +22,56 @@
*/
/*
- * The following code is an implementation of the AES-128-GCM cipher
- * using the vpaes aarch64 code.
+ * The following code is an implementation of the AES-GCM cipher
+ * using the AES and neon instruction sets.
*/
#include "errors.h"
#include "gnutls_int.h"
-
-#ifdef HAVE_LIBNETTLE
-
#include <gnutls/crypto.h>
#include "errors.h"
#include <aes-aarch64.h>
#include <aarch64-common.h>
+#include <nettle/memxor.h>
+#include <nettle/macros.h>
#include <byteswap.h>
-#include <nettle/gcm.h>
-/* GCM mode
- * It is used when the CPU doesn't include the PCLMUL instructions.
- */
-struct gcm_aarch64_aes_ctx GCM_CTX(AES_KEY);
+#define GCM_BLOCK_SIZE 16
-static void aarch64_aes_encrypt(const void *_ctx,
- size_t length, uint8_t * dst,
- const uint8_t * src)
-{
- AES_KEY *ctx = (void*)_ctx;
+/* GCM mode */
- aes_v8_encrypt(src, dst, ctx);
-}
+typedef struct {
+ uint64_t hi, lo;
+} u128;
-static void aarch64_aes_128_set_encrypt_key(void *_ctx,
- const uint8_t * key)
-{
- AES_KEY *ctx = _ctx;
+/* This is the gcm128 structure used in openssl. It
+ * is compatible with the included assembly code.
+ */
+struct gcm128_context {
+ union {
+ uint64_t u[2];
+ uint32_t d[4];
+ uint8_t c[16];
+ } Yi, EKi, EK0, len, Xi, H;
+ u128 Htable[16];
+};
- aes_v8_set_encrypt_key(key, 16*8, ctx);
-}
+struct aes_gcm_ctx {
+ AES_KEY expanded_key;
+ struct gcm128_context gcm;
+};
-static void aarch64_aes_256_set_encrypt_key(void *_ctx,
- const uint8_t * key)
+void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16],
+ const uint8_t * inp, size_t len);
+void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
+
+static void aes_gcm_deinit(void *_ctx)
{
- AES_KEY *ctx = _ctx;
+ struct aes_gcm_ctx *ctx = _ctx;
- aes_v8_set_encrypt_key(key, 32*8, ctx);
+ zeroize_temp_key(ctx, sizeof(*ctx));
+ gnutls_free(ctx);
}
static int
@@ -77,7 +83,7 @@ aes_gcm_cipher_init(gnutls_cipher_algorithm_t algorithm, void **_ctx,
algorithm != GNUTLS_CIPHER_AES_256_GCM)
return GNUTLS_E_INVALID_REQUEST;
- *_ctx = gnutls_calloc(1, sizeof(struct gcm_aarch64_aes_ctx));
+ *_ctx = gnutls_calloc(1, sizeof(struct aes_gcm_ctx));
if (*_ctx == NULL) {
gnutls_assert();
return GNUTLS_E_MEMORY_ERROR;
@@ -87,40 +93,127 @@ aes_gcm_cipher_init(gnutls_cipher_algorithm_t algorithm, void **_ctx,
}
static int
-aes_gcm_cipher_setkey(void *_ctx, const void *key, size_t keysize)
+aes_gcm_cipher_setkey(void *_ctx, const void *userkey, size_t keysize)
{
- struct gcm_aarch64_aes_ctx *ctx = _ctx;
+ struct aes_gcm_ctx *ctx = _ctx;
+ int ret;
+
+ ret =
+ aes_v8_set_encrypt_key(userkey, keysize * 8,
+ ALIGN16(&ctx->expanded_key));
+ if (ret != 0)
+ return gnutls_assert_val(GNUTLS_E_ENCRYPTION_FAILED);
+
+ aes_v8_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, ALIGN16(&ctx->expanded_key));
- if (keysize == 16) {
- GCM_SET_KEY(ctx, aarch64_aes_128_set_encrypt_key, aarch64_aes_encrypt,
- key);
- } else if (keysize == 32) {
- GCM_SET_KEY(ctx, aarch64_aes_256_set_encrypt_key, aarch64_aes_encrypt,
- key);
- } else abort();
+ ctx->gcm.H.u[0] = bswap_64(ctx->gcm.H.u[0]);
+ ctx->gcm.H.u[1] = bswap_64(ctx->gcm.H.u[1]);
+
+ gcm_init_v8(ctx->gcm.Htable, ctx->gcm.H.u);
return 0;
}
static int aes_gcm_setiv(void *_ctx, const void *iv, size_t iv_size)
{
- struct gcm_aarch64_aes_ctx *ctx = _ctx;
+ struct aes_gcm_ctx *ctx = _ctx;
if (iv_size != GCM_BLOCK_SIZE - 4)
return gnutls_assert_val(GNUTLS_E_INVALID_REQUEST);
- GCM_SET_IV(ctx, iv_size, iv);
+ memset(ctx->gcm.Xi.c, 0, sizeof(ctx->gcm.Xi.c));
+ memset(ctx->gcm.len.c, 0, sizeof(ctx->gcm.len.c));
+
+ memcpy(ctx->gcm.Yi.c, iv, GCM_BLOCK_SIZE - 4);
+ ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 4] = 0;
+ ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 3] = 0;
+ ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 2] = 0;
+ ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 1;
+ aes_v8_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c,
+ ALIGN16(&ctx->expanded_key));
+ ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 2;
return 0;
}
+static void
+gcm_ghash(struct aes_gcm_ctx *ctx, const uint8_t * src, size_t src_size)
+{
+ size_t rest = src_size % GCM_BLOCK_SIZE;
+ size_t aligned_size = src_size - rest;
+
+ if (aligned_size > 0)
+ gcm_ghash_v8(ctx->gcm.Xi.u, ctx->gcm.Htable, src,
+ aligned_size);
+
+ if (rest > 0) {
+ memxor(ctx->gcm.Xi.c, src + aligned_size, rest);
+ gcm_gmult_v8(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ }
+}
+
+static void
+ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+ size_t blocks, const AES_KEY *key,
+ const unsigned char ivec[16])
+{
+ unsigned i;
+ uint8_t ctr[16];
+
+ memcpy(ctr, ivec, 16);
+
+ for (i=0;i<blocks;i++) {
+ aes_v8_encrypt(ctr, out, key);
+ memxor(out, in, 16);
+
+ out += 16;
+ in += 16;
+ INCREMENT(16, ctr);
+ }
+}
+
+static inline void
+ctr_encrypt_last(struct aes_gcm_ctx *ctx, const uint8_t * src,
+ uint8_t * dst, size_t pos, size_t length)
+{
+ uint8_t tmp[GCM_BLOCK_SIZE];
+ uint8_t out[GCM_BLOCK_SIZE];
+
+ memcpy(tmp, &src[pos], length);
+ ctr32_encrypt_blocks(tmp, out, 1,
+ ALIGN16(&ctx->expanded_key),
+ ctx->gcm.Yi.c);
+
+ memcpy(&dst[pos], out, length);
+
+}
+
static int
aes_gcm_encrypt(void *_ctx, const void *src, size_t src_size,
void *dst, size_t length)
{
- struct gcm_aarch64_aes_ctx *ctx = _ctx;
+ struct aes_gcm_ctx *ctx = _ctx;
+ int blocks = src_size / GCM_BLOCK_SIZE;
+ int exp_blocks = blocks * GCM_BLOCK_SIZE;
+ int rest = src_size - (exp_blocks);
+ uint32_t counter;
+
+ if (blocks > 0) {
+ ctr32_encrypt_blocks(src, dst,
+ blocks,
+ ALIGN16(&ctx->expanded_key),
+ ctx->gcm.Yi.c);
+
+ counter = _gnutls_read_uint32(ctx->gcm.Yi.c + 12);
+ counter += blocks;
+ _gnutls_write_uint32(counter, ctx->gcm.Yi.c + 12);
+ }
- GCM_ENCRYPT(ctx, aarch64_aes_encrypt, src_size, dst, src);
+ if (rest > 0) /* last incomplete block */
+ ctr_encrypt_last(ctx, src, dst, exp_blocks, rest);
+
+ gcm_ghash(ctx, dst, src_size);
+ ctx->gcm.len.u[1] += src_size;
return 0;
}
@@ -129,34 +222,62 @@ static int
aes_gcm_decrypt(void *_ctx, const void *src, size_t src_size,
void *dst, size_t dst_size)
{
- struct gcm_aarch64_aes_ctx *ctx = _ctx;
+ struct aes_gcm_ctx *ctx = _ctx;
+ int blocks = src_size / GCM_BLOCK_SIZE;
+ int exp_blocks = blocks * GCM_BLOCK_SIZE;
+ int rest = src_size - (exp_blocks);
+ uint32_t counter;
+
+ gcm_ghash(ctx, src, src_size);
+ ctx->gcm.len.u[1] += src_size;
+
+ if (blocks > 0) {
+ ctr32_encrypt_blocks(src, dst,
+ blocks,
+ ALIGN16(&ctx->expanded_key),
+ ctx->gcm.Yi.c);
+
+ counter = _gnutls_read_uint32(ctx->gcm.Yi.c + 12);
+ counter += blocks;
+ _gnutls_write_uint32(counter, ctx->gcm.Yi.c + 12);
+ }
+
+ if (rest > 0) /* last incomplete block */
+ ctr_encrypt_last(ctx, src, dst, exp_blocks, rest);
- GCM_DECRYPT(ctx, aarch64_aes_encrypt, src_size, dst, src);
return 0;
}
static int aes_gcm_auth(void *_ctx, const void *src, size_t src_size)
{
- struct gcm_aarch64_aes_ctx *ctx = _ctx;
+ struct aes_gcm_ctx *ctx = _ctx;
- GCM_UPDATE(ctx, src_size, src);
+ gcm_ghash(ctx, src, src_size);
+ ctx->gcm.len.u[0] += src_size;
return 0;
}
+
static void aes_gcm_tag(void *_ctx, void *tag, size_t tagsize)
{
- struct gcm_aarch64_aes_ctx *ctx = _ctx;
+ struct aes_gcm_ctx *ctx = _ctx;
+ uint8_t buffer[GCM_BLOCK_SIZE];
+ uint64_t alen, clen;
- GCM_DIGEST(ctx, aarch64_aes_encrypt, tagsize, tag);
-}
+ alen = ctx->gcm.len.u[0] * 8;
+ clen = ctx->gcm.len.u[1] * 8;
-static void aes_gcm_deinit(void *_ctx)
-{
- struct gcm_aarch64_aes_ctx *ctx = _ctx;
+ _gnutls_write_uint64(alen, buffer);
+ _gnutls_write_uint64(clen, &buffer[8]);
- zeroize_temp_key(ctx, sizeof(*ctx));
- gnutls_free(ctx);
+ gcm_ghash_v8(ctx->gcm.Xi.u, ctx->gcm.Htable, buffer,
+ GCM_BLOCK_SIZE);
+
+ ctx->gcm.Xi.u[0] ^= ctx->gcm.EK0.u[0];
+ ctx->gcm.Xi.u[1] ^= ctx->gcm.EK0.u[1];
+
+ memcpy(tag, ctx->gcm.Xi.c, MIN(GCM_BLOCK_SIZE, tagsize));
}
#include "../x86/aes-gcm-aead.h"
@@ -173,5 +294,3 @@ const gnutls_crypto_cipher_st _gnutls_aes_gcm_aarch64 = {
.tag = aes_gcm_tag,
.auth = aes_gcm_auth,
};
-
-#endif
diff --git a/lib/accelerated/aarch64/elf/ghash-aarch64.s b/lib/accelerated/aarch64/elf/ghash-aarch64.s
new file mode 100644
index 0000000000..13faf91f40
--- /dev/null
+++ b/lib/accelerated/aarch64/elf/ghash-aarch64.s
@@ -0,0 +1,266 @@
+# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+# 1 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+# 1 "<built-in>"
+# 1 "<command-line>"
+# 1 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+# 1 "lib/accelerated/aarch64/aarch64-common.h" 1
+# 2 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S" 2
+
+.text
+.arch armv8-a+crypto
+.globl gcm_init_v8
+.type gcm_init_v8,%function
+.align 4
+gcm_init_v8:
+ ld1 {v17.2d},[x1]
+ movi v19.16b,#0xe1
+ shl v19.2d,v19.2d,#57
+ ext v3.16b,v17.16b,v17.16b,#8
+ ushr v18.2d,v19.2d,#63
+ dup v17.4s,v17.s[1]
+ ext v16.16b,v18.16b,v19.16b,#8
+ ushr v18.2d,v3.2d,#63
+ sshr v17.4s,v17.4s,#31
+ and v18.16b,v18.16b,v16.16b
+ shl v3.2d,v3.2d,#1
+ ext v18.16b,v18.16b,v18.16b,#8
+ and v16.16b,v16.16b,v17.16b
+ orr v3.16b,v3.16b,v18.16b
+ eor v20.16b,v3.16b,v16.16b
+ st1 {v20.2d},[x0],#16
+
+
+ ext v16.16b,v20.16b,v20.16b,#8
+ pmull v0.1q,v20.1d,v20.1d
+ eor v16.16b,v16.16b,v20.16b
+ pmull2 v2.1q,v20.2d,v20.2d
+ pmull v1.1q,v16.1d,v16.1d
+
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v22.16b,v0.16b,v18.16b
+
+ ext v17.16b,v22.16b,v22.16b,#8
+ eor v17.16b,v17.16b,v22.16b
+ ext v21.16b,v16.16b,v17.16b,#8
+ st1 {v21.2d,v22.2d},[x0]
+
+ ret
+.size gcm_init_v8,.-gcm_init_v8
+.globl gcm_gmult_v8
+.type gcm_gmult_v8,%function
+.align 4
+gcm_gmult_v8:
+ ld1 {v17.2d},[x0]
+ movi v19.16b,#0xe1
+ ld1 {v20.2d,v21.2d},[x1]
+ shl v19.2d,v19.2d,#57
+
+ rev64 v17.16b,v17.16b
+
+ ext v3.16b,v17.16b,v17.16b,#8
+
+ pmull v0.1q,v20.1d,v3.1d
+ eor v17.16b,v17.16b,v3.16b
+ pmull2 v2.1q,v20.2d,v3.2d
+ pmull v1.1q,v21.1d,v17.1d
+
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v0.16b,v0.16b,v18.16b
+
+
+ rev64 v0.16b,v0.16b
+
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.2d},[x0]
+
+ ret
+.size gcm_gmult_v8,.-gcm_gmult_v8
+.globl gcm_ghash_v8
+.type gcm_ghash_v8,%function
+.align 4
+gcm_ghash_v8:
+ ld1 {v0.2d},[x0]
+
+
+
+
+
+ subs x3,x3,#32
+ mov x12,#16
+# 116 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+ ld1 {v20.2d,v21.2d},[x1],#32
+ movi v19.16b,#0xe1
+ ld1 {v22.2d},[x1]
+ csel x12,xzr,x12,eq
+ ext v0.16b,v0.16b,v0.16b,#8
+ ld1 {v16.2d},[x2],#16
+ shl v19.2d,v19.2d,#57
+
+ rev64 v16.16b,v16.16b
+ rev64 v0.16b,v0.16b
+
+ ext v3.16b,v16.16b,v16.16b,#8
+ b.lo .Lodd_tail_v8
+ ld1 {v17.2d},[x2],x12
+
+ rev64 v17.16b,v17.16b
+
+ ext v7.16b,v17.16b,v17.16b,#8
+ eor v3.16b,v3.16b,v0.16b
+ pmull v4.1q,v20.1d,v7.1d
+ eor v17.16b,v17.16b,v7.16b
+ pmull2 v6.1q,v20.2d,v7.2d
+ b .Loop_mod2x_v8
+
+.align 4
+.Loop_mod2x_v8:
+ ext v18.16b,v3.16b,v3.16b,#8
+ subs x3,x3,#32
+ pmull v0.1q,v22.1d,v3.1d
+ csel x12,xzr,x12,lo
+
+ pmull v5.1q,v21.1d,v17.1d
+ eor v18.16b,v18.16b,v3.16b
+ pmull2 v2.1q,v22.2d,v3.2d
+ eor v0.16b,v0.16b,v4.16b
+ pmull2 v1.1q,v21.2d,v18.2d
+ ld1 {v16.2d},[x2],x12
+
+ eor v2.16b,v2.16b,v6.16b
+ csel x12,xzr,x12,eq
+ eor v1.16b,v1.16b,v5.16b
+
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ ld1 {v17.2d},[x2],x12
+
+ rev64 v16.16b,v16.16b
+
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d
+
+
+ rev64 v17.16b,v17.16b
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ ext v7.16b,v17.16b,v17.16b,#8
+ ext v3.16b,v16.16b,v16.16b,#8
+ eor v0.16b,v1.16b,v18.16b
+ pmull v4.1q,v20.1d,v7.1d
+ eor v3.16b,v3.16b,v2.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v3.16b,v3.16b,v18.16b
+ eor v17.16b,v17.16b,v7.16b
+ eor v3.16b,v3.16b,v0.16b
+ pmull2 v6.1q,v20.2d,v7.2d
+ b.hs .Loop_mod2x_v8
+
+ eor v2.16b,v2.16b,v18.16b
+ ext v3.16b,v16.16b,v16.16b,#8
+ adds x3,x3,#32
+ eor v0.16b,v0.16b,v2.16b
+ b.eq .Ldone_v8
+.Lodd_tail_v8:
+ ext v18.16b,v0.16b,v0.16b,#8
+ eor v3.16b,v3.16b,v0.16b
+ eor v17.16b,v16.16b,v18.16b
+
+ pmull v0.1q,v20.1d,v3.1d
+ eor v17.16b,v17.16b,v3.16b
+ pmull2 v2.1q,v20.2d,v3.2d
+ pmull v1.1q,v21.1d,v17.1d
+
+ ext v17.16b,v0.16b,v2.16b,#8
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+ eor v0.16b,v1.16b,v18.16b
+
+ ext v18.16b,v0.16b,v0.16b,#8
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v0.16b,v0.16b,v18.16b
+
+.Ldone_v8:
+
+ rev64 v0.16b,v0.16b
+
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.2d},[x0]
+
+ ret
+.size gcm_ghash_v8,.-gcm_ghash_v8
+.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
+.align 2
+.section .note.GNU-stack,"",%progbits