| author    | Nikos Mavrogiannopoulos <nmav@gnutls.org>               | 2016-09-27 21:43:05 +0200 |
|-----------|----------------------------------------------------------|---------------------------|
| committer | Nikos Mavrogiannopoulos <nmav@redhat.com>                | 2016-10-03 13:53:05 +0200 |
| commit    | c5032beda17213eea05e842739dafae50e87f39b (patch)         |                           |
| tree      | eb3db582e6c269192141a2329986c5b67564b013                 |                           |
| parent    | edce87be74a578eb3b2bc85483130ddd62a4f38f (diff)          |                           |
| download  | gnutls-c5032beda17213eea05e842739dafae50e87f39b.tar.gz   |                           |
Imported Andy Polyakov's implementation of AES-GCM in aarch64
| -rw-r--r-- | cfg.mk                                      |   1 |
| l--------- | devel/perlasm/ghash-aarch64.pl              |   1 |
| l--------- | devel/perlasm/ghash-aarch64.pl.license      |   1 |
| -rw-r--r-- | lib/accelerated/aarch64/Makefile.am         |   5 |
| -rw-r--r-- | lib/accelerated/aarch64/aarch64-common.c    |  26 |
| -rw-r--r-- | lib/accelerated/aarch64/aes-gcm-aarch64.c   | 229 |
| -rw-r--r-- | lib/accelerated/aarch64/elf/ghash-aarch64.s | 266 |

7 files changed, 462 insertions, 67 deletions
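The patch backs gnutls's aarch64 AES-GCM with Andy Polyakov's PMULL-based GHASH routines (gcm_init_v8, gcm_gmult_v8, gcm_ghash_v8), generated from OpenSSL's ghashv8-armx.pl. As orientation before the diff, here is a minimal sketch of how those three entry points fit together. The u128 type and the prototypes are copied from the aes-gcm-aarch64.c hunks below; ghash_once and its calling convention are illustrative only, not part of the patch:

```c
#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef struct { uint64_t hi, lo; } u128;

/* Entry points provided by ghash-aarch64.s, prototypes as declared in
 * the aes-gcm-aarch64.c hunk below. */
void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16],
		  const uint8_t *inp, size_t len);

/* Illustrative one-shot GHASH over a whole number of 16-byte blocks.
 * H is the AES encryption of the all-zero block, byte-swapped to host
 * order, exactly as aes_gcm_cipher_setkey does below.  A partial final
 * block would be zero-padded into Xi and folded with gcm_gmult_v8. */
static void ghash_once(uint64_t Xi[2], const uint64_t H[2],
		       const uint8_t *data, size_t len)
{
	u128 Htable[16];

	gcm_init_v8(Htable, H);              /* precompute powers of H */
	memset(Xi, 0, 2 * sizeof(Xi[0]));    /* GHASH starts from zero */
	gcm_ghash_v8(Xi, Htable, data, len); /* len must be a multiple of 16 */
}
```

The Htable precomputation is what keeps the per-block multiplies cheap; gnutls performs it once per key, in aes_gcm_cipher_setkey below.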
diff --git a/cfg.mk b/cfg.mk
@@ -135,6 +135,7 @@ web:
 	-cp -v doc/reference/html/*.html doc/reference/html/*.png doc/reference/html/*.devhelp* doc/reference/html/*.css $(htmldir)/reference/
 
 ASM_SOURCES_XXX := \
+	lib/accelerated/aarch64/XXX/ghash-aarch64.s \
 	lib/accelerated/aarch64/XXX/aes-aarch64.s \
 	lib/accelerated/aarch64/XXX/sha1-armv8.s \
 	lib/accelerated/aarch64/XXX/sha256-armv8.s \
diff --git a/devel/perlasm/ghash-aarch64.pl b/devel/perlasm/ghash-aarch64.pl
new file mode 120000
index 0000000000..761f73cd4a
--- /dev/null
+++ b/devel/perlasm/ghash-aarch64.pl
@@ -0,0 +1 @@
+../openssl/crypto/modes/asm/ghashv8-armx.pl
\ No newline at end of file
diff --git a/devel/perlasm/ghash-aarch64.pl.license b/devel/perlasm/ghash-aarch64.pl.license
new file mode 120000
index 0000000000..cd301a44ab
--- /dev/null
+++ b/devel/perlasm/ghash-aarch64.pl.license
@@ -0,0 +1 @@
+license.txt
\ No newline at end of file
diff --git a/lib/accelerated/aarch64/Makefile.am b/lib/accelerated/aarch64/Makefile.am
index f34507853b..edc1edd495 100644
--- a/lib/accelerated/aarch64/Makefile.am
+++ b/lib/accelerated/aarch64/Makefile.am
@@ -33,6 +33,9 @@ if ENABLE_MINITASN1
 AM_CPPFLAGS += -I$(srcdir)/../../minitasn1
 endif
 
+#ensure that we have all aarch64 instruction sets enabled for the assembler
+AM_CCASFLAGS = -Wa,-march=all
+
 EXTRA_DIST = README
 
 noinst_LTLIBRARIES = libaarch64.la
@@ -42,6 +45,6 @@ libaarch64_la_SOURCES = aarch64-common.c aarch64-common.h sha-aarch64.h sha-aarc
 
 if ASM_AARCH64
 libaarch64_la_SOURCES += elf/sha1-armv8.s elf/sha512-armv8.s elf/sha256-armv8.s \
-	elf/aes-aarch64.s
+	elf/aes-aarch64.s elf/ghash-aarch64.s
 
 endif #ASM_AARCH64
diff --git a/lib/accelerated/aarch64/aarch64-common.c b/lib/accelerated/aarch64/aarch64-common.c
index 9866132848..310ea5508c 100644
--- a/lib/accelerated/aarch64/aarch64-common.c
+++ b/lib/accelerated/aarch64/aarch64-common.c
@@ -193,20 +193,24 @@ void _register_aarch64_crypto(unsigned capabilities)
 	if (_gnutls_arm_cpuid_s & ARMV8_AES) {
 		_gnutls_debug_log("Aarch64 AES was detected\n");
 
-		ret =
-		    gnutls_crypto_single_cipher_register
-		    (GNUTLS_CIPHER_AES_128_GCM, 90,
-		     &_gnutls_aes_gcm_aarch64, 0);
+		if (_gnutls_arm_cpuid_s & ARMV8_PMULL) {
+			_gnutls_debug_log("Aarch64 PMULL was detected\n");
+
+			ret =
+			    gnutls_crypto_single_cipher_register
+			    (GNUTLS_CIPHER_AES_128_GCM, 90,
+			     &_gnutls_aes_gcm_aarch64, 0);
+			if (ret < 0) {
+				gnutls_assert();
+			}
+
+			ret =
+			    gnutls_crypto_single_cipher_register
+			    (GNUTLS_CIPHER_AES_256_GCM, 90,
+			     &_gnutls_aes_gcm_aarch64, 0);
 			if (ret < 0) {
 				gnutls_assert();
 			}
-
-		ret =
-		    gnutls_crypto_single_cipher_register
-		    (GNUTLS_CIPHER_AES_256_GCM, 90,
-		     &_gnutls_aes_gcm_aarch64, 0);
-		if (ret < 0) {
-			gnutls_assert();
 		}
 
 		ret =
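The hunk above makes GCM registration conditional on the PMULL capability bit in addition to AES, since the imported GHASH code is built on the PMULL/PMULL2 polynomial-multiply instructions. For context, Linux exposes the same capability bits to userspace through the auxiliary vector; the sketch below is a hypothetical stand-alone probe, not the detection code gnutls itself uses (that lives earlier in aarch64-common.c and fills _gnutls_arm_cpuid_s):

```c
/* Hypothetical stand-alone probe; builds only on aarch64 Linux. */
#include <stdio.h>
#include <sys/auxv.h>	/* getauxval, AT_HWCAP */
#include <asm/hwcap.h>	/* HWCAP_AES, HWCAP_PMULL */

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_AES) && (hwcap & HWCAP_PMULL))
		printf("AES-GCM can take the aes_v8/gcm_*_v8 fast path\n");
	else
		printf("AES or PMULL missing; a generic implementation is used\n");
	return 0;
}
```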
diff --git a/lib/accelerated/aarch64/aes-gcm-aarch64.c b/lib/accelerated/aarch64/aes-gcm-aarch64.c
index 37412363f3..c571d0294f 100644
--- a/lib/accelerated/aarch64/aes-gcm-aarch64.c
+++ b/lib/accelerated/aarch64/aes-gcm-aarch64.c
@@ -22,50 +22,56 @@
  */
 
 /*
- * The following code is an implementation of the AES-128-GCM cipher
- * using the vpaes aarch64 code.
+ * The following code is an implementation of the AES-GCM cipher
+ * using the AES and neon instruction sets.
  */
 
 #include "errors.h"
 #include "gnutls_int.h"
-
-#ifdef HAVE_LIBNETTLE
-
 #include <gnutls/crypto.h>
 #include "errors.h"
 #include <aes-aarch64.h>
 #include <aarch64-common.h>
+#include <nettle/memxor.h>
+#include <nettle/macros.h>
 #include <byteswap.h>
-#include <nettle/gcm.h>
 
-/* GCM mode
- * It is used when the CPU doesn't include the PCLMUL instructions.
- */
-struct gcm_aarch64_aes_ctx GCM_CTX(AES_KEY);
+#define GCM_BLOCK_SIZE 16
 
-static void aarch64_aes_encrypt(const void *_ctx,
-				size_t length, uint8_t * dst,
-				const uint8_t * src)
-{
-	AES_KEY *ctx = (void*)_ctx;
+/* GCM mode */
 
-	aes_v8_encrypt(src, dst, ctx);
-}
+typedef struct {
+	uint64_t hi, lo;
+} u128;
 
-static void aarch64_aes_128_set_encrypt_key(void *_ctx,
-					    const uint8_t * key)
-{
-	AES_KEY *ctx = _ctx;
+/* This is the gcm128 structure used in openssl. It
+ * is compatible with the included assembly code.
+ */
+struct gcm128_context {
+	union {
+		uint64_t u[2];
+		uint32_t d[4];
+		uint8_t c[16];
+	} Yi, EKi, EK0, len, Xi, H;
+	u128 Htable[16];
+};
 
-	aes_v8_set_encrypt_key(key, 16*8, ctx);
-}
+struct aes_gcm_ctx {
+	AES_KEY expanded_key;
+	struct gcm128_context gcm;
+};
 
-static void aarch64_aes_256_set_encrypt_key(void *_ctx,
-					    const uint8_t * key)
+void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16],
+		  const uint8_t * inp, size_t len);
+void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
+
+static void aes_gcm_deinit(void *_ctx)
 {
-	AES_KEY *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
 
-	aes_v8_set_encrypt_key(key, 32*8, ctx);
+	zeroize_temp_key(ctx, sizeof(*ctx));
+	gnutls_free(ctx);
 }
 
@@ -77,7 +83,7 @@ aes_gcm_cipher_init(gnutls_cipher_algorithm_t algorithm, void **_ctx,
 	    algorithm != GNUTLS_CIPHER_AES_256_GCM)
 		return GNUTLS_E_INVALID_REQUEST;
 
-	*_ctx = gnutls_calloc(1, sizeof(struct gcm_aarch64_aes_ctx));
+	*_ctx = gnutls_calloc(1, sizeof(struct aes_gcm_ctx));
 	if (*_ctx == NULL) {
 		gnutls_assert();
 		return GNUTLS_E_MEMORY_ERROR;
@@ -87,40 +93,127 @@ aes_gcm_cipher_init(gnutls_cipher_algorithm_t algorithm, void **_ctx,
 }
 
 static int
-aes_gcm_cipher_setkey(void *_ctx, const void *key, size_t keysize)
+aes_gcm_cipher_setkey(void *_ctx, const void *userkey, size_t keysize)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
+	int ret;
+
+	ret =
+	    aes_v8_set_encrypt_key(userkey, keysize * 8,
+				   ALIGN16(&ctx->expanded_key));
+	if (ret != 0)
+		return gnutls_assert_val(GNUTLS_E_ENCRYPTION_FAILED);
+
+	aes_v8_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, ALIGN16(&ctx->expanded_key));
 
-	if (keysize == 16) {
-		GCM_SET_KEY(ctx, aarch64_aes_128_set_encrypt_key, aarch64_aes_encrypt,
-			    key);
-	} else if (keysize == 32) {
-		GCM_SET_KEY(ctx, aarch64_aes_256_set_encrypt_key, aarch64_aes_encrypt,
-			    key);
-	} else abort();
+	ctx->gcm.H.u[0] = bswap_64(ctx->gcm.H.u[0]);
+	ctx->gcm.H.u[1] = bswap_64(ctx->gcm.H.u[1]);
+
+	gcm_init_v8(ctx->gcm.Htable, ctx->gcm.H.u);
 
 	return 0;
 }
 
 static int aes_gcm_setiv(void *_ctx, const void *iv, size_t iv_size)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
 
 	if (iv_size != GCM_BLOCK_SIZE - 4)
 		return gnutls_assert_val(GNUTLS_E_INVALID_REQUEST);
 
-	GCM_SET_IV(ctx, iv_size, iv);
+	memset(ctx->gcm.Xi.c, 0, sizeof(ctx->gcm.Xi.c));
+	memset(ctx->gcm.len.c, 0, sizeof(ctx->gcm.len.c));
+
+	memcpy(ctx->gcm.Yi.c, iv, GCM_BLOCK_SIZE - 4);
+	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 4] = 0;
+	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 3] = 0;
+	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 2] = 0;
+	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 1;
+	aes_v8_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c,
+		       ALIGN16(&ctx->expanded_key));
+	ctx->gcm.Yi.c[GCM_BLOCK_SIZE - 1] = 2;
 	return 0;
 }
 
+static void
+gcm_ghash(struct aes_gcm_ctx *ctx, const uint8_t * src, size_t src_size)
+{
+	size_t rest = src_size % GCM_BLOCK_SIZE;
+	size_t aligned_size = src_size - rest;
+
+	if (aligned_size > 0)
+		gcm_ghash_v8(ctx->gcm.Xi.u, ctx->gcm.Htable, src,
+			     aligned_size);
+
+	if (rest > 0) {
+		memxor(ctx->gcm.Xi.c, src + aligned_size, rest);
+		gcm_gmult_v8(ctx->gcm.Xi.u, ctx->gcm.Htable);
+	}
+}
+
+static void
+ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+		     size_t blocks, const AES_KEY *key,
+		     const unsigned char ivec[16])
+{
+	unsigned i;
+	uint8_t ctr[16];
+
+	memcpy(ctr, ivec, 16);
+
+	for (i=0;i<blocks;i++) {
+		aes_v8_encrypt(ctr, out, key);
+		memxor(out, in, 16);
+
+		out += 16;
+		in += 16;
+		INCREMENT(16, ctr);
+	}
+}
+
+static inline void
+ctr_encrypt_last(struct aes_gcm_ctx *ctx, const uint8_t * src,
+		 uint8_t * dst, size_t pos, size_t length)
+{
+	uint8_t tmp[GCM_BLOCK_SIZE];
+	uint8_t out[GCM_BLOCK_SIZE];
+
+	memcpy(tmp, &src[pos], length);
+	ctr32_encrypt_blocks(tmp, out, 1,
+			     ALIGN16(&ctx->expanded_key),
+			     ctx->gcm.Yi.c);
+
+	memcpy(&dst[pos], out, length);
+
+}
+
 static int
 aes_gcm_encrypt(void *_ctx, const void *src, size_t src_size,
 		void *dst, size_t length)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
+	int blocks = src_size / GCM_BLOCK_SIZE;
+	int exp_blocks = blocks * GCM_BLOCK_SIZE;
+	int rest = src_size - (exp_blocks);
+	uint32_t counter;
+
+	if (blocks > 0) {
+		ctr32_encrypt_blocks(src, dst,
+				     blocks,
+				     ALIGN16(&ctx->expanded_key),
+				     ctx->gcm.Yi.c);
+
+		counter = _gnutls_read_uint32(ctx->gcm.Yi.c + 12);
+		counter += blocks;
+		_gnutls_write_uint32(counter, ctx->gcm.Yi.c + 12);
+	}
 
-	GCM_ENCRYPT(ctx, aarch64_aes_encrypt, src_size, dst, src);
+	if (rest > 0) /* last incomplete block */
+		ctr_encrypt_last(ctx, src, dst, exp_blocks, rest);
+
+	gcm_ghash(ctx, dst, src_size);
+	ctx->gcm.len.u[1] += src_size;
 
 	return 0;
 }
@@ -129,34 +222,62 @@ static int
 aes_gcm_decrypt(void *_ctx, const void *src, size_t src_size,
 		void *dst, size_t dst_size)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
+	int blocks = src_size / GCM_BLOCK_SIZE;
+	int exp_blocks = blocks * GCM_BLOCK_SIZE;
+	int rest = src_size - (exp_blocks);
+	uint32_t counter;
+
+	gcm_ghash(ctx, src, src_size);
+	ctx->gcm.len.u[1] += src_size;
+
+	if (blocks > 0) {
+		ctr32_encrypt_blocks(src, dst,
+				     blocks,
+				     ALIGN16(&ctx->expanded_key),
+				     ctx->gcm.Yi.c);
+
+		counter = _gnutls_read_uint32(ctx->gcm.Yi.c + 12);
+		counter += blocks;
+		_gnutls_write_uint32(counter, ctx->gcm.Yi.c + 12);
+	}
+
+	if (rest > 0) /* last incomplete block */
+		ctr_encrypt_last(ctx, src, dst, exp_blocks, rest);
 
-	GCM_DECRYPT(ctx, aarch64_aes_encrypt, src_size, dst, src);
 	return 0;
 }
 
 static int aes_gcm_auth(void *_ctx, const void *src, size_t src_size)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
 
-	GCM_UPDATE(ctx, src_size, src);
+	gcm_ghash(ctx, src, src_size);
+	ctx->gcm.len.u[0] += src_size;
 
 	return 0;
 }
 
+
 static void aes_gcm_tag(void *_ctx, void *tag, size_t tagsize)
 {
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	struct aes_gcm_ctx *ctx = _ctx;
+	uint8_t buffer[GCM_BLOCK_SIZE];
+	uint64_t alen, clen;
 
-	GCM_DIGEST(ctx, aarch64_aes_encrypt, tagsize, tag);
-}
+	alen = ctx->gcm.len.u[0] * 8;
+	clen = ctx->gcm.len.u[1] * 8;
 
-static void aes_gcm_deinit(void *_ctx)
-{
-	struct gcm_aarch64_aes_ctx *ctx = _ctx;
+	_gnutls_write_uint64(alen, buffer);
+	_gnutls_write_uint64(clen, &buffer[8]);
 
-	zeroize_temp_key(ctx, sizeof(*ctx));
-	gnutls_free(ctx);
+	gcm_ghash_v8(ctx->gcm.Xi.u, ctx->gcm.Htable, buffer,
+		     GCM_BLOCK_SIZE);
+
+	ctx->gcm.Xi.u[0] ^= ctx->gcm.EK0.u[0];
+	ctx->gcm.Xi.u[1] ^= ctx->gcm.EK0.u[1];
+
+	memcpy(tag, ctx->gcm.Xi.c, MIN(GCM_BLOCK_SIZE, tagsize));
 }
 
 #include "../x86/aes-gcm-aead.h"
 
@@ -173,5 +294,3 @@ const gnutls_crypto_cipher_st _gnutls_aes_gcm_aarch64 = {
 	.tag = aes_gcm_tag,
 	.auth = aes_gcm_auth,
 };
-
-#endif
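In the encrypt and decrypt paths above, ctr32_encrypt_blocks consumes whole blocks and the caller then steps the counter with _gnutls_read_uint32/_gnutls_write_uint32 on the last four bytes of Yi. That is GCM's inc32 operation from NIST SP 800-38D: only the low 32 bits of the counter block, interpreted big-endian, are incremented, and the 96-bit IV portion is never touched. A self-contained sketch of that update (hypothetical helper, not gnutls code):

```c
#include <stdint.h>

/* inc32 per NIST SP 800-38D: advance only the low 32 bits of the
 * 16-byte counter block Yi, big-endian; bytes 0..11 hold the IV and
 * are never modified. */
static void inc32(uint8_t Yi[16], uint32_t blocks)
{
	uint32_t c = ((uint32_t)Yi[12] << 24) | ((uint32_t)Yi[13] << 16) |
		     ((uint32_t)Yi[14] << 8) | (uint32_t)Yi[15];

	c += blocks;		/* wraps modulo 2^32 by design */
	Yi[12] = c >> 24;
	Yi[13] = c >> 16;
	Yi[14] = c >> 8;
	Yi[15] = c;
}
```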
diff --git a/lib/accelerated/aarch64/elf/ghash-aarch64.s b/lib/accelerated/aarch64/elf/ghash-aarch64.s
new file mode 100644
index 0000000000..13faf91f40
--- /dev/null
+++ b/lib/accelerated/aarch64/elf/ghash-aarch64.s
@@ -0,0 +1,266 @@
+# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+#     * Redistributions of source code must retain copyright notices,
+#       this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials
+#       provided with the distribution.
+#
+#     * Neither the name of the Andy Polyakov nor the names of its
+#       copyright holder and contributors may be used to endorse or
+#       promote products derived from this software without specific
+#       prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+# 1 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+# 1 "<built-in>"
+# 1 "<command-line>"
+# 1 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+# 1 "lib/accelerated/aarch64/aarch64-common.h" 1
+# 2 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S" 2
+
+.text
+.arch	armv8-a+crypto
+.globl	gcm_init_v8
+.type	gcm_init_v8,%function
+.align	4
+gcm_init_v8:
+	ld1	{v17.2d},[x1]
+	movi	v19.16b,#0xe1
+	shl	v19.2d,v19.2d,#57
+	ext	v3.16b,v17.16b,v17.16b,#8
+	ushr	v18.2d,v19.2d,#63
+	dup	v17.4s,v17.s[1]
+	ext	v16.16b,v18.16b,v19.16b,#8
+	ushr	v18.2d,v3.2d,#63
+	sshr	v17.4s,v17.4s,#31
+	and	v18.16b,v18.16b,v16.16b
+	shl	v3.2d,v3.2d,#1
+	ext	v18.16b,v18.16b,v18.16b,#8
+	and	v16.16b,v16.16b,v17.16b
+	orr	v3.16b,v3.16b,v18.16b
+	eor	v20.16b,v3.16b,v16.16b
+	st1	{v20.2d},[x0],#16
+
+
+	ext	v16.16b,v20.16b,v20.16b,#8
+	pmull	v0.1q,v20.1d,v20.1d
+	eor	v16.16b,v16.16b,v20.16b
+	pmull2	v2.1q,v20.2d,v20.2d
+	pmull	v1.1q,v16.1d,v16.1d
+
+	ext	v17.16b,v0.16b,v2.16b,#8
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d
+
+	ins	v2.d[0],v1.d[1]
+	ins	v1.d[1],v0.d[0]
+	eor	v0.16b,v1.16b,v18.16b
+
+	ext	v18.16b,v0.16b,v0.16b,#8
+	pmull	v0.1q,v0.1d,v19.1d
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v22.16b,v0.16b,v18.16b
+
+	ext	v17.16b,v22.16b,v22.16b,#8
+	eor	v17.16b,v17.16b,v22.16b
+	ext	v21.16b,v16.16b,v17.16b,#8
+	st1	{v21.2d,v22.2d},[x0]
+
+	ret
+.size	gcm_init_v8,.-gcm_init_v8
+.globl	gcm_gmult_v8
+.type	gcm_gmult_v8,%function
+.align	4
+gcm_gmult_v8:
+	ld1	{v17.2d},[x0]
+	movi	v19.16b,#0xe1
+	ld1	{v20.2d,v21.2d},[x1]
+	shl	v19.2d,v19.2d,#57
+
+	rev64	v17.16b,v17.16b
+
+	ext	v3.16b,v17.16b,v17.16b,#8
+
+	pmull	v0.1q,v20.1d,v3.1d
+	eor	v17.16b,v17.16b,v3.16b
+	pmull2	v2.1q,v20.2d,v3.2d
+	pmull	v1.1q,v21.1d,v17.1d
+
+	ext	v17.16b,v0.16b,v2.16b,#8
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d
+
+	ins	v2.d[0],v1.d[1]
+	ins	v1.d[1],v0.d[0]
+	eor	v0.16b,v1.16b,v18.16b
+
+	ext	v18.16b,v0.16b,v0.16b,#8
+	pmull	v0.1q,v0.1d,v19.1d
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v0.16b,v0.16b,v18.16b
+
+
+	rev64	v0.16b,v0.16b
+
+	ext	v0.16b,v0.16b,v0.16b,#8
+	st1	{v0.2d},[x0]
+
+	ret
+.size	gcm_gmult_v8,.-gcm_gmult_v8
+.globl	gcm_ghash_v8
+.type	gcm_ghash_v8,%function
+.align	4
+gcm_ghash_v8:
+	ld1	{v0.2d},[x0]
+
+
+
+
+
+	subs	x3,x3,#32
+	mov	x12,#16
+# 116 "lib/accelerated/aarch64/elf/ghash-aarch64.s.tmp.S"
+	ld1	{v20.2d,v21.2d},[x1],#32
+	movi	v19.16b,#0xe1
+	ld1	{v22.2d},[x1]
+	csel	x12,xzr,x12,eq
+	ext	v0.16b,v0.16b,v0.16b,#8
+	ld1	{v16.2d},[x2],#16
+	shl	v19.2d,v19.2d,#57
+
+	rev64	v16.16b,v16.16b
+	rev64	v0.16b,v0.16b
+
+	ext	v3.16b,v16.16b,v16.16b,#8
+	b.lo	.Lodd_tail_v8
+	ld1	{v17.2d},[x2],x12
+
+	rev64	v17.16b,v17.16b
+
+	ext	v7.16b,v17.16b,v17.16b,#8
+	eor	v3.16b,v3.16b,v0.16b
+	pmull	v4.1q,v20.1d,v7.1d
+	eor	v17.16b,v17.16b,v7.16b
+	pmull2	v6.1q,v20.2d,v7.2d
+	b	.Loop_mod2x_v8
+
+.align	4
+.Loop_mod2x_v8:
+	ext	v18.16b,v3.16b,v3.16b,#8
+	subs	x3,x3,#32
+	pmull	v0.1q,v22.1d,v3.1d
+	csel	x12,xzr,x12,lo
+
+	pmull	v5.1q,v21.1d,v17.1d
+	eor	v18.16b,v18.16b,v3.16b
+	pmull2	v2.1q,v22.2d,v3.2d
+	eor	v0.16b,v0.16b,v4.16b
+	pmull2	v1.1q,v21.2d,v18.2d
+	ld1	{v16.2d},[x2],x12
+
+	eor	v2.16b,v2.16b,v6.16b
+	csel	x12,xzr,x12,eq
+	eor	v1.16b,v1.16b,v5.16b
+
+	ext	v17.16b,v0.16b,v2.16b,#8
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	ld1	{v17.2d},[x2],x12
+
+	rev64	v16.16b,v16.16b
+
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d
+
+
+	rev64	v17.16b,v17.16b
+
+	ins	v2.d[0],v1.d[1]
+	ins	v1.d[1],v0.d[0]
+	ext	v7.16b,v17.16b,v17.16b,#8
+	ext	v3.16b,v16.16b,v16.16b,#8
+	eor	v0.16b,v1.16b,v18.16b
+	pmull	v4.1q,v20.1d,v7.1d
+	eor	v3.16b,v3.16b,v2.16b
+
+	ext	v18.16b,v0.16b,v0.16b,#8
+	pmull	v0.1q,v0.1d,v19.1d
+	eor	v3.16b,v3.16b,v18.16b
+	eor	v17.16b,v17.16b,v7.16b
+	eor	v3.16b,v3.16b,v0.16b
+	pmull2	v6.1q,v20.2d,v7.2d
+	b.hs	.Loop_mod2x_v8
+
+	eor	v2.16b,v2.16b,v18.16b
+	ext	v3.16b,v16.16b,v16.16b,#8
+	adds	x3,x3,#32
+	eor	v0.16b,v0.16b,v2.16b
+	b.eq	.Ldone_v8
+.Lodd_tail_v8:
+	ext	v18.16b,v0.16b,v0.16b,#8
+	eor	v3.16b,v3.16b,v0.16b
+	eor	v17.16b,v16.16b,v18.16b
+
+	pmull	v0.1q,v20.1d,v3.1d
+	eor	v17.16b,v17.16b,v3.16b
+	pmull2	v2.1q,v20.2d,v3.2d
+	pmull	v1.1q,v21.1d,v17.1d
+
+	ext	v17.16b,v0.16b,v2.16b,#8
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d
+
+	ins	v2.d[0],v1.d[1]
+	ins	v1.d[1],v0.d[0]
+	eor	v0.16b,v1.16b,v18.16b
+
+	ext	v18.16b,v0.16b,v0.16b,#8
+	pmull	v0.1q,v0.1d,v19.1d
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v0.16b,v0.16b,v18.16b
+
+.Ldone_v8:
+
+	rev64	v0.16b,v0.16b
+
+	ext	v0.16b,v0.16b,v0.16b,#8
+	st1	{v0.2d},[x0]
+
+	ret
+.size	gcm_ghash_v8,.-gcm_ghash_v8
+.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	2
+.align	2
+.section	.note.GNU-stack,"",%progbits
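Once registered at priority 90, the accelerated implementation is picked up transparently by the generic gnutls cipher layer; callers never reference _gnutls_aes_gcm_aarch64 directly. A minimal caller through the public AEAD API (standard gnutls 3.4+ interface, shown only to illustrate how the fast path is reached; error handling trimmed):

```c
#include <stdio.h>
#include <gnutls/crypto.h>

int main(void)
{
	unsigned char key_data[16] = "0123456789abcdef";
	unsigned char nonce[12] = "fixed-nonce!";	/* 96-bit IV, as aes_gcm_setiv expects */
	unsigned char ct[64 + 16];
	size_t ct_len = sizeof(ct);
	gnutls_datum_t key = { key_data, sizeof(key_data) };
	gnutls_aead_cipher_hd_t h;

	if (gnutls_aead_cipher_init(&h, GNUTLS_CIPHER_AES_128_GCM, &key) < 0)
		return 1;

	/* On aarch64 CPUs with AES+PMULL this dispatches to the code above */
	if (gnutls_aead_cipher_encrypt(h, nonce, sizeof(nonce),
				       NULL, 0,	/* no AAD */
				       16,	/* tag size */
				       "hello, accelerated GCM", 22,
				       ct, &ct_len) < 0)
		return 1;

	printf("ciphertext+tag: %zu bytes\n", ct_len);
	gnutls_aead_cipher_deinit(h);
	return 0;
}
```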