diff options
author | Niels Möller <nisse@lysator.liu.se> | 2021-07-08 20:37:26 +0000 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2021-07-08 20:37:26 +0000 |
commit | 6dfdd0bc63cbcf4d7eca01bfda81e02823a4796e (patch) | |
tree | a3f6cfe2a02b394dfa3c0d612ee0c73456bda3d0 | |
parent | 61bcbbf830c28e8453e22db0bbdac174df442709 (diff) | |
parent | 6c84092d4d67b1a552ef287525be34e0474a9de5 (diff) | |
download | nettle-6dfdd0bc63cbcf4d7eca01bfda81e02823a4796e.tar.gz |
Merge branch 's390x-gcm' into 's390x'
[S390x] Optimize GHASH
See merge request nettle/nettle!26
-rw-r--r-- | Makefile.in | 2 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | s390x/machine.m4 | 21 | ||||
-rw-r--r-- | s390x/msa_x4/gcm-hash.asm | 99 |
4 files changed, 121 insertions, 3 deletions
diff --git a/Makefile.in b/Makefile.in index 87d193b6..5e0ffdd7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -620,7 +620,7 @@ distdir: $(DISTFILES) arm arm/neon arm/v6 arm/fat \ arm64 arm64/crypto arm64/fat \ powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat \ - s390x s390x/msa_x1 s390x/msa_x2 ; do \ + s390x s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 ; do \ mkdir "$(distdir)/$$d" ; \ find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \ -exec cp '{}' "$(distdir)/$$d" ';' ; \ diff --git a/configure.ac b/configure.ac index c0650ec2..64242bfd 100644 --- a/configure.ac +++ b/configure.ac @@ -531,7 +531,7 @@ if test "x$enable_assembler" = xyes ; then if test "$ABI" = 64 ; then asm_path="s390x" if test "$enable_s390x_msa" = yes ; then - asm_path="s390x/msa_x1 s390x/msa_x2 $asm_path" + asm_path="s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 $asm_path" fi fi ;; diff --git a/s390x/machine.m4 b/s390x/machine.m4 index acd5e26c..8626cec1 100644 --- a/s390x/machine.m4 +++ b/s390x/machine.m4 @@ -1,2 +1,21 @@ C Register usage: -define(`RA', `%r14') +define(`RA', `%r14') C Return address +define(`SP', `%r15') C Stack pointer + +define(`STANDARD_STACK_FRAME',`160') + +C Dynamic stack space allocation +C AP is a general register to which the allocated space is assigned +C SPACE_LEN is the length of space, must be a multiple of 8 +C FREE_STACK macro can be used to free the allocated space +C ALLOC_STACK(AP, SPACE_LEN) +define(`ALLOC_STACK', +`lgr $1,SP + aghi SP,-(STANDARD_STACK_FRAME+$2) + stg $1,0(SP) + la $1,STANDARD_STACK_FRAME (SP)') + +C Free allocated stack space +C FREE_STACK(SPACE_LEN) +define(`FREE_STACK', +`aghi SP,STANDARD_STACK_FRAME+$1') diff --git a/s390x/msa_x4/gcm-hash.asm b/s390x/msa_x4/gcm-hash.asm new file mode 100644 index 00000000..50d8b7c0 --- /dev/null +++ b/s390x/msa_x4/gcm-hash.asm @@ -0,0 +1,99 @@ +C s390x/msa_x4/gcm-hash.asm + +ifelse(` + Copyright (C) 2020 Mamone Tarsha + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +C KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) is specified in +C "z/Architecture Principles of Operation SA22-7832-12" as follows: +C A function specified by the function code in general register 0 is performed. +C General register 1 contains the logical address of the leftmost byte of the parameter block in storage. +C The second operand is processed as specified by the function code using an initial chaining value in +C the parameter block, and the result replaces the chaining value. + +C This implementation uses the KIMD-GHASH function. 
+C The parameter block used for the KIMD-GHASH function has the following format: +C *----------------------------------------------* +C | Initial Chaining Value (16 bytes) | +C |----------------------------------------------| +C | Hash Subkey (16 bytes) | +C *----------------------------------------------* + +C Size of parameter block +define(`PB_SIZE', `32') + +C gcm_set_key() assigns H value in the middle element of the table +define(`H_idx', `128*16') + +.file "gcm-hash.asm" + +.text + +C void gcm_init_key (union gcm_block *table) + +PROLOGUE(_nettle_gcm_init_key) + C Except for Hash Subkey (H), KIMD-GHASH does not need any pre-computed values so just return to the caller. + br RA +EPILOGUE(_nettle_gcm_init_key) + +C void gcm_hash (const struct gcm_key *key, union gcm_block *x, +C size_t length, const uint8_t *data) + +PROLOGUE(_nettle_gcm_hash) + ldgr %f0,%r6 C load non-volatile general register 6 into volatile floating-point register 0 + C --- allocate a stack space for parameter block in addition to 16-byte buffer to handle leftover bytes --- + ALLOC_STACK(%r1,PB_SIZE+16) C parameter block (must be general register 1) + lgr %r6,%r3 + mvc 0(16,%r1),0(%r3) C copy x to Initial Chaining Value field + mvc 16(16,%r1),H_idx (%r2) C copy H to Hash Subkey field + lghi %r0,65 C GHASH function code (must be general register 0) + lgr %r2,%r5 C location of leftmost byte of data (must not be odd-numbered general register nor be general register 0) + C number of bytes (must be general register of data + 1). length must be a multiple of the data block size (16). + risbg %r3,%r4,0,187,0 C Insert bit offsets 0-59, bit offset 0 of the fourth operand is set to clear the remaining bits. +1: .long 0xb93e0002 C kimd %r0,%r2 + brc 1,1b C safely branch back in case of partial completion + C --- handle leftovers --- + risbg %r5,%r4,60,191,0 C Insert bit offsets 60-63 and clear the remaining bits. 
+ jz 4f + lgr %r4,%r2 + C --- copy the leftovers to allocated stack buffer and pad the remaining bytes with zero --- + la %r2,PB_SIZE (%r1) + lghi %r3,16 +2: mvcle %r2,%r4,0 + brc 1,2b + aghi %r2,-16 + aghi %r3,16 +3: .long 0xb93e0002 C kimd %r0,%r2 + brc 1,3b C safely branch back in case of partial completion +4: + mvc 0(16,%r6),0(%r1) C store x + xc 0(PB_SIZE+16,%r1),0(%r1) C wipe parameter block content and leftover bytes of data from stack + FREE_STACK(PB_SIZE+16) + lgdr %r6,%f0 C restore general register 6 + br RA +EPILOGUE(_nettle_gcm_hash) |