diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2023-04-10 22:24:43 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2023-04-23 21:19:09 +0300 |
commit | fdf2e8ba654a4dcfee25586dd7e0749f2b7a92c0 (patch) | |
tree | 2f7e68db261f9fc9bde89efc3e5fe190a6316028 | |
parent | ad4ee8d52f7199ba8bdee767044337060529069f (diff) | |
download | libgcrypt-fdf2e8ba654a4dcfee25586dd7e0749f2b7a92c0.tar.gz |
mpi: optimize mpi_rshift and mpi_lshift to avoid extra MPI copying
* mpi/mpi-bit.c (_gcry_mpi_rshift): Refactor so that _gcry_mpih_rshift
is used to do the copying along with the shifting when copying is
needed, and so that the same code path is used for both in-place and
copying operations.
(_gcry_mpi_lshift): Refactor so that _gcry_mpih_lshift is used to do
the copying along with the shifting when copying is needed, and so
that the same code path is used for both in-place and copying operations.
--
Benchmark on AMD Ryzen 9 7900X:
Before:
| nanosecs/byte mebibytes/sec cycles/byte auto Mhz
rshift3 | 0.039 ns/B 24662 MiB/s 0.182 c/B 4700
lshift3 | 0.108 ns/B 8832 MiB/s 0.508 c/B 4700
rshift65 | 0.137 ns/B 6968 MiB/s 0.643 c/B 4700
lshift65 | 0.109 ns/B 8776 MiB/s 0.511 c/B 4700
After:
| nanosecs/byte mebibytes/sec cycles/byte auto Mhz
rshift3 | 0.038 ns/B 25049 MiB/s 0.179 c/B 4700
lshift3 | 0.039 ns/B 24709 MiB/s 0.181 c/B 4700
rshift65 | 0.038 ns/B 24942 MiB/s 0.180 c/B 4700
lshift65 | 0.040 ns/B 23671 MiB/s 0.189 c/B 4700
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r-- | mpi/mpi-bit.c | 138 |
1 files changed, 51 insertions, 87 deletions
diff --git a/mpi/mpi-bit.c b/mpi/mpi-bit.c index e2170401..7313a9d4 100644 --- a/mpi/mpi-bit.c +++ b/mpi/mpi-bit.c @@ -251,10 +251,11 @@ _gcry_mpi_rshift_limbs( gcry_mpi_t a, unsigned int count ) void _gcry_mpi_rshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n ) { - mpi_size_t xsize; - unsigned int i; unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); unsigned int nbits = (n%BITS_PER_MPI_LIMB); + unsigned int i; + mpi_size_t alimbs; + mpi_ptr_t xp, ap; if (mpi_is_immutable (x)) { @@ -262,75 +263,42 @@ _gcry_mpi_rshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n ) return; } - if ( x == a ) - { - /* In-place operation. */ - if ( nlimbs >= x->nlimbs ) - { - x->nlimbs = 0; - return; - } + alimbs = a->nlimbs; - if (nlimbs) - { - for (i=0; i < x->nlimbs - nlimbs; i++ ) - x->d[i] = x->d[i+nlimbs]; - x->d[i] = 0; - x->nlimbs -= nlimbs; - - } - if ( x->nlimbs && nbits ) - _gcry_mpih_rshift ( x->d, x->d, x->nlimbs, nbits ); - } - else if ( nlimbs ) + if (x != a) { - /* Copy and shift by more or equal bits than in a limb. */ - xsize = a->nlimbs; + RESIZE_IF_NEEDED (x, alimbs); + x->nlimbs = alimbs; + x->flags = a->flags; x->sign = a->sign; - RESIZE_IF_NEEDED (x, xsize); - x->nlimbs = xsize; - for (i=0; i < a->nlimbs; i++ ) - x->d[i] = a->d[i]; - x->nlimbs = i; - - if ( nlimbs >= x->nlimbs ) - { - x->nlimbs = 0; - return; - } + } + + /* In-place operation. */ + if (nlimbs >= alimbs) + { + x->nlimbs = 0; + return; + } + + xp = x->d; + ap = a->d; + if (alimbs && nbits) + { + _gcry_mpih_rshift (xp, ap + nlimbs, alimbs - nlimbs, nbits); if (nlimbs) - { - for (i=0; i < x->nlimbs - nlimbs; i++ ) - x->d[i] = x->d[i+nlimbs]; - x->d[i] = 0; - x->nlimbs -= nlimbs; - } - - if ( x->nlimbs && nbits ) - _gcry_mpih_rshift ( x->d, x->d, x->nlimbs, nbits ); + xp[alimbs - nlimbs] = 0; + x->nlimbs -= nlimbs; } - else + else if (nlimbs || (x != a)) { - /* Copy and shift by less than bits in a limb. 
*/ - xsize = a->nlimbs; - x->sign = a->sign; - RESIZE_IF_NEEDED (x, xsize); - x->nlimbs = xsize; - - if ( xsize ) - { - if (nbits ) - _gcry_mpih_rshift (x->d, a->d, x->nlimbs, nbits ); - else - { - /* The rshift helper function is not specified for - NBITS==0, thus we do a plain copy here. */ - for (i=0; i < x->nlimbs; i++ ) - x->d[i] = a->d[i]; - } - } + for (i = 0; i < alimbs - nlimbs; i++ ) + xp[i] = ap[i + nlimbs]; + if (nlimbs) + xp[i] = 0; + x->nlimbs -= nlimbs; } + MPN_NORMALIZE (x->d, x->nlimbs); } @@ -368,6 +336,9 @@ _gcry_mpi_lshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n ) { unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); unsigned int nbits = (n%BITS_PER_MPI_LIMB); + mpi_size_t alimbs; + mpi_ptr_t xp, ap; + int i; if (mpi_is_immutable (x)) { @@ -378,34 +349,27 @@ _gcry_mpi_lshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n ) if (x == a && !n) return; /* In-place shift with an amount of zero. */ - if ( x != a ) - { - /* Copy A to X. */ - unsigned int alimbs = a->nlimbs; - int asign = a->sign; - mpi_ptr_t xp, ap; - - RESIZE_IF_NEEDED (x, alimbs+nlimbs+1); - xp = x->d; - ap = a->d; - MPN_COPY (xp, ap, alimbs); - x->nlimbs = alimbs; - x->flags = a->flags; - x->sign = asign; - } + /* Note: might be in-place operation, so a==x or a!=x. */ + + alimbs = a->nlimbs; - if (nlimbs && !nbits) + RESIZE_IF_NEEDED (x, alimbs + nlimbs + 1); + xp = x->d; + ap = a->d; + if (nbits && alimbs) { - /* Shift a full number of limbs. */ - _gcry_mpi_lshift_limbs (x, nlimbs); + x->nlimbs = alimbs + nlimbs + 1; + xp[alimbs + nlimbs] = _gcry_mpih_lshift (xp + nlimbs, ap, alimbs, nbits); } - else if (n) + else { - /* We use a very dump approach: Shift left by the number of - limbs plus one and than fix it up by an rshift. 
*/ - _gcry_mpi_lshift_limbs (x, nlimbs+1); - mpi_rshift (x, x, BITS_PER_MPI_LIMB - nbits); + x->nlimbs = alimbs + nlimbs; + for (i = alimbs - 1; i >= 0; i--) + xp[i + nlimbs] = ap[i]; } - + for (i = 0; i < nlimbs; i++) + xp[i] = 0; + x->flags = a->flags; + x->sign = a->sign; MPN_NORMALIZE (x->d, x->nlimbs); } |