diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2023-04-10 21:05:34 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2023-04-23 21:19:09 +0300 |
commit | 8b09db4cce1040f061034ab899cd1369a51dceeb (patch) | |
tree | e0dbae258496ccc5af608ecb2cebc58f1aeec038 | |
parent | e90fbf6f8dacf280d03e557a65528fc2df24f1d7 (diff) | |
download | libgcrypt-8b09db4cce1040f061034ab899cd1369a51dceeb.tar.gz |
mpi: avoid MPI copy at gcry_mpi_sub
* mpi/mpi-add.c (_gcry_mpi_add): Rename function...
(_gcry_mpi_add_inv_sign): ... to this and add parameter for inverting
sign of second operand.
(_gcry_mpi_add): New.
(_gcry_mpi_sub): Remove mpi_copy and instead use new
'_gcry_mpi_add_inv_sign' function with inverted sign for second
operand.
--
Benchmark on AMD Ryzen 9 7900X:
Before:
| nanosecs/byte mebibytes/sec cycles/byte auto Mhz
add | 0.052 ns/B 18301 MiB/s 0.287 c/B 5500
sub | 0.098 ns/B 9768 MiB/s 0.537 c/B 5500
After:
| nanosecs/byte mebibytes/sec cycles/byte auto Mhz
add | 0.030 ns/B 31771 MiB/s 0.165 c/B 5500
sub | 0.031 ns/B 31187 MiB/s 0.168 c/B 5500
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r-- | mpi/mpi-add.c | 19 |
1 files changed, 11 insertions, 8 deletions
diff --git a/mpi/mpi-add.c b/mpi/mpi-add.c
index 38dd352f..2fd19e55 100644
--- a/mpi/mpi-add.c
+++ b/mpi/mpi-add.c
@@ -84,8 +84,8 @@ _gcry_mpi_add_ui (gcry_mpi_t w, gcry_mpi_t u, unsigned long v )
 }
 
 
-void
-_gcry_mpi_add(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
+static void
+_gcry_mpi_add_inv_sign(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, int inv_v_sign)
 {
     mpi_ptr_t wp, up, vp;
     mpi_size_t usize, vsize, wsize;
@@ -93,7 +93,7 @@ _gcry_mpi_add(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
 
     if( u->nlimbs < v->nlimbs ) { /* Swap U and V. */
         usize = v->nlimbs;
-        usign = v->sign;
+        usign = v->sign ^ inv_v_sign;
         vsize = u->nlimbs;
         vsign = u->sign;
         wsize = usize + 1;
@@ -106,7 +106,7 @@ _gcry_mpi_add(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
         usize = u->nlimbs;
         usign = u->sign;
         vsize = v->nlimbs;
-        vsign = v->sign;
+        vsign = v->sign ^ inv_v_sign;
         wsize = usize + 1;
         RESIZE_IF_NEEDED(w, wsize);
         /* These must be after realloc (u or v may be the same as w). */
@@ -212,12 +212,15 @@ _gcry_mpi_sub_ui(gcry_mpi_t w, gcry_mpi_t u, unsigned long v )
 }
 
 void
+_gcry_mpi_add(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
+{
+  _gcry_mpi_add_inv_sign (w, u, v, 0);
+}
+
+void
 _gcry_mpi_sub(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
 {
-    gcry_mpi_t vv = mpi_copy (v);
-    vv->sign = ! vv->sign;
-    mpi_add (w, u, vv);
-    mpi_free (vv);
+  _gcry_mpi_add_inv_sign (w, u, v, 1);
 }
 
 