From e9b82efc35aa13d4aa3cfa7b0d2e3f919d17b290 Mon Sep 17 00:00:00 2001
From: Marco Bodrato
Date: Tue, 15 Feb 2022 09:04:59 +0100
Subject: tune/: tune/speed support for mpn_{mul,sqr}mod_bknp1

---
 tune/common.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 tune/speed.c | 5 +++++
 tune/speed.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+)

(limited to 'tune')

diff --git a/tune/common.c b/tune/common.c
index b757bf271..52c6d5eea 100644
--- a/tune/common.c
+++ b/tune/common.c
@@ -1597,6 +1597,58 @@ speed_mpn_sqrmod_bnm1 (struct speed_params *s)
   SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_sqrmod_bnm1 (wp, s->size, s->xp, s->size, tp));
 }
 
+double /* Times mpn_mulmod_bknp1 on s->xp and s->yp; use_r = 1, so s->r is consulted when choosing k.  */
+speed_mpn_mulmod_bknp1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_mulmod_bknp1 (wp, s->xp, s->yp, nk, k, tp),1);
+}
+
+double /* Times mpn_sqrmod_bknp1 on s->xp; use_r = 1, so s->r is consulted when choosing k.  */
+speed_mpn_sqrmod_bknp1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_sqrmod_bknp1 (wp, s->xp, nk, k, tp),1);
+}
+
+static void /* Helper timed by speed_mpn_mulmod_bnp1: forwards to mpn_mulmod_bknp1 when k > 2, else uses one full product.  */
+mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+                    unsigned k, mp_ptr tp)
+{
+  if (k > 2)
+    mpn_mulmod_bknp1 (rp, ap, bp, n, k, tp);
+  else
+    {
+      n *= k; /* From here on n is the whole operand length, n * k limbs.  */
+      mpn_mul_n (tp, ap, bp, n); /* Full product of the low n limbs of each operand, written to tp.  */
+      mpn_sub_n (rp, tp, tp + n, n); /* rp = tp[0..n) - tp[n..2n); the value returned by mpn_sub_n is not used.  */
+    }
+}
+
+static void /* Squaring counterpart of mpn_bc_mulmod_bnp1, timed by speed_mpn_sqrmod_bnp1.  */
+mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t n,
+                    unsigned k, mp_ptr tp)
+{
+  if (k > 2)
+    mpn_sqrmod_bknp1 (rp, ap, n, k, tp);
+  else
+    {
+      n *= k; /* From here on n is the whole operand length, n * k limbs.  */
+      mpn_sqr (tp, ap, n); /* Full square of the low n limbs of ap, written to tp.  */
+      mpn_sub_n (rp, tp, tp + n, n); /* rp = tp[0..n) - tp[n..2n); the value returned by mpn_sub_n is not used.  */
+    }
+}
+
+double /* Times mpn_bc_mulmod_bnp1; use_r = 0, so s->r is ignored and k is derived from s->size alone.  */
+speed_mpn_mulmod_bnp1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_bc_mulmod_bnp1 (wp, s->xp, s->yp, nk, k, tp),0);
+}
+
+double /* Times mpn_bc_sqrmod_bnp1; use_r = 0, so s->r is ignored and k is derived from s->size alone.  */
+speed_mpn_sqrmod_bnp1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_bc_sqrmod_bnp1 (wp, s->xp, nk, k, tp),0);
+}
+
 double
 speed_mpn_matrix22_mul (struct speed_params *s)
 {
diff --git a/tune/speed.c b/tune/speed.c
index e764204a2..e0de68a56 100644
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -385,6 +385,11 @@ const struct routine_t {
   { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
   { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 },
 
+  { "mpn_mulmod_bknp1", speed_mpn_mulmod_bknp1, FLAG_R_OPTIONAL }, /* The two bknp1 entries carry FLAG_R_OPTIONAL; their speed routines read s->r.  */
+  { "mpn_sqrmod_bknp1", speed_mpn_sqrmod_bknp1, FLAG_R_OPTIONAL },
+  { "mpn_mulmod_bnp1", speed_mpn_mulmod_bnp1 }, /* The two bnp1 entries have no flag; their speed routines pass use_r = 0.  */
+  { "mpn_sqrmod_bnp1", speed_mpn_sqrmod_bnp1 },
+
   { "mpn_invert", speed_mpn_invert },
   { "mpn_invertappr", speed_mpn_invertappr },
   { "mpn_ni_invertappr", speed_mpn_ni_invertappr },
diff --git a/tune/speed.h b/tune/speed.h
index d82a6051a..3155232c8 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -388,6 +388,10 @@ double speed_mpn_mulmod_bnm1 (struct speed_params *);
 double speed_mpn_bc_mulmod_bnm1 (struct speed_params *);
 double speed_mpn_mulmod_bnm1_rounded (struct speed_params *);
 double speed_mpn_sqrmod_bnm1 (struct speed_params *);
+double speed_mpn_mulmod_bknp1 (struct speed_params *); /* The four routines declared here are defined in tune/common.c.  */
+double speed_mpn_sqrmod_bknp1 (struct speed_params *);
+double speed_mpn_mulmod_bnp1 (struct speed_params *);
+double speed_mpn_sqrmod_bnp1 (struct speed_params *);
 double speed_mpn_udiv_qrnnd (struct speed_params *);
 double speed_mpn_udiv_qrnnd_r (struct speed_params *);
 double speed_mpn_umul_ppmm (struct speed_params *);
@@ -1402,6 +1406,70 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     return t; \
   }
 
+#ifndef MOD_BKNP1_USE11 /* Default 0.  When nonzero, r == 11 is accepted and k == 11 passes the second restriction below unconditionally.  */
+#define MOD_BKNP1_USE11 0
+#endif
+#ifndef MOD_BKNP1_ONLY3 /* Default 0.  When nonzero, k is forced to 3 after nk is computed, so only sizes whose chosen k was 3 pass the size check.  */
+#define MOD_BKNP1_ONLY3 0
+#endif
+
+#define SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL(call,use_r) /* Whole body of a speed routine returning double: needs `s' in scope, exposes wp, tp, nk and k to `call'.  */ \
+  { \
+    mp_ptr wp, tp; /* wp: destination area, tp: scratch area.  */ \
+    unsigned i, k; /* i: repetition counter, k: number of parts the size is split into.  */ \
+    double t; \
+    mp_size_t itch, nk; /* itch: scratch limbs, nk: s->size / k.  */ \
+    TMP_DECL; \
+    \
+    SPEED_RESTRICT_COND (s->size >= 1); /* NOTE(review): SPEED_RESTRICT_COND is defined outside this patch; presumably it rejects the measurement when the condition is false.  */ \
+    SPEED_RESTRICT_COND (!use_r || (s->r == 0) || /* With use_r set, r must be 0 or one of the listed values.  */ \
+      (s->r == 3) || (s->r == 5) || (s->r == 7) || \
+      (s->r == 13) || (s->r == 17) || \
+      ((MOD_BKNP1_USE11) && (s->r == 11))); \
+    \
+    if (!use_r || (s->r < 2)) /* No usable r: take the first of 3, 5, 7, 11, 13, 17 dividing s->size, else k = 1.  */ \
+      { \
+        if (s->size % 3 == 0) {nk = s->size / (k = 3);} \
+        else if (s->size % 5 == 0) {nk = s->size / (k = 5);} \
+        else if (s->size % 7 == 0) {nk = s->size / (k = 7);} \
+        else if (s->size % 11 == 0) {nk = s->size / (k = 11);} \
+        else if (s->size % 13 == 0) {nk = s->size / (k = 13);} \
+        else if (s->size % 17 == 0) {nk = s->size / (k = 17);} \
+        else nk = s->size / (k = 1); \
+      } \
+    else nk = s->size / (k = s->r); /* Otherwise r itself is the number of parts.  */ \
+    \
+    if (MOD_BKNP1_ONLY3) \
+      k = 3; /* nk is not recomputed; the size check on the next line filters any mismatch.  */ \
+    SPEED_RESTRICT_COND ((!use_r || (k > 2)) && (s->size == k * nk)); /* k must divide s->size exactly; with use_r set, k must also exceed 2.  */ \
+    SPEED_RESTRICT_COND ((GMP_NUMB_MAX % k == 0) || (nk % 3 != 0) || /* NOTE(review): presumably mirrors an operand requirement of mpn_mulmod_bknp1; confirm against its definition.  */ \
+      ((MOD_BKNP1_USE11) && (k == 11))); \
+    \
+    itch = mpn_mulmod_bknp1_itch (s->size); /* Scratch size is requested for the full size s->size.  */ \
+    \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size + 2, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \
+    \
+    s->xp [s->size] &= 1; /* Writes to the caller's operands: the limb at index s->size is reduced to 0 or 1.  */ \
+    s->yp [s->size] &= 1; \
+    speed_operand_src (s, s->xp, s->size + 1); /* Sources are registered with s->size + 1 limbs, covering the masked limb.  */ \
+    speed_operand_src (s, s->yp, s->size + 1); \
+    speed_operand_dst (s, wp, 2 * s->size + 2); \
+    speed_operand_dst (s, tp, itch); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    i = s->reps; /* `call' is executed s->reps times between speed_starttime and speed_endtime.  */ \
+    do \
+      call; \
+    while (--i != 0); \
+    t = speed_endtime (); \
+    \
+    TMP_FREE; \
+    return t; \
+  }
+
 #define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \
   { \
   mp_ptr wp, tspace; \
-- 
cgit v1.2.1