summary refs log tree commit diff
path: root/tune
diff options
context:
space:
mode:
author Kevin Ryde <user42@zip.com.au> 2001-04-20 01:14:02 +0200
committer Kevin Ryde <user42@zip.com.au> 2001-04-20 01:14:02 +0200
commit 28edfa9d2474798e080ce523c5eb41f3e850a62e (patch)
tree 7a20d1f4c1ec47f784c50f97eae3257c19478cbe /tune
parent c15174563f9cfa79a48c04165ade0efdb1c0415d (diff)
download gmp-28edfa9d2474798e080ce523c5eb41f3e850a62e.tar.gz
* gmp-impl.h (DIVEXACT_1_THRESHOLD): New threshold.
* tune/tuneup.c: Tune it.
Diffstat (limited to 'tune')
-rw-r--r-- tune/tuneup.c 74
1 file changed, 74 insertions, 0 deletions
diff --git a/tune/tuneup.c b/tune/tuneup.c
index 9ac5d2a67..06d7c06db 100644
--- a/tune/tuneup.c
+++ b/tune/tuneup.c
@@ -135,6 +135,7 @@ mp_size_t fib_threshold[2] = { MP_SIZE_T_MAX };
mp_size_t powm_threshold[2] = { MP_SIZE_T_MAX };
mp_size_t gcd_accel_threshold[2] = { MP_SIZE_T_MAX };
mp_size_t gcdext_threshold[2] = { MP_SIZE_T_MAX };
+mp_size_t divexact_1_threshold[2] = { MP_SIZE_T_MAX };
mp_size_t divrem_1_norm_threshold[2] = { MP_SIZE_T_MAX };
mp_size_t divrem_1_unnorm_threshold[2] = { MP_SIZE_T_MAX };
mp_size_t divrem_2_threshold[2] = { MP_SIZE_T_MAX };
@@ -1005,6 +1006,7 @@ all (void)
the results in the .asm file, and there's no need for such thresholds
to appear in gmp-mparam.h. */
#if ! HAVE_NATIVE_mpn_divrem_1
+#define SPEED_MPN_DIVREM_1 speed_mpn_divrem_1_tune
/* Tune for the integer part of mpn_divrem_1. This will very possibly be
a bit out for the fractional part, but that's too bad, the integer part
@@ -1026,6 +1028,12 @@ all (void)
one (divrem_1_unnorm_threshold, 1, &param);
}
#endif /* ! HAVE_NATIVE_mpn_divrem_1 */
+
+ /* use the regular mpn_divrem_1 if there's no tuned version */
+#ifndef SPEED_MPN_DIVREM_1
+#define SPEED_MPN_DIVREM_1 speed_mpn_divrem_1
+#endif
+
#if ! HAVE_NATIVE_mpn_mod_1
#define SPEED_MPN_MOD_1 speed_mpn_mod_1_tune
{
@@ -1120,6 +1128,72 @@ all (void)
#endif
+ /* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so
+ tune for that. Its speed can differ on odd or even divisor, so take an
+ average threshold for the two.
+
+ mpn_divrem_1 can vary with high<divisor or not, whereas mpn_divexact_1
+ might not vary that way, but don't test this since high<divisor isn't
+ expected to occur often with small divisors. */
+ {
+ static struct param_t param;
+ mp_size_t thresh[2], average;
+ int low, i;
+
+ param.name[0] = "DIVEXACT_1_THRESHOLD";
+ param.check_size = 256;
+ param.min_size[0] = 2;
+ param.stop_factor = 1.5;
+ param.function = SPEED_MPN_DIVREM_1;
+ param.function2 = speed_mpn_divexact_1;
+ param.noprint = 1;
+
+ print_define_start (param.name[0]);
+
+ for (low = 0; low <= 1; low++)
+ {
+ s.r = randlimb_half();
+ if (low == 0)
+ s.r |= 1;
+ else
+ s.r &= ~CNST_LIMB(7);
+
+ one (divexact_1_threshold, 1, &param);
+ if (option_trace)
+ printf ("low=%d thresh %ld\n", low, divexact_1_threshold[0]);
+
+ if (divexact_1_threshold[0] == MP_SIZE_T_MAX)
+ {
+ average = MP_SIZE_T_MAX;
+ goto divexact_1_done;
+ }
+
+ thresh[low] = divexact_1_threshold[0];
+ }
+
+ if (option_trace)
+ {
+ printf ("average of:");
+ for (i = 0; i < numberof(thresh); i++)
+ printf (" %ld", thresh[i]);
+ printf ("\n");
+ }
+
+ average = 0;
+ for (i = 0; i < numberof(thresh); i++)
+ average += thresh[i];
+ average /= numberof(thresh);
+
+ /* If divexact turns out to be better as early as 3 limbs, then use it
+ always, so as to reduce code size and conditional jumps. */
+ if (average <= 3)
+ average = 0;
+
+ divexact_1_done:
+ print_define_end (param.name[0], average);
+ }
+
+
/* The generic mpn_modexact_1_odd skips a divide step if high<divisor, the
same as mpn_mod_1, but this might not be true of an assembler
implementation. The threshold used is an average based on data where a