* gmp-impl.h (DIVEXACT_1_THRESHOLD): New threshold.

* tune/tuneup.c: Tune it.
author: Kevin Ryde <user42@zip.com.au> 2001-04-20 01:14:02 +0200
committer: Kevin Ryde <user42@zip.com.au> 2001-04-20 01:14:02 +0200
commit: 28edfa9d2474798e080ce523c5eb41f3e850a62e (patch)
tree: 7a20d1f4c1ec47f784c50f97eae3257c19478cbe /tune
parent: c15174563f9cfa79a48c04165ade0efdb1c0415d (diff)
download: gmp-28edfa9d2474798e080ce523c5eb41f3e850a62e.tar.gz
1 files changed, 74 insertions, 0 deletions
diff --git a/tune/tuneup.c b/tune/tuneup.c
index 9ac5d2a67..06d7c06db 100644
--- a/tune/tuneup.c
+++ b/tune/tuneup.c
@@ -135,6 +135,7 @@ mp_size_t  fib_threshold[2] = { MP_SIZE_T_MAX };
 mp_size_t  powm_threshold[2] = { MP_SIZE_T_MAX };
 mp_size_t  gcd_accel_threshold[2] = { MP_SIZE_T_MAX };
 mp_size_t  gcdext_threshold[2] = { MP_SIZE_T_MAX };
+mp_size_t  divexact_1_threshold[2] = { MP_SIZE_T_MAX };
 mp_size_t  divrem_1_norm_threshold[2] = { MP_SIZE_T_MAX };
 mp_size_t  divrem_1_unnorm_threshold[2] = { MP_SIZE_T_MAX };
 mp_size_t  divrem_2_threshold[2] = { MP_SIZE_T_MAX };
@@ -1005,6 +1006,7 @@ all (void)
      the results in the .asm file, and there's no need for such thresholds
      to appear in gmp-mparam.h.  */
 #if ! HAVE_NATIVE_mpn_divrem_1
+#define SPEED_MPN_DIVREM_1  speed_mpn_divrem_1_tune
 
   /* Tune for the integer part of mpn_divrem_1.  This will very possibly be
      a bit out for the fractional part, but that's too bad, the integer part
@@ -1026,6 +1028,12 @@ all (void)
     one (divrem_1_unnorm_threshold, 1, &param);
   }
 #endif /* ! HAVE_NATIVE_mpn_divrem_1 */
+
+  /* use the regular mpn_divrem_1 if there's no tuned version */
+#ifndef SPEED_MPN_DIVREM_1
+#define SPEED_MPN_DIVREM_1  speed_mpn_divrem_1
+#endif
+
 #if ! HAVE_NATIVE_mpn_mod_1
 #define SPEED_MPN_MOD_1  speed_mpn_mod_1_tune
   {
@@ -1120,6 +1128,72 @@ all (void)
 #endif
 
 
+  /* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so
+     tune for that.  Its speed can differ on odd or even divisor, so take an
+     average threshold for the two.
+
+     mpn_divrem_1 can vary with high<divisor or not, whereas mpn_divexact_1
+     might not vary that way, but don't test this since high<divisor isn't
+     expected to occur often with small divisors.  */
+  {
+    static struct param_t  param;
+    mp_size_t  thresh[2], average;
+    int        low, i;
+
+    param.name[0] = "DIVEXACT_1_THRESHOLD";
+    param.check_size = 256;
+    param.min_size[0] = 2;
+    param.stop_factor = 1.5;
+    param.function  = SPEED_MPN_DIVREM_1;
+    param.function2 = speed_mpn_divexact_1;
+    param.noprint = 1;
+
+    print_define_start (param.name[0]);
+
+    for (low = 0; low <= 1; low++)
+      {
+        s.r = randlimb_half();
+        if (low == 0)
+          s.r |= 1;
+        else
+          s.r &= ~CNST_LIMB(7);
+
+        one (divexact_1_threshold, 1, &param);
+        if (option_trace)
+          printf ("low=%d thresh %ld\n", low, divexact_1_threshold[0]);
+
+        if (divexact_1_threshold[0] == MP_SIZE_T_MAX)
+          {
+            average = MP_SIZE_T_MAX;
+            goto divexact_1_done;
+          }
+
+        thresh[low] = divexact_1_threshold[0];
+      }
+
+    if (option_trace)
+      {
+        printf ("average of:");
+        for (i = 0; i < numberof(thresh); i++)
+          printf (" %ld", thresh[i]);
+        printf ("\n");
+      }
+
+    average = 0;
+    for (i = 0; i < numberof(thresh); i++)
+      average += thresh[i];
+    average /= numberof(thresh);
+
+    /* If divexact turns out to be better as early as 3 limbs, then use it
+       always, so as to reduce code size and conditional jumps.  */
+    if (average <= 3)
+      average = 0;
+
+  divexact_1_done:
+    print_define_end (param.name[0], average);
+  }
+
+
   /* The generic mpn_modexact_1_odd skips a divide step if high<divisor, the
      same as mpn_mod_1, but this might not be true of an assembler
      implementation.  The threshold used is an average based on data where a
author	Kevin Ryde <user42@zip.com.au>	2001-04-20 01:14:02 +0200
committer	Kevin Ryde <user42@zip.com.au>	2001-04-20 01:14:02 +0200
commit	28edfa9d2474798e080ce523c5eb41f3e850a62e (patch)
tree	7a20d1f4c1ec47f784c50f97eae3257c19478cbe /tune
parent	c15174563f9cfa79a48c04165ade0efdb1c0415d (diff)
download	gmp-28edfa9d2474798e080ce523c5eb41f3e850a62e.tar.gz