summary | refs | log | tree | commit | diff
path: root/gmp/tune/speed-ext.c
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/tune/speed-ext.c')
-rw-r--r-- gmp/tune/speed-ext.c 233
1 files changed, 233 insertions, 0 deletions
diff --git a/gmp/tune/speed-ext.c b/gmp/tune/speed-ext.c
new file mode 100644
index 0000000000..e7fb8b9f60
--- /dev/null
+++ b/gmp/tune/speed-ext.c
@@ -0,0 +1,233 @@
+/* An example of extending the speed program to measure routines not in GMP.
+
+Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/* The extension here is three versions of an mpn arithmetic mean. These
+ aren't meant to be particularly useful, just examples.
+
+ You can run something like the following to compare their speeds.
+
+ ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
+
+ On RISC chips, mean_open() might be fastest if the compiler is doing a
+ good job. On the register starved x86s, mean_calls will be fastest.
+
+
+ Notes:
+
+ SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
+ by speed.c in useful places. SPEED_EXTRA_PROTOS goes after the header
+ files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
+
+ The advantage of this #include "speed.c" scheme is that there's no
+ editing of a copy of that file, and new features in new versions of it
+ will be immediately available.
+
+ In a real program the routines mean_calls() etc would probably be in
+ separate C or assembler source files, and just the measuring
+ speed_mean_calls() etc would be here. Linking against other libraries
+ for things to measure is perfectly possible too.
+
+ When attempting to compare two versions of the same named routine, say
+ like the generic and assembler versions of mpn_add_n(), creative use of
+ cc -D or #define is suggested, so one or both can be renamed and linked
+ into the same program. It'll be much easier to compare them side by side
+ than with separate programs for each.
+
+ common.c has notes on writing speed measuring routines.
+
+ Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
+ not using libtool) to get common.o and other objects needed by speed.c. */
+
+
+#define SPEED_EXTRA_PROTOS /* prototypes of the three measuring routines defined at the end of this file; expanded by speed.c after its header files */ \
+ double speed_mean_calls (struct speed_params *s); \
+ double speed_mean_open (struct speed_params *s); \
+ double speed_mean_open2 (struct speed_params *s);
+
+#define SPEED_EXTRA_ROUTINES /* extra entries for speed.c's array of available routines: { name given on the command line, measuring function } */ \
+ { "mean_calls", speed_mean_calls }, \
+ { "mean_open", speed_mean_open }, \
+ { "mean_open2", speed_mean_open2 },
+
+#include "speed.c"
+
+
+/* The simplest version, built entirely from mpn library calls.
+
+   {wp,size} receives ({xp,size} + {yp,size}) / 2 and the return value is
+   the remainder of that division.  size must be at least 1.  The other
+   mean_* versions in this file take the same arguments and return the same
+   thing.  */
+
+mp_limb_t
+mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  const int top = GMP_LIMB_BITS - 1;   /* index of the highest bit of a limb */
+  mp_limb_t carry, low_bit;
+
+  ASSERT (size >= 1);
+
+  /* Full-width sum into wp; the carry out of the top limb is kept aside.  */
+  carry = mpn_add_n (wp, xp, yp, size);
+
+  /* Halve in place.  The bit shifted out is brought down from the top of
+     the returned limb to bit 0, giving the remainder.  */
+  low_bit = mpn_rshift (wp, wp, size, 1) >> top;
+
+  /* The carry from the addition fills the top bit vacated by the shift.  */
+  wp[size-1] |= carry << top;
+
+  return low_bit;
+}
+
+
+/* A hand-written single-pass version.  Each pair of source limbs is added
+   with carry, in the manner of mpn/generic/add_n.c, and the one-bit right
+   shift is applied to a sum limb as soon as the limb above it is known.
+   Arguments and return value are as for mean_calls().  */
+
+mp_limb_t
+mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  mp_limb_t a, b, sum, pending, carry, remainder;
+  mp_size_t k;
+
+  ASSERT (size >= 1);
+
+  /* Lowest limb: no carry-in, and its bit 0 is the remainder.  */
+  a = xp[0];
+  b = yp[0];
+  pending = a + b;
+  carry = (pending < a);
+  remainder = pending & 1;
+
+/* Combine two adjacent limbs into the low limb shifted right by one bit.
+   Left defined after this function; mean_open2() below uses it too.  */
+#define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
+
+  k = 1;
+  while (k < size)
+    {
+      a = xp[k];
+      b = yp[k];
+
+      /* sum = a + b + carry, with the new carry collected from both adds */
+      sum = a + carry;
+      carry = (sum < a);
+      sum += b;
+      carry += (sum < b);
+
+      /* The previous sum limb can now be completed with the low bit of
+         this one.  */
+      wp[k-1] = RSHIFT (sum, pending);
+      pending = sum;
+      k++;
+    }
+
+  /* Top limb: the final carry supplies the bit shifted in.  */
+  wp[k-1] = RSHIFT (carry, pending);
+
+  return remainder;
+}
+
+
+/* A second single-pass version.  Here the one-bit right shift is applied to
+   the source limbs before they are added, instead of to the sum limbs.  It
+   is unlikely to beat mean_open(), but serves as an alternative.  Arguments
+   and return value are as for mean_calls().
+
+   RSHIFT is the macro defined inside mean_open() above.  */
+
+mp_limb_t
+mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  mp_limb_t xlo, ylo, xhi, yhi, xs, ys, sum, carry, remainder;
+  mp_size_t k;
+
+  ASSERT (size >= 1);
+
+  xlo = xp[0];
+  ylo = yp[0];
+
+  /* The remainder is bit 0 of x+y; the carry out of that single-bit add
+     seeds the carry chain for the shifted limbs.  */
+  remainder = (xlo ^ ylo) & 1;
+  carry = (xlo & ylo) & 1;
+
+  for (k = 1; k < size; k++)
+    {
+      xhi = xp[k];
+      yhi = yp[k];
+
+      /* Shift each source down one bit, pulling in bit 0 of the limb
+         above.  */
+      xs = RSHIFT (xhi, xlo);
+      ys = RSHIFT (yhi, ylo);
+
+      /* sum = xs + ys + carry, with the new carry collected from both adds */
+      sum = xs + carry;
+      carry = (sum < xs);
+      sum += ys;
+      carry += (sum < ys);
+      wp[k-1] = sum;
+
+      xlo = xhi;
+      ylo = yhi;
+    }
+
+  /* Top limb: there is nothing above to shift in.  */
+  wp[k-1] = (xlo >> 1) + (ylo >> 1) + carry;
+
+  return remainder;
+}
+
+
+/* The speed measuring routines are the same apart from which function they
+ run, so a macro is used. Actually this macro is the same as
+ SPEED_ROUTINE_MPN_BINARY_N.
+
+ The expansion is a complete braced block ending in "return t", so it is
+ meant to be the whole body of a function returning double that has a
+ "struct speed_params *s" in scope.
+
+ Sequence of the expansion:
+ - SPEED_RESTRICT_COND is given the condition s->size >= 1, matching the
+ ASSERT in the mean_* functions (presumably it rejects the measurement
+ when the condition fails; see its definition to confirm).
+ - wp is obtained from SPEED_TMP_ALLOC_LIMBS using s->size and
+ s->align_wp, inside a TMP_MARK / TMP_FREE pair.
+ - s->xp and s->yp are registered with speed_operand_src and wp with
+ speed_operand_dst, each with s->size, before speed_cache_fill (s).
+ common.c has notes on what these do.
+ - Between speed_starttime () and speed_endtime (), mean_fun is called
+ on (wp, s->xp, s->yp, s->size) s->reps times; its return value is
+ discarded. The result of speed_endtime () is what gets returned.
+
+ NOTE(review): the counter is unsigned and the loop is do/while with
+ --i != 0, so the call always runs at least once and an s->reps of 0
+ would wrap around; presumably callers always supply reps >= 1. */
+
+#define SPEED_ROUTINE_MEAN(mean_fun) \
+ { \
+ unsigned i; \
+ mp_ptr wp; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ mean_fun (wp, s->xp, s->yp, s->size); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+double
+speed_mean_calls (struct speed_params *s)
+{ /* Measuring routine for mean_calls(), listed as "mean_calls" in
+     SPEED_EXTRA_ROUTINES.  SPEED_ROUTINE_MEAN expands to the entire
+     function body, including the return of the measured time.  */
+ SPEED_ROUTINE_MEAN (mean_calls);
+}
+
+double
+speed_mean_open (struct speed_params *s)
+{ /* Measuring routine for mean_open(), listed as "mean_open" in
+     SPEED_EXTRA_ROUTINES.  SPEED_ROUTINE_MEAN expands to the entire
+     function body, including the return of the measured time.  */
+ SPEED_ROUTINE_MEAN (mean_open);
+}
+
+double
+speed_mean_open2 (struct speed_params *s)
+{ /* Measuring routine for mean_open2(), listed as "mean_open2" in
+     SPEED_EXTRA_ROUTINES.  SPEED_ROUTINE_MEAN expands to the entire
+     function body, including the return of the measured time.  */
+ SPEED_ROUTINE_MEAN (mean_open2);
+}