summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiddhesh Poyarekar <siddhesh@redhat.com>2013-03-15 12:30:03 +0530
committerSiddhesh Poyarekar <siddhesh@redhat.com>2013-03-15 12:30:03 +0530
commit8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70 (patch)
tree7eb91b35e7d04f1c4889563b3c922e512cfe2045
parentd22ca8cdfb98001d03772ef264b244930d439b3f (diff)
downloadglibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.tar.gz
Framework for performance benchmarking of functions
See benchtests/Makefile to know how to use it.
-rw-r--r--ChangeLog17
-rw-r--r--Makefile.in5
-rw-r--r--NEWS2
-rw-r--r--Rules27
-rw-r--r--benchtests/Makefile59
-rw-r--r--benchtests/bench-skeleton.c75
-rw-r--r--benchtests/exp-inputs1
-rw-r--r--benchtests/pow-inputs1
-rwxr-xr-xscripts/bench.pl93
9 files changed, 278 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 6fc99c41ea..2e8affb197 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,21 @@
2013-03-15 Siddhesh Poyarekar <siddhesh@redhat.com>
+ Richard Henderson <rth@redhat.com>
+ Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
+
+ * Makefile.in (bench): New target.
+ * NEWS: Mention the benchmark framework.
+ * Rules (bench): New target.
+ (binaries-bench): Generate binaries for functions to
+ benchmark.
+ * benchtests/Makefile: New makefile for benchmark tests.
+ * benchtests/bench-skeleton.c: New skeleton file for benchmark
+ programs.
+ * benchtests/exp-inputs: New input file for EXP function.
+ * benchtests/pow-inputs: New input file for POW function.
+ * scripts/bench.pl: New script to generate source files for
+ benchmark programs.
+
+2013-03-15 Siddhesh Poyarekar <siddhesh@redhat.com>
* sysdeps/ieee754/dbl-64/mpa-arch.h: New file.
* sysdeps/ieee754/dbl-64/mpa.c (norm): Use MANTISSA_T to store
diff --git a/Makefile.in b/Makefile.in
index d73a78f8e9..df75b8f0c4 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -3,7 +3,7 @@ srcdir = @srcdir@
# Uncomment the line below if you want to do parallel build.
# PARALLELMFLAGS = -j 4
-.PHONY: all install
+.PHONY: all install bench
all .DEFAULT:
$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
@@ -11,3 +11,6 @@ all .DEFAULT:
install:
LANGUAGE=C LC_ALL=C; export LANGUAGE LC_ALL; \
$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
+
+bench:
+ $(MAKE) -C $(srcdir)/benchtests $(PARALLELMFLAGS) objdir=`pwd` $@
diff --git a/NEWS b/NEWS
index 1f63e3ff96..cb96a74e50 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,8 @@ Version 2.18
* Support for priority inherited mutexes in pthread condition variables on
non-x86 architectures.
+* Added a benchmark framework to track performance of functions in glibc.
+
Version 2.17
diff --git a/Rules b/Rules
index 301a74818c..bc5dacd2f6 100644
--- a/Rules
+++ b/Rules
@@ -83,7 +83,7 @@ common-generated += dummy.o dummy.c
# This makes all the auxiliary and test programs.
-.PHONY: others tests
+.PHONY: others tests bench
ifeq ($(multi-arch),no)
tests := $(filter-out $(tests-ifunc), $(tests))
xtests := $(filter-out $(xtests-ifunc), $(xtests))
@@ -188,6 +188,31 @@ $(objpfx)%.out: /dev/null $(objpfx)% # Make it 2nd arg for canned sequence.
$(make-test-out) > $@
endif # tests
+
+# Build and run benchmark programs.
+binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
+
+run-bench = $(test-wrapper-env) \
+ GCONV_PATH=$(common-objpfx)iconvdata LC_ALL=C \
+ $($*-ENV) $(run-via-rtld-prefix) $${run}
+
+bench: $(binaries-bench)
+ if [ -f $(objpfx)bench.out ]; then \
+ mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
+ fi
+ for run in $^; do \
+ eval $(run-bench) >> $(objpfx)bench.out; \
+ done
+
+$(binaries-bench): %: %.o \
+ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
+ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
+ $(+link)
+
+$(objpfx)bench-%.c: %-inputs bench-skeleton.c
+ $(..)scripts/bench.pl $(patsubst %-inputs,%,$<) \
+ $($*-ITER) $($*-ARGLIST) $($*-RET) > $@
+
.PHONY: distclean realclean subdir_distclean subdir_realclean \
subdir_clean subdir_mostlyclean subdir_testclean
diff --git a/benchtests/Makefile b/benchtests/Makefile
new file mode 100644
index 0000000000..74938b925c
--- /dev/null
+++ b/benchtests/Makefile
@@ -0,0 +1,59 @@
+# Copyright (C) 2013 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+
+# Makefile for benchmark tests. The only useful target here is `bench`.
+
+# Adding a new function `foo`:
+# ---------------------------
+
+# - Append the function name to the bench variable
+
+# - Define foo-ITER with the number of iterations you want to run. Keep it
+# high enough that the overhead of clock_gettime is only a small fraction of
+# the total run time of the test.
+
+# - Define foo-ARGLIST as a colon separated list of types of the input
+# arguments. Use `void` if the function does not take any inputs. Put it in
+# quotes if any input argument is a pointer, e.g.:
+
+# free-ARGLIST = "void *"
+
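+# - Define foo-RET as the type the function returns. Skip if the function
+# returns void. One could even skip foo-ARGLIST if the function does not
+# take any inputs AND the function returns void. If the function takes no
+# inputs but does return a value, foo-ARGLIST must still be set to `void`,
+# because the generator script reads the argument list and the return type
+# positionally.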
+
+
+# - Make a file called `foo-inputs` with one input value per line, an input
+# being a comma separated list of arguments to be passed into the function.
+# See pow-inputs for an example.
+
+subdir := benchtests
+bench := exp pow
+
+exp-ITER = 100000
+exp-ARGLIST = double
+exp-RET = double
+LDFLAGS-bench-exp = -lm
+
+pow-ITER = 100000
+pow-ARGLIST = double:double
+pow-RET = double
+LDFLAGS-bench-pow = -lm
+
+include ../Makeconfig
+include ../Rules
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
new file mode 100644
index 0000000000..13f986d817
--- /dev/null
+++ b/benchtests/bench-skeleton.c
@@ -0,0 +1,75 @@
+/* Skeleton for benchmark programs.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+#include <inttypes.h>
+
+/* Time BENCH_FUNC over every input sample and print a one-line summary.
+
+   The file that includes this skeleton must define:
+     ITER         total number of calls requested per input sample,
+     NUM_SAMPLES  number of input samples,
+     BENCH_FUNC   macro taking a sample index and calling the function,
+     FUNCNAME     string literal used to label the output.
+
+   Calls are timed in batches so that each measurement spans many clock
+   ticks.  Returns 0 on success and 1 if the clock resolution cannot be
+   read.  */
+int
+main (int argc, char **argv)
+{
+  unsigned long i, j, k;
+  uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
+  struct timespec start, end;
+
+  memset (&start, 0, sizeof (start));
+  memset (&end, 0, sizeof (end));
+
+  /* Without a resolution there is no sensible batch size; give up rather
+     than divide by zero below.  */
+  if (clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start) != 0)
+    {
+      perror (FUNCNAME ": clock_getres");
+      return 1;
+    }
+
+  /* Measure 1000 times the resolution of the clock.  So for a 1ns resolution
+     clock, we measure 1000 iterations of the function call at a time.
+     Measurements close to the minimum clock resolution won't make much sense,
+     but it's better than having nothing at all.  */
+  unsigned long iters = 1000 * start.tv_nsec;
+
+  /* A zero tv_nsec would make the division below trap, and a batch larger
+     than ITER would leave no batches to run at all.  In both cases fall back
+     to a single batch of ITER calls.  */
+  if (iters == 0 || iters > (unsigned long) ITER)
+    iters = ITER;
+
+  /* Only reachable when ITER itself is zero; keep the divisor non-zero.  */
+  if (iters == 0)
+    iters = 1;
+
+  unsigned long total_iters = ITER / iters;
+
+  for (i = 0; i < NUM_SAMPLES; i++)
+    {
+      for (j = 0; j < total_iters; j++)
+        {
+          clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
+          for (k = 0; k < iters; k++)
+            BENCH_FUNC (i);
+          clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end);
+
+          /* Elapsed time of this batch in nanoseconds.  */
+          uint64_t cur = (end.tv_nsec - start.tv_nsec
+                          + ((end.tv_sec - start.tv_sec)
+                             * (uint64_t) 1000000000));
+
+          if (cur > max)
+            max = cur;
+
+          if (cur < min)
+            min = cur;
+
+          total += cur;
+        }
+    }
+
+  double d_total_s = total * 1e-9;
+  double d_iters = iters;
+  /* Report the calls that were actually timed.  ITER need not be a multiple
+     of the batch size, in which case fewer than ITER calls ran per sample.  */
+  double d_total_i = (double) total_iters * iters * NUM_SAMPLES;
+
+  /* MAX and MIN are per-call times derived from the slowest and fastest
+     batch.  */
+  printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",
+          d_total_i, d_total_s, max / d_iters, min / d_iters,
+          d_total_i / d_total_s);
+
+  return 0;
+}
diff --git a/benchtests/exp-inputs b/benchtests/exp-inputs
new file mode 100644
index 0000000000..a2086baa86
--- /dev/null
+++ b/benchtests/exp-inputs
@@ -0,0 +1 @@
+708.00096423260981737257679924368858
diff --git a/benchtests/pow-inputs b/benchtests/pow-inputs
new file mode 100644
index 0000000000..dbb1270b75
--- /dev/null
+++ b/benchtests/pow-inputs
@@ -0,0 +1 @@
+1.0000000000000020, 1.5
diff --git a/scripts/bench.pl b/scripts/bench.pl
new file mode 100755
index 0000000000..bb7f64897e
--- /dev/null
+++ b/scripts/bench.pl
@@ -0,0 +1,93 @@
+#! /usr/bin/perl -w
+# Copyright (C) 2013 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+
+use strict;
+use warnings;
+# Generate a benchmark source file for a given input.
+
+# Usage: bench.pl <function> <iterations> [parameter types] [return type]
+#
+#   function         name of the function to benchmark; its inputs are read
+#                    from the file "<function>-inputs" in the current directory
+#   iterations       value emitted as the ITER macro
+#   parameter types  colon separated list of C types, or "void"
+#   return type      C type returned by the function (default: void)
+#
+# The generated C source is written to standard output.
+
+if (@ARGV < 2) {
+  die "Usage: bench.pl <function> <iterations> [parameter types] [return type]\n";
+}
+
+my $func = $ARGV[0];
+my $iters = $ARGV[1];
+my @args;
+my $ret = "void";
+my $getret = "";
+
+if (@ARGV >= 3) {
+  @args = split(':', $ARGV[2]);
+}
+
+# Accept the return type even when extra arguments follow it, instead of
+# silently falling back to void.
+if (@ARGV >= 4) {
+  $ret = $ARGV[3];
+}
+
+my $decl = "extern $ret $func (";
+
+if (@args == 0 || $args[0] eq "void") {
+  # No inputs: a single sample and a call without arguments.
+  print "$decl void);\n";
+  print "#define CALL_BENCH_FUNC(j) $func();\n";
+  print "#define NUM_SAMPLES (1)\n";
+}
+else {
+  my $num = 0;
+  my $bench_func = "#define CALL_BENCH_FUNC(j) $func (";
+  my $struct = "struct args {";
+
+  # Build, in step, the prototype, the call expression and the struct that
+  # holds one set of inputs (members arg0, arg1, ...).
+  foreach my $arg (@args) {
+    if ($num > 0) {
+      $bench_func = "$bench_func,";
+      $decl = "$decl,";
+    }
+
+    $struct = "$struct $arg arg$num;";
+    $bench_func = "$bench_func in[j].arg$num";
+    $decl = "$decl $arg";
+    $num = $num + 1;
+  }
+
+  print "$decl);\n";
+  print "$bench_func);\n";
+  print "$struct } in[] = {";
+
+  # Each line of the inputs file becomes one initializer of the in[] array.
+  my $inputs_file = "$func-inputs";
+  open(my $inputs, '<', $inputs_file)
+    or die "bench.pl: cannot open $inputs_file: $!\n";
+
+  while (my $line = <$inputs>) {
+    chomp $line;
+    print "{$line},\n";
+  }
+  close($inputs);
+
+  print "};\n";
+  print "#define NUM_SAMPLES (sizeof (in) / sizeof (struct args))\n";
+}
+
+# In some cases not storing a return value seems to result in the function call
+# being optimized out.  The variable has static storage and is therefore
+# zero-initialized without an explicit initializer, which keeps the
+# declaration valid for pointer and aggregate return types as well.
+if ($ret ne "void") {
+  print "static volatile $ret ret;\n";
+  $getret = "ret = ";
+}
+
+print "#define BENCH_FUNC(j) ({$getret CALL_BENCH_FUNC (j);})\n";
+
+print "#define ITER $iters\n";
+print "#define FUNCNAME \"$func\"\n";
+print "#include \"bench-skeleton.c\"\n";