hash benchmark utility

* added benchmark subcommand to '-m passlib' tool, can generate benchmarks for one or more hash algorithms & backends * rewrote HashTimer to use as base for benchmark cmd, moved to utils.
author: Eli Collins <elic@assurancetechnologies.com> 2012-01-05 21:07:17 -0500
committer: Eli Collins <elic@assurancetechnologies.com> 2012-01-05 21:07:17 -0500
commit: 2cf6008218aa5195ea073193dcb10b9515573d0a (patch)
tree: db6d1d18fcece091690c21463eff2845cb27502d
parent: 0fbd288e0b8dd76ec87eb302bd9558de0ce663a9 (diff)
download: passlib-2cf6008218aa5195ea073193dcb10b9515573d0a.tar.gz
3 files changed, 340 insertions, 233 deletions
diff --git a/passlib/__main__.py b/passlib/__main__.py
index 2b06d02..40ae846 100644
--- a/passlib/__main__.py
+++ b/passlib/__main__.py
@@ -8,9 +8,13 @@ from optparse import OptionParser
 import re
 import sys
 # package
+from passlib import __version__
 from passlib.registry import list_crypt_handlers, get_crypt_handler
 from passlib.utils.compat import print_, iteritems
 import passlib.utils.handlers as uh
+
+vstr = "Passlib " + __version__
+
 #=========================================================
 # utils
 #=========================================================
@@ -26,7 +30,7 @@ def encrypt_cmd(args):
     #
     # parse args
     #
-    p = OptionParser(prog="passlib encrypt",
+    p = OptionParser(prog="passlib encrypt", version=vstr,
                      usage="%prog [options] <format> <password>")
     ##p.add_option("-d", "--details",
     ##                  action="store_true", dest="details", default=False,
@@ -181,7 +185,8 @@ def identify_cmd(args):
     #
     # parse args
     #
-    p = OptionParser(prog="passlib identify", usage="%prog [options] <hash>")
+    p = OptionParser(prog="passlib identify", version=vstr,
+                     usage="%prog [options] <hash>")
     p.add_option("-d", "--details",
                       action="store_true", dest="details", default=False,
                       help="show details about referenced hashes")
@@ -233,11 +238,106 @@ def identify_cmd(args):
     return 0 if results else 1
 
 #=========================================================
+# timer command
+#=========================================================
+def benchmark_cmd(args):
+    """benchmark speed of hash algorithms"""
+    #
+    # parse args
+    #
+    p = OptionParser(prog="passlib benchmark", version=vstr,
+                     usage="%prog [options] <alg> [ <alg> ... ]",
+                     description="""You should provide the names of one
+or more algorithms to benchmark, as positional arguments. If you
+provide the special name "all", all algorithms in Passlib will be tested.""",
+                     )
+    p.add_option("--max-time", action="store", type="float",
+                 dest="max_time", default=1.0, metavar="TIME",
+                 help="spend at most TIME seconds benchmarking each hash (default=%default)",
+                )
+
+    p.add_option("--csv", action="store_true",
+                 dest="csv", default=False,
+                 help="Output results in CSV format")
+
+    opts, args = p.parse_args(args)
+    if not args:
+        p.error("no algorithm names provided")
+    elif len(args) == 1 and args[0] == "all":
+        autoall = True
+        args = [ name for name in list_crypt_handlers()
+                if name not in _skip_handlers ]
+    else:
+        autoall = False
+
+    from passlib.utils._cost import HashTimer
+
+    kwds = dict(max_time=opts.max_time)
+
+    if opts.csv:
+        fmt = "%s,%s,%s,%s"
+        print_(fmt % ("handler", "backend", "cost", "speed"))
+    else:
+        fmt = "%-30s %-10s %10s"
+        print_(fmt % ("handler", "cost", "speed"))
+        print_(fmt % ("-" * 30, "-" * 10, "-" * 10))
+
+    def measure(handler, backend=None):
+        if backend:
+            tag = "%s (%s)" % (handler.name, backend)
+            if not hasattr(handler, "backends"):
+                print_("\nerror: %r handler does not support multiple backends"
+                                % (handler.name,))
+                return 1
+            if backend not in handler.backends:
+                print_("\nerror: %r handler has no backend named %r" %
+                                 (handler.name, backend))
+                return 1
+            if not handler.has_backend(backend):
+                cost = getattr(handler, "rounds_cost", None) or "fixed"
+                if opts.csv:
+                    print_(fmt % (handler.name, backend, cost, ""))
+                else:
+                    print_(fmt % (tag, cost, ""))
+                return
+        else:
+            tag = handler.name
+        timer = HashTimer(handler, backend=backend, **kwds)
+        cost = timer.scale if timer.hasrounds else "fixed"
+        if timer.speed < 10:
+            spd = "%g" % (timer.speed,)
+        else:
+            spd = "%d" % (timer.speed,)
+        if opts.csv:
+            print_(fmt % (handler.name, backend or '', cost, spd))
+        else:
+            print_(fmt % (tag, cost, spd))
+
+    for name in args:
+        if ":" in name:
+            name, backend = name.split(":")
+        else:
+            backend = None
+        handler = get_crypt_handler(name)
+        if (backend == "all" or autoall) and hasattr(handler, "backends"):
+            for backend in handler.backends:
+                rc = measure(handler, backend)
+                if rc:
+                    return rc
+        else:
+            rc = measure(handler, backend)
+            if rc:
+                return rc
+
+    return 0
+
+#=========================================================
 # main
 #=========================================================
 commands = {
     "identify": identify_cmd,
     "encrypt": encrypt_cmd,
+    "benchmark": benchmark_cmd,
     # TODO: verify_cmd
     # TODO: gencfg_cmd - generate config w/ timings, possibly taking in another
     # TODO: chkcfg_cmd - check config file for errors.
@@ -266,8 +366,7 @@ def main(args):
         _print_usage()
         return 0
     elif cmd in ["version", "--version", "-v"]:
-        from passlib import __version__ as vstr
-        print_("Passlib %s" % (vstr,))
+        print_(vstr)
         return 0
     func = commands.get(cmd)
     if not func:
diff --git a/passlib/tests/genconfig.py b/passlib/tests/genconfig.py
deleted file mode 100644
index dc87d02..0000000
--- a/passlib/tests/genconfig.py
+++ /dev/null
@@ -1,229 +0,0 @@
-"""passlib config generation script
-
-this script is a work in progress to develop a script which generates
-rounds configuration parameters suitable for a particular host & deployment requirements.
-right now it just consists of a function which experimentally determines
-the optimal range of rounds values for a given hash, based on the desired time it should take.
-"""
-#=========================================================
-#imports
-#=========================================================
-#core
-from math import log as logb
-import logging
-import time
-import sys
-#site
-#pkg
-from passlib.utils.compat import iteritems, print_
-from passlib.registry import get_crypt_handler
-#local
-log = logging.getLogger(__name__)
-#=========================================================
-#
-#=========================================================
-class HashTimer(object):
-    """helper which determines number of rounds required for hash to take desired amount of time.
-
-    usage::
-
-        >>> timer = HashTimer("sha512_crypt")
-        >>> timer.find_rounds(.5)
-
-    .. note::
-        This function is not very exact, and generates results
-        that are only approximately the same each time (w/in about 5% usually).
-
-        Furthermore, to generate useful values, it should
-        be run when the system has an average load
-        to get an accurate measurement.
-    """
-    log = logging.getLogger(__name__ + ".HashTimer")
-
-    def __init__(self, name, samples=1):
-        #
-        #get handler, extract boundary information
-        #
-        self.handler = handler = get_crypt_handler(name)
-        if 'rounds' not in handler.setting_kwds:
-            raise ValueError("scheme does not support rounds: %r" % (handler.name,))
-        self.min_rounds = getattr(handler, "min_rounds", 2)
-        self.max_rounds = getattr(handler, "max_rounds", (1<<32)-1)
-        rc = self.rounds_cost = getattr(handler, "rounds_cost", "linear")
-
-        #
-        #set up functions that vary based on rounds cost function
-        #
-        if rc == "linear":
-            def get_rps(rounds, delta):
-                return rounds/delta
-            def guess_rounds(rps, target):
-                return int(rps*target+.5)
-            erradj = 2
-        elif rc == "log2":
-            def get_rps(rounds, delta):
-                return (2**rounds)/delta
-            def guess_rounds(rps, target):
-                return int(logb(rps*target,2)+.5)
-            erradj = 1.1
-        else:
-            raise NotImplementedError("unknown rounds cost function: %r" % (rc,))
-        self.get_rps = get_rps
-        self.guess_rounds = guess_rounds
-        self.erradj = erradj
-
-        #
-        #init cache
-        #
-        self.samples = samples
-        self.cache = {}
-        self.srange = trange(samples)
-
-    def time_encrypt(self, rounds):
-        "check how long encryption for a given number of rounds will take"
-        cache = self.cache
-        if rounds in cache:
-            return cache[rounds]
-        encrypt = self.handler.encrypt
-        srange = self.srange
-        cur = time.time
-        start = cur()
-        for x in srange:
-            encrypt("too many secrets", rounds=rounds)
-        stop = cur()
-        delta = (stop-start)/self.samples
-        cache[rounds] = delta
-        return delta
-
-    def find_rounds(self, target, over=False, under=False):
-        """find optimal rounds range for hash
-
-        :arg target: time hashing a password should take
-        :param over: if True, returns minimum rounds taking *at least* target seconds.
-        :param under: if True, returns maximum rounds taking *at most* target seconds.
-
-        if neither over / under is set, returns rounds taking
-        closest to target seconds.
-
-        :returns:
-            returns number of rounds closest
-            to taking about ``target`` seconds to hash a password.
-        """
-        if target <= 0:
-            raise ValueError("target must be > 0")
-
-        log = self.log
-        name = self.handler.name
-        get_rps = self.get_rps
-        time_encrypt = self.time_encrypt
-        log.info("%s: finding rounds for target time: %f", name, target)
-
-        #
-        #check if useful lower & upper bounds already exist in cache
-        #
-        lower = upper = None
-        for rounds, delta in iteritems(self.cache.iteritems):
-            if delta < target:
-                if lower is None or rounds > lower:
-                    lower = rounds
-            else:
-                if upper is None or rounds < upper:
-                    upper = rounds
-
-        #
-        #if bounds not found in cache, run open-ended search for starting bounds
-        #
-        if lower is None:
-            lower = max(1,self.min_rounds)
-        if upper is None:
-            guess_rounds = self.guess_rounds
-            max_rounds = self.max_rounds
-            target_above = target*self.erradj #NOTE: we aim a little high as hack to deal w/ measuring error
-            rounds = lower
-            while True:
-                delta = time_encrypt(rounds)
-                rps = get_rps(rounds, delta)
-                log.debug("%s: ranging target: checked %r -> %fs (%f r/s)", name, rounds, delta, rps)
-                if delta < target:
-                    lower = rounds
-                    if rounds == max_rounds:
-                        log.warning("%s: target time out of range: hash would require > max_rounds (%d) in order to take %fs", name, max_rounds, target)
-                        return rounds
-                    rounds = min(max(guess_rounds(rps, target_above), rounds+1), max_rounds)
-                else:
-                    upper = rounds
-                    break
-
-        #
-        #perform binary search till we find match
-        #
-        while lower+1<upper:
-            #NOTE: weighting things in favor of upper, since per-call overhead causes curve to not be quite linear.
-            next = (upper*5+lower*3)//8
-            delta = time_encrypt(next)
-            rps = get_rps(next, delta)
-            log.debug("%s: finding target: range %r .. %r: checked %r -> %fs (%f r/s)", name, lower, upper, next, delta, rps)
-            if delta < target:
-                lower = next
-            else:
-                upper = next
-
-        #
-        #now 'lower' is largest value which takes less than target seconds,
-        #and 'upper' is smallest value which takes greater than target seconds.
-        #so we pick based on over/under flags, or fallback to whichever one is closest
-        #
-        if over:
-            return upper
-        elif under:
-            return lower
-        else:
-            if target-cache[lower] < cache[upper]-target:
-                return lower
-            else:
-                return upper
-
-    def find_rounds_range(self, target_high, target_low=None, over=False, under=False):
-        "find min/max rounds which will cause scheme to take specified range of times"
-        if target_low is None:
-            target_low = target_high * .75
-        elif target_low > target_high:
-            target_high = target_low
-        rounds_high = self.find_rounds(target_high, under=not over, over=over)
-        rounds_low = self.find_rounds(target_low, over=not under, under=under)
-        if rounds_low > rounds_high:
-            #NOTE: this happens sometimes w/ rounds_cost=log2...
-            #if nothing hits w/in range, rounds_low will be 1+ rounds_high
-            #we just return correctly ordered range
-            rounds_low, rounds_high = rounds_high, rounds_low
-        return rounds_low, rounds_high
-
-    def estimate_rps(self):
-        "return estimated rounds per second based on cached results"
-        cache = self.cache
-        if not cache:
-            raise RuntimeError("should not be called until cache populated by find_rounds()")
-        get_rps = self.get_rps
-        rps = sum(r*get_rps(r,d) for r,d in iteritems(cache))/sum(cache)
-        if rps > 1000: #for almost all cases, we'd return integer
-            rps = int(rps)
-        return rps
-
-#=========================================================
-#main
-#=========================================================
-def main(*args):
-    from bps.logs import setup_std_logging
-    setup_std_logging(level="debug", dev=True)
-
-    timer = HashTimer("sha256_crypt")
-    print_(timer.find_rounds_range(.5))
-    print_(timer.estimate_rps())
-
-    #TODO: give script ability to generate timings for a range of schemes, and minimum / maximum times.
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
-#=========================================================
-#eof
-#=========================================================
diff --git a/passlib/utils/_cost.py b/passlib/utils/_cost.py
new file mode 100644
index 0000000..8f12d70
--- /dev/null
+++ b/passlib/utils/_cost.py
@@ -0,0 +1,237 @@
+"""passlib.utils._cost - hash cost analysis
+
+this modules contains routines for measuring the cost/speed of a hash.
+it may eventually made public.
+"""
+#=========================================================
+#imports
+#=========================================================
+from __future__ import division
+#core
+from math import log as logb
+import logging; log = logging.getLogger(__name__)
+import sys
+#site
+#pkg
+from passlib.registry import get_crypt_handler
+from passlib.utils import timer
+from passlib.utils.compat import u
+#local
+__all__ = [
+    "HashTimer",
+]
+#=========================================================
+# timer class
+#=========================================================
+SECRET = u("dummy password")
+
+class HashTimer(object):
+    """helper class which determines speed of hash implementation
+
+    :arg handler: passlib handler object to test
+    :param max_time: maximum amount of time to speed timing hash
+    :param backend: optionally use specific backend for handler
+
+    once created...
+
+    * :meth:`estimate` can be called to determine correct rounds value
+      which will take a specific amount of time.
+    * :attr:`speed` will report the handler's speed in iterations per second.
+    * :attr:`scale` will report the scale that the hash's rounds parameter
+      uses relative to the *speed*, which is always linear.
+    """
+    #====================================================================
+    # instance attrs
+    #====================================================================
+
+    # inputs
+    handler = None
+    max_time = None
+    backend = None
+
+    # results
+    hasrounds = None #: True if has variable rounds, False if fixed
+    scale = None #: scaling of cost parameter (linear/log2)
+    speed = None #: raw rounds/second
+
+    # public helpers
+    c2v = None # convert cost parameter -> rounds value
+    v2c = None # convert rounds value -> cost parameter
+
+    #====================================================================
+    # init / main loop
+    #====================================================================
+    def __init__(self, handler, max_time=1, backend=None):
+        # validate params
+        self.max_time = max(0, max_time)
+
+        # characterize handler
+        self.handler = handler
+        self.hasrounds = 'rounds' in handler.setting_kwds
+        self.scale = handler.rounds_cost if self.hasrounds else "linear"
+
+        # init cost manipulation helpers
+        if self.scale == "log2":
+            self.c2v = lambda c: 1<<c
+            self.v2c = lambda v: int(logb(v,2))
+            self.cshift = lambda c,p: max(0,c+p)
+        else:
+            self.c2v = self.v2c = lambda x: int(x)
+            self.cshift = lambda c,p: c<<p if p > 0 else c>>(-p)
+
+        # init stub context kwds
+        ctx = self.ctx = {}
+        if handler.context_kwds:
+            if 'user' in handler.context_kwds:
+                ctx['user'] = u("dummyuser")
+
+        # run timing loop
+        self.backend = backend
+        if backend:
+            orig_backend = handler.get_backend()
+            handler.set_backend(backend)
+        try:
+            self._run()
+        finally:
+            if backend:
+                handler.set_backend(orig_backend)
+
+    def _run(self):
+        # init locals
+        terminate = timer() + self.max_time
+        handler = self.handler
+        ctx = self.ctx
+        samples = self._samples = []
+
+        # pick small rounds value to start with
+        if self.hasrounds:
+            rounds = max(handler.min_rounds,
+                         self.cshift(handler.default_rounds, -1))
+            setup_factor = 1.9
+        else:
+            rounds = 16
+            setup_factor = 1
+            hash = handler.encrypt(SECRET, **ctx)
+
+        # do main testing loop
+        while True:
+            # test with specified number of rounds
+            if self.hasrounds:
+                # NOTE: the extra time taken by this encrypt() call is
+                #       why setup_factor is set to 1.9, above
+                hash = handler.encrypt(SECRET, rounds=rounds, **ctx)
+                start = timer()
+                handler.verify(SECRET, hash, **ctx)
+                end = timer()
+            else:
+                i = 0
+                start = timer()
+                while i < rounds:
+                    handler.verify(SECRET, hash, **ctx)
+                    i += 1
+                end = timer()
+
+            # record speed, and decide if we have time to go again w/ 2x rounds
+            elapsed = end - start
+            samples.append((rounds, elapsed))
+            remaining = (terminate - end) / (setup_factor * elapsed)
+            if remaining < 1:
+                break
+            elif remaining >= 2:
+                rounds = self.cshift(rounds, 1)
+            # else get another sample at same # rounds, since there's time
+
+        # calculate speed - this discards the first 1/3 of the samples,
+        # since the smaller rounds are frequently inaccurate. it then takes
+        # the median value, to cheaply discard any outliers.
+        count = len(samples)
+        if count < 2:
+            c,e = samples[0]
+            self.speed = self.c2v(c)/e
+        else:
+            speeds = sorted(self.c2v(c)/e for c,e in samples[count//3:])
+            self.speed = speeds[(len(speeds)+1)//2]
+
+    #====================================================================
+    # public helpers
+    #====================================================================
+    def estimate(self, target):
+        "estimate rounds value to reach target time"
+        value = self.speed * target
+        rounds = self.v2c(value)
+        if self.scale == "log2" and value > 1.5 * self.c2v(rounds):
+            return rounds+1
+        return rounds
+
+    #====================================================================
+    # eoc
+    #====================================================================
+
+def estimate_rounds_value(handler, target_time=.25, max_time=None, **kwds):
+    "estimate number of rounds handler should use to take target_time"
+    if max_time is None:
+        max_time = target_time*2
+    timer = HashTimer(handler, max_time, **kwds)
+    return timer.estimate(target_time)
+
+#=========================================================
+# development helpers
+#=========================================================
+
+def _test_timer(timer, cost):
+    "helper to test HashTimer's accuracy"
+    handler = timer.handler
+    ctx = timer.ctx
+    if timer.hasrounds:
+        h = handler.encrypt(SECRET, rounds=cost, **ctx)
+        s = default_timer()
+        handler.verify(SECRET, h, **ctx)
+        return default_timer()-s
+    else:
+        h = handler.encrypt(SECRET, **ctx)
+        s = default_timer()
+        i = 0
+        while i < cost:
+            handler.verify(SECRET, h, **ctx)
+            i += 1
+        return default_timer()-s
+
+def main(*args):
+    "test script used to develop & test HashTimer"
+    from bps.logs import setup_std_logging
+    setup_std_logging(level="warn", dev=True)
+    from passlib import hash
+
+    target_delta = .25
+
+    for name in dir(hash):
+        if name.startswith("_"):
+            continue
+        handler = getattr(hash, name)
+
+        s = default_timer()
+        timer = HashTimer(handler, max_time=1)
+        run_time = default_timer()-s
+
+        target_cost = timer.estimate(target_delta)
+        real_delta = _test_timer(timer, target_cost)
+        real_speed = timer.c2v(target_cost)/real_delta
+
+        speeds = [ timer.c2v(c)/e for c,e in timer._samples ]
+        def fpe(value, correct):
+            return round((value-correct)/correct*100,2)
+        print "%30s, %10s, % 10.2f, % 5.4f%%, % 5.4fs [%s]" % \
+            (timer.handler.name,
+             timer.scale if timer.hasrounds else "fixed",
+             timer.speed,
+             fpe(timer.speed, real_speed),
+             run_time,
+             ", ".join(str(fpe(s,real_speed)) for s in sorted(speeds))
+             )
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
+
+#=========================================================
+#eof
+#=========================================================
author	Eli Collins <elic@assurancetechnologies.com>	2012-01-05 21:07:17 -0500
committer	Eli Collins <elic@assurancetechnologies.com>	2012-01-05 21:07:17 -0500
commit	2cf6008218aa5195ea073193dcb10b9515573d0a (patch)
tree	db6d1d18fcece091690c21463eff2845cb27502d
parent	0fbd288e0b8dd76ec87eb302bd9558de0ce663a9 (diff)
download	passlib-2cf6008218aa5195ea073193dcb10b9515573d0a.tar.gz