diff options
| author | Eli Collins <elic@assurancetechnologies.com> | 2012-01-05 21:07:17 -0500 |
|---|---|---|
| committer | Eli Collins <elic@assurancetechnologies.com> | 2012-01-05 21:07:17 -0500 |
| commit | 2cf6008218aa5195ea073193dcb10b9515573d0a (patch) | |
| tree | db6d1d18fcece091690c21463eff2845cb27502d | |
| parent | 0fbd288e0b8dd76ec87eb302bd9558de0ce663a9 (diff) | |
| download | passlib-2cf6008218aa5195ea073193dcb10b9515573d0a.tar.gz | |
hash benchmark utility
* added benchmark subcommand to '-m passlib' tool,
can generate benchmarks for one or more hash algorithms & backends
* rewrote HashTimer to use as base for benchmark cmd, moved to utils.
| -rw-r--r-- | passlib/__main__.py | 107 | ||||
| -rw-r--r-- | passlib/tests/genconfig.py | 229 | ||||
| -rw-r--r-- | passlib/utils/_cost.py | 237 |
3 files changed, 340 insertions, 233 deletions
diff --git a/passlib/__main__.py b/passlib/__main__.py index 2b06d02..40ae846 100644 --- a/passlib/__main__.py +++ b/passlib/__main__.py @@ -8,9 +8,13 @@ from optparse import OptionParser import re import sys # package +from passlib import __version__ from passlib.registry import list_crypt_handlers, get_crypt_handler from passlib.utils.compat import print_, iteritems import passlib.utils.handlers as uh + +vstr = "Passlib " + __version__ + #========================================================= # utils #========================================================= @@ -26,7 +30,7 @@ def encrypt_cmd(args): # # parse args # - p = OptionParser(prog="passlib encrypt", + p = OptionParser(prog="passlib encrypt", version=vstr, usage="%prog [options] <format> <password>") ##p.add_option("-d", "--details", ## action="store_true", dest="details", default=False, @@ -181,7 +185,8 @@ def identify_cmd(args): # # parse args # - p = OptionParser(prog="passlib identify", usage="%prog [options] <hash>") + p = OptionParser(prog="passlib identify", version=vstr, + usage="%prog [options] <hash>") p.add_option("-d", "--details", action="store_true", dest="details", default=False, help="show details about referenced hashes") @@ -233,11 +238,106 @@ def identify_cmd(args): return 0 if results else 1 #========================================================= +# timer command +#========================================================= +def benchmark_cmd(args): + """benchmark speed of hash algorithms""" + # + # parse args + # + p = OptionParser(prog="passlib benchmark", version=vstr, + usage="%prog [options] <alg> [ <alg> ... ]", + description="""You should provide the names of one +or more algorithms to benchmark, as positional arguments. If you +provide the special name "all", all algorithms in Passlib will be tested.""", + ) + p.add_option("--max-time", action="store", type="float", + dest="max_time", default=1.0, metavar="TIME", + help="spend at most TIME seconds benchmarking each hash (default=%default)", + ) + + p.add_option("--csv", action="store_true", + dest="csv", default=False, + help="Output results in CSV format") + + opts, args = p.parse_args(args) + if not args: + p.error("no algorithm names provided") + elif len(args) == 1 and args[0] == "all": + autoall = True + args = [ name for name in list_crypt_handlers() + if name not in _skip_handlers ] + else: + autoall = False + + from passlib.utils._cost import HashTimer + + kwds = dict(max_time=opts.max_time) + + if opts.csv: + fmt = "%s,%s,%s,%s" + print_(fmt % ("handler", "backend", "cost", "speed")) + else: + fmt = "%-30s %-10s %10s" + print_(fmt % ("handler", "cost", "speed")) + print_(fmt % ("-" * 30, "-" * 10, "-" * 10)) + + def measure(handler, backend=None): + if backend: + tag = "%s (%s)" % (handler.name, backend) + if not hasattr(handler, "backends"): + print_("\nerror: %r handler does not support multiple backends" + % (handler.name,)) + return 1 + if backend not in handler.backends: + print_("\nerror: %r handler has no backend named %r" % + (handler.name, backend)) + return 1 + if not handler.has_backend(backend): + cost = getattr(handler, "rounds_cost", None) or "fixed" + if opts.csv: + print_(fmt % (handler.name, backend, cost, "")) + else: + print_(fmt % (tag, cost, "")) + return + else: + tag = handler.name + timer = HashTimer(handler, backend=backend, **kwds) + cost = timer.scale if timer.hasrounds else "fixed" + if timer.speed < 10: + spd = "%g" % (timer.speed,) + else: + spd = "%d" % (timer.speed,) + if opts.csv: + print_(fmt % (handler.name, backend or '', cost, spd)) + else: + print_(fmt % (tag, cost, spd)) + + for name in args: + if ":" in name: + name, backend = name.split(":") + else: + backend = None + handler = get_crypt_handler(name) + if (backend == "all" or autoall) and hasattr(handler, "backends"): + for backend in handler.backends: + rc = measure(handler, backend) + if rc: + return rc + else: + rc = measure(handler, backend) + if rc: + return rc + + return 0 + +#========================================================= # main #========================================================= commands = { "identify": identify_cmd, "encrypt": encrypt_cmd, + "benchmark": benchmark_cmd, # TODO: verify_cmd # TODO: gencfg_cmd - generate config w/ timings, possibly taking in another # TODO: chkcfg_cmd - check config file for errors. @@ -266,8 +366,7 @@ def main(args): _print_usage() return 0 elif cmd in ["version", "--version", "-v"]: - from passlib import __version__ as vstr - print_("Passlib %s" % (vstr,)) + print_(vstr) return 0 func = commands.get(cmd) if not func: diff --git a/passlib/tests/genconfig.py b/passlib/tests/genconfig.py deleted file mode 100644 index dc87d02..0000000 --- a/passlib/tests/genconfig.py +++ /dev/null @@ -1,229 +0,0 @@ -"""passlib config generation script - -this script is a work in progress to develop a script which generates -rounds configuration parameters suitable for a particular host & deployment requirements. -right now it just consists of a function which experimentally determines -the optimal range of rounds values for a given hash, based on the desired time it should take. -""" -#========================================================= -#imports -#========================================================= -#core -from math import log as logb -import logging -import time -import sys -#site -#pkg -from passlib.utils.compat import iteritems, print_ -from passlib.registry import get_crypt_handler -#local -log = logging.getLogger(__name__) -#========================================================= -# -#========================================================= -class HashTimer(object): - """helper which determines number of rounds required for hash to take desired amount of time. - - usage:: - - >>> timer = HashTimer("sha512_crypt") - >>> timer.find_rounds(.5) - - .. note:: - This function is not very exact, and generates results - that are only approximately the same each time (w/in about 5% usually). - - Furthermore, to generate useful values, it should - be run when the system has an average load - to get an accurate measurement. - """ - log = logging.getLogger(__name__ + ".HashTimer") - - def __init__(self, name, samples=1): - # - #get handler, extract boundary information - # - self.handler = handler = get_crypt_handler(name) - if 'rounds' not in handler.setting_kwds: - raise ValueError("scheme does not support rounds: %r" % (handler.name,)) - self.min_rounds = getattr(handler, "min_rounds", 2) - self.max_rounds = getattr(handler, "max_rounds", (1<<32)-1) - rc = self.rounds_cost = getattr(handler, "rounds_cost", "linear") - - # - #set up functions that vary based on rounds cost function - # - if rc == "linear": - def get_rps(rounds, delta): - return rounds/delta - def guess_rounds(rps, target): - return int(rps*target+.5) - erradj = 2 - elif rc == "log2": - def get_rps(rounds, delta): - return (2**rounds)/delta - def guess_rounds(rps, target): - return int(logb(rps*target,2)+.5) - erradj = 1.1 - else: - raise NotImplementedError("unknown rounds cost function: %r" % (rc,)) - self.get_rps = get_rps - self.guess_rounds = guess_rounds - self.erradj = erradj - - # - #init cache - # - self.samples = samples - self.cache = {} - self.srange = trange(samples) - - def time_encrypt(self, rounds): - "check how long encryption for a given number of rounds will take" - cache = self.cache - if rounds in cache: - return cache[rounds] - encrypt = self.handler.encrypt - srange = self.srange - cur = time.time - start = cur() - for x in srange: - encrypt("too many secrets", rounds=rounds) - stop = cur() - delta = (stop-start)/self.samples - cache[rounds] = delta - return delta - - def find_rounds(self, target, over=False, under=False): - """find optimal rounds range for hash - - :arg target: time hashing a password should take - :param over: if True, returns minimum rounds taking *at least* target seconds. - :param under: if True, returns maximum rounds taking *at most* target seconds. - - if neither over / under is set, returns rounds taking - closest to target seconds. - - :returns: - returns number of rounds closest - to taking about ``target`` seconds to hash a password. - """ - if target <= 0: - raise ValueError("target must be > 0") - - log = self.log - name = self.handler.name - get_rps = self.get_rps - time_encrypt = self.time_encrypt - log.info("%s: finding rounds for target time: %f", name, target) - - # - #check if useful lower & upper bounds already exist in cache - # - lower = upper = None - for rounds, delta in iteritems(self.cache.iteritems): - if delta < target: - if lower is None or rounds > lower: - lower = rounds - else: - if upper is None or rounds < upper: - upper = rounds - - # - #if bounds not found in cache, run open-ended search for starting bounds - # - if lower is None: - lower = max(1,self.min_rounds) - if upper is None: - guess_rounds = self.guess_rounds - max_rounds = self.max_rounds - target_above = target*self.erradj #NOTE: we aim a little high as hack to deal w/ measuring error - rounds = lower - while True: - delta = time_encrypt(rounds) - rps = get_rps(rounds, delta) - log.debug("%s: ranging target: checked %r -> %fs (%f r/s)", name, rounds, delta, rps) - if delta < target: - lower = rounds - if rounds == max_rounds: - log.warning("%s: target time out of range: hash would require > max_rounds (%d) in order to take %fs", name, max_rounds, target) - return rounds - rounds = min(max(guess_rounds(rps, target_above), rounds+1), max_rounds) - else: - upper = rounds - break - - # - #perform binary search till we find match - # - while lower+1<upper: - #NOTE: weighting things in favor of upper, since per-call overhead causes curve to not be quite linear. - next = (upper*5+lower*3)//8 - delta = time_encrypt(next) - rps = get_rps(next, delta) - log.debug("%s: finding target: range %r .. %r: checked %r -> %fs (%f r/s)", name, lower, upper, next, delta, rps) - if delta < target: - lower = next - else: - upper = next - - # - #now 'lower' is largest value which takes less than target seconds, - #and 'upper' is smallest value which takes greater than target seconds. - #so we pick based on over/under flags, or fallback to whichever one is closest - # - if over: - return upper - elif under: - return lower - else: - if target-cache[lower] < cache[upper]-target: - return lower - else: - return upper - - def find_rounds_range(self, target_high, target_low=None, over=False, under=False): - "find min/max rounds which will cause scheme to take specified range of times" - if target_low is None: - target_low = target_high * .75 - elif target_low > target_high: - target_high = target_low - rounds_high = self.find_rounds(target_high, under=not over, over=over) - rounds_low = self.find_rounds(target_low, over=not under, under=under) - if rounds_low > rounds_high: - #NOTE: this happens sometimes w/ rounds_cost=log2... - #if nothing hits w/in range, rounds_low will be 1+ rounds_high - #we just return correctly ordered range - rounds_low, rounds_high = rounds_high, rounds_low - return rounds_low, rounds_high - - def estimate_rps(self): - "return estimated rounds per second based on cached results" - cache = self.cache - if not cache: - raise RuntimeError("should not be called until cache populated by find_rounds()") - get_rps = self.get_rps - rps = sum(r*get_rps(r,d) for r,d in iteritems(cache))/sum(cache) - if rps > 1000: #for almost all cases, we'd return integer - rps = int(rps) - return rps - -#========================================================= -#main -#========================================================= -def main(*args): - from bps.logs import setup_std_logging - setup_std_logging(level="debug", dev=True) - - timer = HashTimer("sha256_crypt") - print_(timer.find_rounds_range(.5)) - print_(timer.estimate_rps()) - - #TODO: give script ability to generate timings for a range of schemes, and minimum / maximum times. - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) -#========================================================= -#eof -#========================================================= diff --git a/passlib/utils/_cost.py b/passlib/utils/_cost.py new file mode 100644 index 0000000..8f12d70 --- /dev/null +++ b/passlib/utils/_cost.py @@ -0,0 +1,237 @@ +"""passlib.utils._cost - hash cost analysis + +this modules contains routines for measuring the cost/speed of a hash. +it may eventually made public. +""" +#========================================================= +#imports +#========================================================= +from __future__ import division +#core +from math import log as logb +import logging; log = logging.getLogger(__name__) +import sys +#site +#pkg +from passlib.registry import get_crypt_handler +from passlib.utils import timer +from passlib.utils.compat import u +#local +__all__ = [ + "HashTimer", +] +#========================================================= +# timer class +#========================================================= +SECRET = u("dummy password") + +class HashTimer(object): + """helper class which determines speed of hash implementation + + :arg handler: passlib handler object to test + :param max_time: maximum amount of time to speed timing hash + :param backend: optionally use specific backend for handler + + once created... + + * :meth:`estimate` can be called to determine correct rounds value + which will take a specific amount of time. + * :attr:`speed` will report the handler's speed in iterations per second. + * :attr:`scale` will report the scale that the hash's rounds parameter + uses relative to the *speed*, which is always linear. + """ + #==================================================================== + # instance attrs + #==================================================================== + + # inputs + handler = None + max_time = None + backend = None + + # results + hasrounds = None #: True if has variable rounds, False if fixed + scale = None #: scaling of cost parameter (linear/log2) + speed = None #: raw rounds/second + + # public helpers + c2v = None # convert cost parameter -> rounds value + v2c = None # convert rounds value -> cost parameter + + #==================================================================== + # init / main loop + #==================================================================== + def __init__(self, handler, max_time=1, backend=None): + # validate params + self.max_time = max(0, max_time) + + # characterize handler + self.handler = handler + self.hasrounds = 'rounds' in handler.setting_kwds + self.scale = handler.rounds_cost if self.hasrounds else "linear" + + # init cost manipulation helpers + if self.scale == "log2": + self.c2v = lambda c: 1<<c + self.v2c = lambda v: int(logb(v,2)) + self.cshift = lambda c,p: max(0,c+p) + else: + self.c2v = self.v2c = lambda x: int(x) + self.cshift = lambda c,p: c<<p if p > 0 else c>>(-p) + + # init stub context kwds + ctx = self.ctx = {} + if handler.context_kwds: + if 'user' in handler.context_kwds: + ctx['user'] = u("dummyuser") + + # run timing loop + self.backend = backend + if backend: + orig_backend = handler.get_backend() + handler.set_backend(backend) + try: + self._run() + finally: + if backend: + handler.set_backend(orig_backend) + + def _run(self): + # init locals + terminate = timer() + self.max_time + handler = self.handler + ctx = self.ctx + samples = self._samples = [] + + # pick small rounds value to start with + if self.hasrounds: + rounds = max(handler.min_rounds, + self.cshift(handler.default_rounds, -1)) + setup_factor = 1.9 + else: + rounds = 16 + setup_factor = 1 + hash = handler.encrypt(SECRET, **ctx) + + # do main testing loop + while True: + # test with specified number of rounds + if self.hasrounds: + # NOTE: the extra time taken by this encrypt() call is + # why setup_factor is set to 1.9, above + hash = handler.encrypt(SECRET, rounds=rounds, **ctx) + start = timer() + handler.verify(SECRET, hash, **ctx) + end = timer() + else: + i = 0 + start = timer() + while i < rounds: + handler.verify(SECRET, hash, **ctx) + i += 1 + end = timer() + + # record speed, and decide if we have time to go again w/ 2x rounds + elapsed = end - start + samples.append((rounds, elapsed)) + remaining = (terminate - end) / (setup_factor * elapsed) + if remaining < 1: + break + elif remaining >= 2: + rounds = self.cshift(rounds, 1) + # else get another sample at same # rounds, since there's time + + # calculate speed - this discards the first 1/3 of the samples, + # since the smaller rounds are frequently inaccurate. it then takes + # the median value, to cheaply discard any outliers. + count = len(samples) + if count < 2: + c,e = samples[0] + self.speed = self.c2v(c)/e + else: + speeds = sorted(self.c2v(c)/e for c,e in samples[count//3:]) + self.speed = speeds[(len(speeds)+1)//2] + + #==================================================================== + # public helpers + #==================================================================== + def estimate(self, target): + "estimate rounds value to reach target time" + value = self.speed * target + rounds = self.v2c(value) + if self.scale == "log2" and value > 1.5 * self.c2v(rounds): + return rounds+1 + return rounds + + #==================================================================== + # eoc + #==================================================================== + +def estimate_rounds_value(handler, target_time=.25, max_time=None, **kwds): + "estimate number of rounds handler should use to take target_time" + if max_time is None: + max_time = target_time*2 + timer = HashTimer(handler, max_time, **kwds) + return timer.estimate(target_time) + +#========================================================= +# development helpers +#========================================================= + +def _test_timer(timer, cost): + "helper to test HashTimer's accuracy" + handler = timer.handler + ctx = timer.ctx + if timer.hasrounds: + h = handler.encrypt(SECRET, rounds=cost, **ctx) + s = default_timer() + handler.verify(SECRET, h, **ctx) + return default_timer()-s + else: + h = handler.encrypt(SECRET, **ctx) + s = default_timer() + i = 0 + while i < cost: + handler.verify(SECRET, h, **ctx) + i += 1 + return default_timer()-s + +def main(*args): + "test script used to develop & test HashTimer" + from bps.logs import setup_std_logging + setup_std_logging(level="warn", dev=True) + from passlib import hash + + target_delta = .25 + + for name in dir(hash): + if name.startswith("_"): + continue + handler = getattr(hash, name) + + s = default_timer() + timer = HashTimer(handler, max_time=1) + run_time = default_timer()-s + + target_cost = timer.estimate(target_delta) + real_delta = _test_timer(timer, target_cost) + real_speed = timer.c2v(target_cost)/real_delta + + speeds = [ timer.c2v(c)/e for c,e in timer._samples ] + def fpe(value, correct): + return round((value-correct)/correct*100,2) + print "%30s, %10s, % 10.2f, % 5.4f%%, % 5.4fs [%s]" % \ + (timer.handler.name, + timer.scale if timer.hasrounds else "fixed", + timer.speed, + fpe(timer.speed, real_speed), + run_time, + ", ".join(str(fpe(s,real_speed)) for s in sorted(speeds)) + ) + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) + +#========================================================= +#eof +#========================================================= |
