summaryrefslogtreecommitdiff
path: root/paste/exceptions/serial_number_generator.py
blob: 3f80107efc4ff4bfd6ec9fa0edea8791d486eed4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php

"""
Creates a human-readable identifier, using numbers and digits,
avoiding ambiguous numbers and letters.  hash_identifier can be used
to create compact representations that are unique for a certain string
(or concatenation of strings)
"""

try:
    from hashlib import md5
except ImportError:
    from md5 import md5

import six

good_characters = "23456789abcdefghjkmnpqrtuvwxyz"

base = len(good_characters)

def make_identifier(number):
    """
    Encodes a number as an identifier.
    """
    if not isinstance(number, six.integer_types):
        raise ValueError(
            "You can only make identifiers out of integers (not %r)"
            % number)
    if number < 0:
        raise ValueError(
            "You cannot make identifiers out of negative numbers: %r"
            % number)
    result = []
    while number:
        next = number % base
        result.append(good_characters[next])
        # Note, this depends on integer rounding of results:
        number = number // base
    return ''.join(result)

def hash_identifier(s, length, pad=True, hasher=md5, prefix='',
                    group=None, upper=False):
    """
    Hashes the string (with the given hashing module), then turns that
    hash into an identifier of the given length (using modulo to
    reduce the length of the identifier).  If ``pad`` is False, then
    the minimum-length identifier will be used; otherwise the
    identifier will be padded with 0's as necessary.

    ``prefix`` will be added last, and does not count towards the
    target length.  ``group`` will group the characters with ``-`` in
    the given lengths, and also does not count towards the target
    length.  E.g., ``group=4`` will cause a identifier like
    ``a5f3-hgk3-asdf``.  Grouping occurs before the prefix.
    """
    if not callable(hasher):
        # Accept sha/md5 modules as well as callables
        hasher = hasher.new
    if length > 26 and hasher is md5:
        raise ValueError(
            "md5 cannot create hashes longer than 26 characters in "
            "length (you gave %s)" % length)
    if isinstance(s, six.text_type):
        s = s.encode('utf-8')
    elif not isinstance(s, six.binary_type):
        s = str(s)
        if six.PY3:
            s = s.encode('utf-8')
    h = hasher(s)
    bin_hash = h.digest()
    modulo = base ** length
    number = 0
    for c in list(bin_hash):
        number = (number * 256 + six.byte2int([c])) % modulo
    ident = make_identifier(number)
    if pad:
        ident = good_characters[0]*(length-len(ident)) + ident
    if group:
        parts = []
        while ident:
            parts.insert(0, ident[-group:])
            ident = ident[:-group]
        ident = '-'.join(parts)
    if upper:
        ident = ident.upper()
    return prefix + ident

# doctest tests:
__test__ = {
    'make_identifier': """
    >>> make_identifier(0)
    ''
    >>> make_identifier(1000)
    'c53'
    >>> make_identifier(-100)
    Traceback (most recent call last):
        ...
    ValueError: You cannot make identifiers out of negative numbers: -100
    >>> make_identifier('test')
    Traceback (most recent call last):
        ...
    ValueError: You can only make identifiers out of integers (not 'test')
    >>> make_identifier(1000000000000)
    'c53x9rqh3'
    """,
    'hash_identifier': """
    >>> hash_identifier(0, 5)
    'cy2dr'
    >>> hash_identifier(0, 10)
    'cy2dr6rg46'
    >>> hash_identifier('this is a test of a long string', 5)
    'awatu'
    >>> hash_identifier(0, 26)
    'cy2dr6rg46cx8t4w2f3nfexzk4'
    >>> hash_identifier(0, 30)
    Traceback (most recent call last):
        ...
    ValueError: md5 cannot create hashes longer than 26 characters in length (you gave 30)
    >>> hash_identifier(0, 10, group=4)
    'cy-2dr6-rg46'
    >>> hash_identifier(0, 10, group=4, upper=True, prefix='M-')
    'M-CY-2DR6-RG46'
    """}

if __name__ == '__main__':
    import doctest
    doctest.testmod()