diff options
| -rw-r--r-- | Lib/hashlib.py | 29 | ||||
| -rw-r--r-- | Lib/test/test_hashlib.py | 12 | ||||
| -rw-r--r-- | Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst | 3 | ||||
| -rw-r--r-- | Modules/_hashopenssl.c | 167 | 
4 files changed, 179 insertions, 32 deletions
| diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 4e783a86a3..56873b7278 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -70,37 +70,44 @@ __all__ = __always_supported + ('new', 'algorithms_guaranteed',  __builtin_constructor_cache = {} +__block_openssl_constructor = { +    'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', +    'shake_128', 'shake_256', +    'blake2b', 'blake2s', +} +  def __get_builtin_constructor(name):      cache = __builtin_constructor_cache      constructor = cache.get(name)      if constructor is not None:          return constructor      try: -        if name in ('SHA1', 'sha1'): +        if name in {'SHA1', 'sha1'}:              import _sha1              cache['SHA1'] = cache['sha1'] = _sha1.sha1 -        elif name in ('MD5', 'md5'): +        elif name in {'MD5', 'md5'}:              import _md5              cache['MD5'] = cache['md5'] = _md5.md5 -        elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'): +        elif name in {'SHA256', 'sha256', 'SHA224', 'sha224'}:              import _sha256              cache['SHA224'] = cache['sha224'] = _sha256.sha224              cache['SHA256'] = cache['sha256'] = _sha256.sha256 -        elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'): +        elif name in {'SHA512', 'sha512', 'SHA384', 'sha384'}:              import _sha512              cache['SHA384'] = cache['sha384'] = _sha512.sha384              cache['SHA512'] = cache['sha512'] = _sha512.sha512 -        elif name in ('blake2b', 'blake2s'): +        elif name in {'blake2b', 'blake2s'}:              import _blake2              cache['blake2b'] = _blake2.blake2b              cache['blake2s'] = _blake2.blake2s -        elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', -                      'shake_128', 'shake_256'}: +        elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'}:              import _sha3              cache['sha3_224'] = _sha3.sha3_224              cache['sha3_256'] = _sha3.sha3_256              
cache['sha3_384'] = _sha3.sha3_384              cache['sha3_512'] = _sha3.sha3_512 +        elif name in {'shake_128', 'shake_256'}: +            import _sha3              cache['shake_128'] = _sha3.shake_128              cache['shake_256'] = _sha3.shake_256      except ImportError: @@ -114,8 +121,8 @@ def __get_builtin_constructor(name):  def __get_openssl_constructor(name): -    if name in {'blake2b', 'blake2s'}: -        # Prefer our blake2 implementation. +    if name in __block_openssl_constructor: +        # Prefer our blake2 and sha3 implementation.          return __get_builtin_constructor(name)      try:          f = getattr(_hashlib, 'openssl_' + name) @@ -140,8 +147,8 @@ def __hash_new(name, data=b'', **kwargs):      """new(name, data=b'') - Return a new hashing object using the named algorithm;      optionally initialized with data (which must be a bytes-like object).      """ -    if name in {'blake2b', 'blake2s'}: -        # Prefer our blake2 implementation. +    if name in __block_openssl_constructor: +        # Prefer our blake2 and sha3 implementation          # OpenSSL 1.1.0 comes with a limited implementation of blake2b/s.          # It does neither support keyed blake2 nor advanced features like          # salt, personal, tree hashing or SSE. 
diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 46088e52dc..9204b44bf4 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -27,6 +27,11 @@ c_hashlib = import_fresh_module('hashlib', fresh=['_hashlib'])  py_hashlib = import_fresh_module('hashlib', blocked=['_hashlib'])  try: +    from _hashlib import HASH +except ImportError: +    HASH = None + +try:      import _blake2  except ImportError:      _blake2 = None @@ -386,6 +391,9 @@ class HashLibTestCase(unittest.TestCase):          constructors = self.constructors_to_test[name]          for hash_object_constructor in constructors:              m = hash_object_constructor() +            if HASH is not None and isinstance(m, HASH): +                # _hashopenssl's variant does not have extra SHA3 attributes +                continue              self.assertEqual(capacity + rate, 1600)              self.assertEqual(m._capacity_bits, capacity)              self.assertEqual(m._rate_bits, rate) @@ -985,6 +993,10 @@ class KDFTests(unittest.TestCase):                  hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=1,                                 dklen=dklen) +    def test_normalized_name(self): +        self.assertNotIn("blake2b512", hashlib.algorithms_available) +        self.assertNotIn("sha3-512", hashlib.algorithms_available) +  if __name__ == "__main__":      unittest.main() diff --git a/Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst b/Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst new file mode 100644 index 0000000000..8a483c760a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst @@ -0,0 +1,3 @@ +Names of hashing algorithms from OpenSSL are now normalized to follow +Python's naming conventions. For example OpenSSL uses sha3-512 instead of +sha3_512 or blake2b512 instead of blake2b.  
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index c65c698cbb..29ebec77a4 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -34,6 +34,14 @@  #define MUNCH_SIZE INT_MAX +#if defined(NID_sha3_224) && defined(EVP_MD_FLAG_XOF) +#define PY_OPENSSL_HAS_SHA3 1 +#endif + +#ifdef NID_blake2b512 +#define PY_OPENSSL_HAS_BLAKE2 1 +#endif +  typedef struct {      PyObject_HEAD      EVP_MD_CTX          *ctx;   /* OpenSSL message digest context */ @@ -82,6 +90,135 @@ _setException(PyObject *exc)  }  /* LCOV_EXCL_STOP */ +static PyObject* +py_digest_name(const EVP_MD *md) +{ +    int nid = EVP_MD_nid(md); +    const char *name = NULL; + +    /* Hard-coded names for well-known hashing algorithms. +     * OpenSSL uses slightly different names algorithms like SHA3. +     */ +    switch (nid) { +    case NID_md5: +        name = "md5"; +        break; +    case NID_sha1: +        name = "sha1"; +        break; +    case NID_sha224: +        name ="sha224"; +        break; +    case NID_sha256: +        name ="sha256"; +        break; +    case NID_sha384: +        name ="sha384"; +        break; +    case NID_sha512: +        name ="sha512"; +        break; +#ifdef NID_sha512_224 +    case NID_sha512_224: +        name ="sha512_224"; +        break; +    case NID_sha512_256: +        name ="sha512_256"; +        break; +#endif +#ifdef PY_OPENSSL_HAS_SHA3 +    case NID_sha3_224: +        name ="sha3_224"; +        break; +    case NID_sha3_256: +        name ="sha3_256"; +        break; +    case NID_sha3_384: +        name ="sha3_384"; +        break; +    case NID_sha3_512: +        name ="sha3_512"; +        break; +    case NID_shake128: +        name ="shake_128"; +        break; +    case NID_shake256: +        name ="shake_256"; +        break; +#endif +#ifdef PY_OPENSSL_HAS_BLAKE2 +    case NID_blake2s256: +        name ="blake2s"; +        break; +    case NID_blake2b512: +        name ="blake2b"; +        break; +#endif +    default: +        
/* Ignore aliased names and only use long, lowercase name. The aliases +         * pollute the list and OpenSSL appears to have its own definition of +         * alias as the resulting list still contains duplicate and alternate +         * names for several algorithms. +         */ +        name = OBJ_nid2ln(nid); +        if (name == NULL) +            name = OBJ_nid2sn(nid); +        break; +    } + +    return PyUnicode_FromString(name); +} + +static const EVP_MD* +py_digest_by_name(const char *name) +{ +    const EVP_MD *digest = EVP_get_digestbyname(name); + +    /* OpenSSL uses dash instead of underscore in names of some algorithms +     * like SHA3 and SHAKE. Detect different spellings. */ +    if (digest == NULL) { +#ifdef NID_sha512_224 +        if (!strcmp(name, "sha512_224") || !strcmp(name, "SHA512_224")) { +            digest = EVP_sha512_224(); +        } +        else if (!strcmp(name, "sha512_256") || !strcmp(name, "SHA512_256")) { +            digest = EVP_sha512_256(); +        } +#endif +#ifdef PY_OPENSSL_HAS_SHA3 +        /* could be sha3_ or shake_, Python never defined upper case */ +        else if (!strcmp(name, "sha3_224")) { +            digest = EVP_sha3_224(); +        } +        else if (!strcmp(name, "sha3_256")) { +            digest = EVP_sha3_256(); +        } +        else if (!strcmp(name, "sha3_384")) { +            digest = EVP_sha3_384(); +        } +        else if (!strcmp(name, "sha3_512")) { +            digest = EVP_sha3_512(); +        } +        else if (!strcmp(name, "shake_128")) { +            digest = EVP_shake128(); +        } +        else if (!strcmp(name, "shake_256")) { +            digest = EVP_shake256(); +        } +#endif +#ifdef PY_OPENSSL_HAS_BLAKE2 +        else if (!strcmp(name, "blake2s256")) { +            digest = EVP_blake2s256(); +        } +        else if (!strcmp(name, "blake2b512")) { +            digest = EVP_blake2b512(); +        } +#endif +    } + +    return digest; +} +  static EVPobject 
*  newEVPobject(void)  { @@ -304,16 +441,7 @@ EVP_get_digest_size(EVPobject *self, void *closure)  static PyObject *  EVP_get_name(EVPobject *self, void *closure)  { -    const char *name = EVP_MD_name(EVP_MD_CTX_md(self->ctx)); -    PyObject *name_obj, *name_lower; - -    name_obj = PyUnicode_FromString(name); -    if (!name_obj) { -        return NULL; -    } -    name_lower = PyObject_CallMethod(name_obj, "lower", NULL); -    Py_DECREF(name_obj); -    return name_lower; +    return py_digest_name(EVP_MD_CTX_md(self->ctx));  }  static PyGetSetDef EVP_getseters[] = { @@ -337,7 +465,7 @@ static PyObject *  EVP_repr(EVPobject *self)  {      PyObject *name_obj, *repr; -    name_obj = EVP_get_name(self, NULL); +    name_obj = py_digest_name(EVP_MD_CTX_md(self->ctx));      if (!name_obj) {          return NULL;      } @@ -403,6 +531,7 @@ static PyTypeObject EVPtype = {      0,                  /* tp_dictoffset */  }; +\  static PyObject *  EVPnew(const EVP_MD *digest,         const unsigned char *cp, Py_ssize_t len, int usedforsecurity) @@ -485,7 +614,7 @@ EVP_new_impl(PyObject *module, PyObject *name_obj, PyObject *data_obj,      if (data_obj)          GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view); -    digest = EVP_get_digestbyname(name); +    digest = py_digest_by_name(name);      ret_obj = EVPnew(digest,                       (unsigned char*)view.buf, view.len, @@ -922,21 +1051,17 @@ typedef struct _internal_name_mapper_state {  /* A callback function to pass to OpenSSL's OBJ_NAME_do_all(...) 
*/  static void -_openssl_hash_name_mapper(const OBJ_NAME *openssl_obj_name, void *arg) +_openssl_hash_name_mapper(const EVP_MD *md, const char *from, +                          const char *to, void *arg)  {      _InternalNameMapperState *state = (_InternalNameMapperState *)arg;      PyObject *py_name;      assert(state != NULL); -    if (openssl_obj_name == NULL) -        return; -    /* Ignore aliased names, they pollute the list and OpenSSL appears to -     * have its own definition of alias as the resulting list still -     * contains duplicate and alternate names for several algorithms.     */ -    if (openssl_obj_name->alias) +    if (md == NULL)          return; -    py_name = PyUnicode_FromString(openssl_obj_name->name); +    py_name = py_digest_name(md);      if (py_name == NULL) {          state->error = 1;      } else { @@ -958,7 +1083,7 @@ generate_hash_name_list(void)          return NULL;      state.error = 0; -    OBJ_NAME_do_all(OBJ_NAME_TYPE_MD_METH, &_openssl_hash_name_mapper, &state); +    EVP_MD_do_all(&_openssl_hash_name_mapper, &state);      if (state.error) {          Py_DECREF(state.set); | 
