diff options
author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2021-04-22 08:34:57 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-22 08:34:57 -0700 |
commit | a07da09ad5bd7d234ccd084a3a0933c290d1b592 (patch) | |
tree | 8c1ab67575527bd5c0c9452a74458ad5a29a1d08 /Python/pyhash.c | |
parent | accea7dc2bd30a6e8e1b0334acfca9585cbd7f8a (diff) | |
download | cpython-git-a07da09ad5bd7d234ccd084a3a0933c290d1b592.tar.gz |
bpo-43475: Fix worst case collision behavior for NaN instances (GH-25493)
Diffstat (limited to 'Python/pyhash.c')
-rw-r--r-- | Python/pyhash.c | 14 |
1 file changed, 10 insertions, 4 deletions
```diff
diff --git a/Python/pyhash.c b/Python/pyhash.c
index 3b6c34eefd..f0c82356f1 100644
--- a/Python/pyhash.c
+++ b/Python/pyhash.c
@@ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
    If the result of the reduction is infinity (this is impossible for
    integers, floats and Decimals) then use the predefined hash value
    _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
-   _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
-   hashes of float and Decimal infinities and nans.
+   _PyHASH_INF and -_PyHASH_INF are also used for the
+   hashes of float and Decimal infinities.
+
+   NaNs hash with a pointer hash. Having distinct hash values prevents
+   catastrophic pileups from distinct NaN instances which used to always
+   have the same hash value but would compare unequal.
 
    A selling point for the above strategy is that it makes it possible
    to compute hashes of decimal and binary floating-point numbers
@@ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
 
    */
 
+Py_hash_t _Py_HashPointer(const void *);
+
 Py_hash_t
-_Py_HashDouble(double v)
+_Py_HashDouble(PyObject *inst, double v)
 {
     int e, sign;
     double m;
@@ -93,7 +99,7 @@ _Py_HashDouble(double v)
         if (Py_IS_INFINITY(v))
             return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
         else
-            return _PyHASH_NAN;
+            return _Py_HashPointer(inst);
     }
 
     m = frexp(v, &e);
```