summary | refs | log | tree | commit | diff
path: root/Python/pyhash.c
diff options
context:
space:
mode:
author: Raymond Hettinger <rhettinger@users.noreply.github.com> 2021-04-22 08:34:57 -0700
committer: GitHub <noreply@github.com> 2021-04-22 08:34:57 -0700
commit: a07da09ad5bd7d234ccd084a3a0933c290d1b592 (patch)
tree: 8c1ab67575527bd5c0c9452a74458ad5a29a1d08 /Python/pyhash.c
parent: accea7dc2bd30a6e8e1b0334acfca9585cbd7f8a (diff)
download: cpython-git-a07da09ad5bd7d234ccd084a3a0933c290d1b592.tar.gz
bpo-43475: Fix worst case collision behavior for NaN instances (GH-25493)
Diffstat (limited to 'Python/pyhash.c')
-rw-r--r-- Python/pyhash.c 14
1 file changed, 10 insertions, 4 deletions
diff --git a/Python/pyhash.c b/Python/pyhash.c
index 3b6c34eefd..f0c82356f1 100644
--- a/Python/pyhash.c
+++ b/Python/pyhash.c
@@ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
If the result of the reduction is infinity (this is impossible for
integers, floats and Decimals) then use the predefined hash value
_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
- _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
- hashes of float and Decimal infinities and nans.
+ _PyHASH_INF and -_PyHASH_INF are also used for the
+ hashes of float and Decimal infinities.
+
+ NaNs hash with a pointer hash. Having distinct hash values prevents
+ catastrophic pileups from distinct NaN instances which used to always
+ have the same hash value but would compare unequal.
A selling point for the above strategy is that it makes it possible
to compute hashes of decimal and binary floating-point numbers
@@ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
*/
+Py_hash_t _Py_HashPointer(const void *);
+
Py_hash_t
-_Py_HashDouble(double v)
+_Py_HashDouble(PyObject *inst, double v)
{
int e, sign;
double m;
@@ -93,7 +99,7 @@ _Py_HashDouble(double v)
if (Py_IS_INFINITY(v))
return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
else
- return _PyHASH_NAN;
+ return _Py_HashPointer(inst);
}
m = frexp(v, &e);