SF patch #729395: Dictionary tuning

* Increase the dictionary growth rate, resulting in sparser dictionaries, fewer lookup collisions, increased memory use, and better cache performance. For dicts with over 50k entries, keep the current growth rate in case an application is suffering from tight memory constraints.
* Set the most common case (no resize) to fall through the test.
author: Raymond Hettinger <python@rcn.com> 2003-05-05 22:22:10 +0000
committer: Raymond Hettinger <python@rcn.com> 2003-05-05 22:22:10 +0000
commit: 3539f6b895814ad79c455f7fe633ce253c77c140 (patch)
tree: 8270deea9c957faf0203087cf8b3c47d96e45161 /Objects/dictobject.c
parent: f2e488db239054e41edbcc645364867d234b7259 (diff)
download: cpython-git-3539f6b895814ad79c455f7fe633ce253c77c140.tar.gz
1 files changed, 16 insertions, 10 deletions
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index f0e93f8c9a..f3adc0bfdc 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -531,17 +531,23 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
 	Py_INCREF(value);
 	Py_INCREF(key);
 	insertdict(mp, key, hash, value);
-	/* If we added a key, we can safely resize.  Otherwise skip this!
-	 * If fill >= 2/3 size, adjust size.  Normally, this doubles the
-	 * size, but it's also possible for the dict to shrink (if ma_fill is
-	 * much larger than ma_used, meaning a lot of dict keys have been
-	 * deleted).
+	/* If we added a key, we can safely resize.  Otherwise just return!
+	 * If fill >= 2/3 size, adjust size.  Normally, this doubles or
+	 * quadruples the size, but it's also possible for the dict to shrink
+	 * (if ma_fill is much larger than ma_used, meaning a lot of dict 
+	 * keys have been deleted).
+	 * 
+	 * Quadrupling the size improves average dictionary sparseness
+	 * (reducing collisions) at the cost of some memory and iteration
+	 * speed (which loops over every possible entry).  It also halves
+	 * the number of expensive resize operations in a growing dictionary.
+	 * 
+	 * Very large dictionaries (over 50K items) use doubling instead.  
+	 * This may help applications with severe memory constraints.
 	 */
-	if (mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2) {
-		if (dictresize(mp, mp->ma_used*2) != 0)
-			return -1;
-	}
-	return 0;
+	if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
+		return 0;
+	return dictresize(mp, mp->ma_used*(mp->ma_used>50000 ? 2 : 4));
 }
 
 int
author	Raymond Hettinger <python@rcn.com>	2003-05-05 22:22:10 +0000
committer	Raymond Hettinger <python@rcn.com>	2003-05-05 22:22:10 +0000
commit	3539f6b895814ad79c455f7fe633ce253c77c140 (patch)
tree	8270deea9c957faf0203087cf8b3c47d96e45161 /Objects/dictobject.c
parent	f2e488db239054e41edbcc645364867d234b7259 (diff)
download	cpython-git-3539f6b895814ad79c455f7fe633ce253c77c140.tar.gz