summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Doc/library/gc.rst25
-rw-r--r--Include/dictobject.h1
-rw-r--r--Include/objimpl.h11
-rw-r--r--Include/tupleobject.h1
-rw-r--r--Lib/test/test_dict.py98
-rw-r--r--Lib/test/test_gc.py31
-rw-r--r--Lib/test/test_tuple.py65
-rw-r--r--Misc/NEWS5
-rw-r--r--Modules/gcmodule.c30
-rw-r--r--Objects/dictobject.c82
-rw-r--r--Objects/tupleobject.c54
11 files changed, 401 insertions, 2 deletions
diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst
index 9ebbf06c7b..65f0f39a15 100644
--- a/Doc/library/gc.rst
+++ b/Doc/library/gc.rst
@@ -140,6 +140,31 @@ The :mod:`gc` module provides the following functions:
.. versionadded:: 2.3
+.. function:: is_tracked(obj)
+
+ Returns True if the object is currently tracked by the garbage collector,
+ False otherwise. As a general rule, instances of atomic types aren't
+ tracked and instances of non-atomic types (containers, user-defined
+ objects...) are. However, some type-specific optimizations can be present
+ in order to suppress the garbage collector footprint of simple instances
+ (e.g. dicts containing only atomic keys and values)::
+
+ >>> gc.is_tracked(0)
+ False
+ >>> gc.is_tracked("a")
+ False
+ >>> gc.is_tracked([])
+ True
+ >>> gc.is_tracked({})
+ False
+ >>> gc.is_tracked({"a": 1})
+ False
+ >>> gc.is_tracked({"a": []})
+ True
+
+ .. versionadded:: 2.7
+
+
The following variable is provided for read-only access (you can mutate its
value but should not rebind it):
diff --git a/Include/dictobject.h b/Include/dictobject.h
index b83cd0e893..06e0a7ebe7 100644
--- a/Include/dictobject.h
+++ b/Include/dictobject.h
@@ -111,6 +111,7 @@ PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp);
PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key);
PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash);
PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
+PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp);
/* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */
PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other);
diff --git a/Include/objimpl.h b/Include/objimpl.h
index ef47218fce..55186b176b 100644
--- a/Include/objimpl.h
+++ b/Include/objimpl.h
@@ -285,6 +285,17 @@ extern PyGC_Head *_PyGC_generation0;
g->gc.gc_next = NULL; \
} while (0);
+/* True if the object is currently tracked by the GC. */
+#define _PyObject_GC_IS_TRACKED(o) \
+ ((_Py_AS_GC(o))->gc.gc_refs != _PyGC_REFS_UNTRACKED)
+
+/* True if the object may be tracked by the GC in the future, or already is.
+ This can be useful to implement some optimizations. */
+#define _PyObject_GC_MAY_BE_TRACKED(obj) \
+ (PyObject_IS_GC(obj) && \
+ (!PyTuple_CheckExact(obj) || _PyObject_GC_IS_TRACKED(obj)))
+
+
PyAPI_FUNC(PyObject *) _PyObject_GC_Malloc(size_t);
PyAPI_FUNC(PyObject *) _PyObject_GC_New(PyTypeObject *);
PyAPI_FUNC(PyVarObject *) _PyObject_GC_NewVar(PyTypeObject *, Py_ssize_t);
diff --git a/Include/tupleobject.h b/Include/tupleobject.h
index 58479ee0ff..a5ab733208 100644
--- a/Include/tupleobject.h
+++ b/Include/tupleobject.h
@@ -44,6 +44,7 @@ PyAPI_FUNC(int) PyTuple_SetItem(PyObject *, Py_ssize_t, PyObject *);
PyAPI_FUNC(PyObject *) PyTuple_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t);
PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t);
PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...);
+PyAPI_FUNC(void) _PyTuple_MaybeUntrack(PyObject *);
/* Macro, trading safety for speed */
#define PyTuple_GET_ITEM(op, i) (((PyTupleObject *)(op))->ob_item[i])
diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py
index 0907744ab5..b73a53f580 100644
--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@@ -569,6 +569,104 @@ class DictTest(unittest.TestCase):
gc.collect()
self.assert_(ref() is None, "Cycle was not collected")
+ def _not_tracked(self, t):
+ # Nested containers can take several collections to untrack
+ gc.collect()
+ gc.collect()
+ self.assertFalse(gc.is_tracked(t), t)
+
+ def _tracked(self, t):
+ self.assertTrue(gc.is_tracked(t), t)
+ gc.collect()
+ gc.collect()
+ self.assertTrue(gc.is_tracked(t), t)
+
+ def test_track_literals(self):
+ # Test GC-optimization of dict literals
+ x, y, z, w = 1.5, "a", (1, None), []
+
+ self._not_tracked({})
+ self._not_tracked({x:(), y:x, z:1})
+ self._not_tracked({1: "a", "b": 2})
+ self._not_tracked({1: 2, (None, True, False, ()): int})
+ self._not_tracked({1: object()})
+
+ # Dicts with mutable elements are always tracked, even if those
+ # elements are not tracked right now.
+ self._tracked({1: []})
+ self._tracked({1: ([],)})
+ self._tracked({1: {}})
+ self._tracked({1: set()})
+
+ def test_track_dynamic(self):
+ # Test GC-optimization of dynamically-created dicts
+ class MyObject(object):
+ pass
+ x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject()
+
+ d = dict()
+ self._not_tracked(d)
+ d[1] = "a"
+ self._not_tracked(d)
+ d[y] = 2
+ self._not_tracked(d)
+ d[z] = 3
+ self._not_tracked(d)
+ self._not_tracked(d.copy())
+ d[4] = w
+ self._tracked(d)
+ self._tracked(d.copy())
+ d[4] = None
+ self._not_tracked(d)
+ self._not_tracked(d.copy())
+
+ # dd isn't tracked right now, but it may mutate and therefore d
+ # which contains it must be tracked.
+ d = dict()
+ dd = dict()
+ d[1] = dd
+ self._not_tracked(dd)
+ self._tracked(d)
+ dd[1] = d
+ self._tracked(dd)
+
+ d = dict.fromkeys([x, y, z])
+ self._not_tracked(d)
+ dd = dict()
+ dd.update(d)
+ self._not_tracked(dd)
+ d = dict.fromkeys([x, y, z, o])
+ self._tracked(d)
+ dd = dict()
+ dd.update(d)
+ self._tracked(dd)
+
+ d = dict(x=x, y=y, z=z)
+ self._not_tracked(d)
+ d = dict(x=x, y=y, z=z, w=w)
+ self._tracked(d)
+ d = dict()
+ d.update(x=x, y=y, z=z)
+ self._not_tracked(d)
+ d.update(w=w)
+ self._tracked(d)
+
+ d = dict([(x, y), (z, 1)])
+ self._not_tracked(d)
+ d = dict([(x, y), (z, w)])
+ self._tracked(d)
+ d = dict()
+ d.update([(x, y), (z, 1)])
+ self._not_tracked(d)
+ d.update([(x, y), (z, w)])
+ self._tracked(d)
+
+ def test_track_subtypes(self):
+ # Dict subtypes are always tracked
+ class MyDict(dict):
+ pass
+ self._tracked(MyDict())
+
from test import mapping_tests
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 6e2ea4198f..e9af550bf8 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -415,6 +415,37 @@ class GCTests(unittest.TestCase):
self.assertEqual(gc.get_referents(1, 'a', 4j), [])
+ def test_is_tracked(self):
+ # Atomic built-in types are not tracked, user-defined objects and
+ # mutable containers are.
+ # NOTE: types with special optimizations (e.g. tuple) have tests
+ # in their own test files instead.
+ self.assertFalse(gc.is_tracked(None))
+ self.assertFalse(gc.is_tracked(1))
+ self.assertFalse(gc.is_tracked(1.0))
+ self.assertFalse(gc.is_tracked(1.0 + 5.0j))
+ self.assertFalse(gc.is_tracked(True))
+ self.assertFalse(gc.is_tracked(False))
+ self.assertFalse(gc.is_tracked("a"))
+ self.assertFalse(gc.is_tracked(u"a"))
+ self.assertFalse(gc.is_tracked(bytearray("a")))
+ self.assertFalse(gc.is_tracked(type))
+ self.assertFalse(gc.is_tracked(int))
+ self.assertFalse(gc.is_tracked(object))
+ self.assertFalse(gc.is_tracked(object()))
+
+ class OldStyle:
+ pass
+ class NewStyle(object):
+ pass
+ self.assertTrue(gc.is_tracked(gc))
+ self.assertTrue(gc.is_tracked(OldStyle))
+ self.assertTrue(gc.is_tracked(OldStyle()))
+ self.assertTrue(gc.is_tracked(NewStyle))
+ self.assertTrue(gc.is_tracked(NewStyle()))
+ self.assertTrue(gc.is_tracked([]))
+ self.assertTrue(gc.is_tracked(set()))
+
def test_bug1055820b(self):
# Corresponds to temp2b.py in the bug report.
diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py
index 15bc2956cf..89cbe1d4c5 100644
--- a/Lib/test/test_tuple.py
+++ b/Lib/test/test_tuple.py
@@ -1,5 +1,7 @@
from test import test_support, seq_tests
+import gc
+
class TupleTest(seq_tests.CommonTest):
type2test = tuple
@@ -82,6 +84,69 @@ class TupleTest(seq_tests.CommonTest):
self.assertEqual(repr(a0), "()")
self.assertEqual(repr(a2), "(0, 1, 2)")
+ def _not_tracked(self, t):
+ # Nested tuples can take several collections to untrack
+ gc.collect()
+ gc.collect()
+ self.assertFalse(gc.is_tracked(t), t)
+
+ def _tracked(self, t):
+ self.assertTrue(gc.is_tracked(t), t)
+ gc.collect()
+ gc.collect()
+ self.assertTrue(gc.is_tracked(t), t)
+
+ def test_track_literals(self):
+ # Test GC-optimization of tuple literals
+ x, y, z = 1.5, "a", []
+
+ self._not_tracked(())
+ self._not_tracked((1,))
+ self._not_tracked((1, 2))
+ self._not_tracked((1, 2, "a"))
+ self._not_tracked((1, 2, (None, True, False, ()), int))
+ self._not_tracked((object(),))
+ self._not_tracked(((1, x), y, (2, 3)))
+
+ # Tuples with mutable elements are always tracked, even if those
+ # elements are not tracked right now.
+ self._tracked(([],))
+ self._tracked(([1],))
+ self._tracked(({},))
+ self._tracked((set(),))
+ self._tracked((x, y, z))
+
+ def check_track_dynamic(self, tp, always_track):
+ x, y, z = 1.5, "a", []
+
+ check = self._tracked if always_track else self._not_tracked
+ check(tp())
+ check(tp([]))
+ check(tp(set()))
+ check(tp([1, x, y]))
+ check(tp(obj for obj in [1, x, y]))
+ check(tp(set([1, x, y])))
+ check(tp(tuple([obj]) for obj in [1, x, y]))
+ check(tuple(tp([obj]) for obj in [1, x, y]))
+
+ self._tracked(tp([z]))
+ self._tracked(tp([[x, y]]))
+ self._tracked(tp([{x: y}]))
+ self._tracked(tp(obj for obj in [x, y, z]))
+ self._tracked(tp(tuple([obj]) for obj in [x, y, z]))
+ self._tracked(tuple(tp([obj]) for obj in [x, y, z]))
+
+ def test_track_dynamic(self):
+ # Test GC-optimization of dynamically constructed tuples.
+ self.check_track_dynamic(tuple, False)
+
+ def test_track_subtypes(self):
+ # Tuple subtypes must always be tracked
+ class MyTuple(tuple):
+ pass
+ self.check_track_dynamic(MyTuple, True)
+
+
def test_main():
test_support.run_unittest(TupleTest)
diff --git a/Misc/NEWS b/Misc/NEWS
index e96ec1726a..d73453698d 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,11 @@ What's New in Python 2.7 alpha 1
Core and Builtins
-----------------
+- Issue #4688: Add a heuristic so that tuples and dicts containing only
+ untrackable objects are not tracked by the garbage collector. This can
+ reduce the size of collections and therefore the garbage collection overhead
+ on long-running programs, depending on their particular use of datatypes.
+
- Issue #5512: Rewrite PyLong long division algorithm (x_divrem) to
improve its performance. Long divisions and remainder operations
are now between 50% and 150% faster.
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 4d71591466..d9bea73c33 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -432,7 +432,13 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
(void) traverse(op,
(visitproc)visit_reachable,
(void *)young);
- next = gc->gc.gc_next;
+ next = gc->gc.gc_next;
+ if (PyTuple_CheckExact(op)) {
+ _PyTuple_MaybeUntrack(op);
+ }
+ else if (PyDict_CheckExact(op)) {
+ _PyDict_MaybeUntrack(op);
+ }
}
else {
/* This *may* be unreachable. To make progress,
@@ -1264,6 +1270,26 @@ gc_get_objects(PyObject *self, PyObject *noargs)
return result;
}
+PyDoc_STRVAR(gc_is_tracked__doc__,
+"is_tracked(obj) -> bool\n"
+"\n"
+"Returns true if the object is tracked by the garbage collector.\n"
+"Simple atomic objects will return false.\n"
+);
+
+static PyObject *
+gc_is_tracked(PyObject *self, PyObject *obj)
+{
+ PyObject *result;
+
+ if (PyObject_IS_GC(obj) && IS_TRACKED(obj))
+ result = Py_True;
+ else
+ result = Py_False;
+ Py_INCREF(result);
+ return result;
+}
+
PyDoc_STRVAR(gc__doc__,
"This module provides access to the garbage collector for reference cycles.\n"
@@ -1278,6 +1304,7 @@ PyDoc_STRVAR(gc__doc__,
"set_threshold() -- Set the collection thresholds.\n"
"get_threshold() -- Return the current the collection thresholds.\n"
"get_objects() -- Return a list of all objects tracked by the collector.\n"
+"is_tracked() -- Returns true if a given object is tracked.\n"
"get_referrers() -- Return the list of objects that refer to an object.\n"
"get_referents() -- Return the list of objects that an object refers to.\n");
@@ -1293,6 +1320,7 @@ static PyMethodDef GcMethods[] = {
{"collect", (PyCFunction)gc_collect,
METH_VARARGS | METH_KEYWORDS, gc_collect__doc__},
{"get_objects", gc_get_objects,METH_NOARGS, gc_get_objects__doc__},
+ {"is_tracked", gc_is_tracked, METH_O, gc_is_tracked__doc__},
{"get_referrers", gc_get_referrers, METH_VARARGS,
gc_get_referrers__doc__},
{"get_referents", gc_get_referents, METH_VARARGS,
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index f4d86835e9..5069c76398 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -180,6 +180,24 @@ show_alloc(void)
}
#endif
+/* Debug statistic to count GC tracking of dicts */
+#ifdef SHOW_TRACK_COUNT
+static Py_ssize_t count_untracked = 0;
+static Py_ssize_t count_tracked = 0;
+
+static void
+show_track(void)
+{
+ fprintf(stderr, "Dicts created: %" PY_FORMAT_SIZE_T "d\n",
+ count_tracked + count_untracked);
+ fprintf(stderr, "Dicts tracked by the GC: %" PY_FORMAT_SIZE_T
+ "d\n", count_tracked);
+ fprintf(stderr, "%.2f%% dict tracking rate\n\n",
+ (100.0*count_tracked/(count_untracked+count_tracked)));
+}
+#endif
+
+
/* Initialization macros.
There are two ways to create a dict: PyDict_New() is the main C API
function, and the tp_new slot maps to dict_new(). In the latter case we
@@ -233,6 +251,9 @@ PyDict_New(void)
#ifdef SHOW_ALLOC_COUNT
Py_AtExit(show_alloc);
#endif
+#ifdef SHOW_TRACK_COUNT
+ Py_AtExit(show_track);
+#endif
}
if (numfree) {
mp = free_list[--numfree];
@@ -262,10 +283,12 @@ PyDict_New(void)
#endif
}
mp->ma_lookup = lookdict_string;
+#ifdef SHOW_TRACK_COUNT
+ count_untracked++;
+#endif
#ifdef SHOW_CONVERSION_COUNTS
++created;
#endif
- _PyObject_GC_TRACK(mp);
return (PyObject *)mp;
}
@@ -433,6 +456,52 @@ lookdict_string(PyDictObject *mp, PyObject *key, register long hash)
return 0;
}
+#ifdef SHOW_TRACK_COUNT
+#define INCREASE_TRACK_COUNT \
+ (count_tracked++, count_untracked--);
+#define DECREASE_TRACK_COUNT \
+ (count_tracked--, count_untracked++);
+#else
+#define INCREASE_TRACK_COUNT
+#define DECREASE_TRACK_COUNT
+#endif
+
+#define MAINTAIN_TRACKING(mp, key, value) \
+ do { \
+ if (!_PyObject_GC_IS_TRACKED(mp)) { \
+ if (_PyObject_GC_MAY_BE_TRACKED(key) || \
+ _PyObject_GC_MAY_BE_TRACKED(value)) { \
+ _PyObject_GC_TRACK(mp); \
+ INCREASE_TRACK_COUNT \
+ } \
+ } \
+ } while(0)
+
+void
+_PyDict_MaybeUntrack(PyObject *op)
+{
+ PyDictObject *mp;
+ PyObject *value;
+ Py_ssize_t mask, i;
+ PyDictEntry *ep;
+
+ if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
+ return;
+
+ mp = (PyDictObject *) op;
+ ep = mp->ma_table;
+ mask = mp->ma_mask;
+ for (i = 0; i <= mask; i++) {
+ if ((value = ep[i].me_value) == NULL)
+ continue;
+ if (_PyObject_GC_MAY_BE_TRACKED(value) ||
+ _PyObject_GC_MAY_BE_TRACKED(ep[i].me_key))
+ return;
+ }
+ _PyObject_GC_UNTRACK(op);
+}
+
+
/*
Internal routine to insert a new item into the table.
Used both by the internal resize routine and by the public insert routine.
@@ -453,6 +522,7 @@ insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
Py_DECREF(value);
return -1;
}
+ MAINTAIN_TRACKING(mp, key, value);
if (ep->me_value != NULL) {
old_value = ep->me_value;
ep->me_value = value;
@@ -492,6 +562,7 @@ insertdict_clean(register PyDictObject *mp, PyObject *key, long hash,
PyDictEntry *ep0 = mp->ma_table;
register PyDictEntry *ep;
+ MAINTAIN_TRACKING(mp, key, value);
i = hash & mask;
ep = &ep0[i];
for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
@@ -2202,9 +2273,18 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0);
INIT_NONZERO_DICT_SLOTS(d);
d->ma_lookup = lookdict_string;
+ /* The object has been implicitely tracked by tp_alloc */
+ if (type == &PyDict_Type)
+ _PyObject_GC_UNTRACK(d);
#ifdef SHOW_CONVERSION_COUNTS
++created;
#endif
+#ifdef SHOW_TRACK_COUNT
+ if (_PyObject_GC_IS_TRACKED(d))
+ count_tracked++;
+ else
+ count_untracked++;
+#endif
}
return self;
}
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 74d392a4e5..644d8a92f6 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -23,11 +23,36 @@ Py_ssize_t fast_tuple_allocs;
Py_ssize_t tuple_zero_allocs;
#endif
+/* Debug statistic to count GC tracking of tuples.
+ Please note that tuples are only untracked when considered by the GC, and
+ many of them will be dead before. Therefore, a tracking rate close to 100%
+ does not necessarily prove that the heuristic is inefficient.
+*/
+#ifdef SHOW_TRACK_COUNT
+static Py_ssize_t count_untracked = 0;
+static Py_ssize_t count_tracked = 0;
+
+static void
+show_track(void)
+{
+ fprintf(stderr, "Tuples created: %" PY_FORMAT_SIZE_T "d\n",
+ count_tracked + count_untracked);
+ fprintf(stderr, "Tuples tracked by the GC: %" PY_FORMAT_SIZE_T
+ "d\n", count_tracked);
+ fprintf(stderr, "%.2f%% tuple tracking rate\n\n",
+ (100.0*count_tracked/(count_untracked+count_tracked)));
+}
+#endif
+
+
PyObject *
PyTuple_New(register Py_ssize_t size)
{
register PyTupleObject *op;
Py_ssize_t i;
+#ifdef SHOW_TRACK_COUNT
+ count_tracked++;
+#endif
if (size < 0) {
PyErr_BadInternalCall();
return NULL;
@@ -131,6 +156,32 @@ PyTuple_SetItem(register PyObject *op, register Py_ssize_t i, PyObject *newitem)
return 0;
}
+void
+_PyTuple_MaybeUntrack(PyObject *op)
+{
+ PyTupleObject *t;
+ Py_ssize_t i, n;
+
+ if (!PyTuple_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
+ return;
+ t = (PyTupleObject *) op;
+ n = Py_SIZE(t);
+ for (i = 0; i < n; i++) {
+ PyObject *elt = PyTuple_GET_ITEM(t, i);
+ /* Tuple with NULL elements aren't
+ fully constructed, don't untrack
+ them yet. */
+ if (!elt ||
+ _PyObject_GC_MAY_BE_TRACKED(elt))
+ return;
+ }
+#ifdef SHOW_TRACK_COUNT
+ count_tracked--;
+ count_untracked++;
+#endif
+ _PyObject_GC_UNTRACK(op);
+}
+
PyObject *
PyTuple_Pack(Py_ssize_t n, ...)
{
@@ -880,6 +931,9 @@ PyTuple_Fini(void)
(void)PyTuple_ClearFreeList();
#endif
+#ifdef SHOW_TRACK_COUNT
+ show_track();
+#endif
}
/*********************** Tuple Iterator **************************/