diff options
-rw-r--r-- | Doc/library/gc.rst | 25 | ||||
-rw-r--r-- | Include/dictobject.h | 1 | ||||
-rw-r--r-- | Include/objimpl.h | 11 | ||||
-rw-r--r-- | Include/tupleobject.h | 1 | ||||
-rw-r--r-- | Lib/test/test_dict.py | 98 | ||||
-rw-r--r-- | Lib/test/test_gc.py | 31 | ||||
-rw-r--r-- | Lib/test/test_tuple.py | 65 | ||||
-rw-r--r-- | Misc/NEWS | 5 | ||||
-rw-r--r-- | Modules/gcmodule.c | 30 | ||||
-rw-r--r-- | Objects/dictobject.c | 82 | ||||
-rw-r--r-- | Objects/tupleobject.c | 54 |
11 files changed, 401 insertions, 2 deletions
diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 9ebbf06c7b..65f0f39a15 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -140,6 +140,31 @@ The :mod:`gc` module provides the following functions: .. versionadded:: 2.3 +.. function:: is_tracked(obj) + + Returns True if the object is currently tracked by the garbage collector, + False otherwise. As a general rule, instances of atomic types aren't + tracked and instances of non-atomic types (containers, user-defined + objects...) are. However, some type-specific optimizations can be present + in order to suppress the garbage collector footprint of simple instances + (e.g. dicts containing only atomic keys and values):: + + >>> gc.is_tracked(0) + False + >>> gc.is_tracked("a") + False + >>> gc.is_tracked([]) + True + >>> gc.is_tracked({}) + False + >>> gc.is_tracked({"a": 1}) + False + >>> gc.is_tracked({"a": []}) + True + + .. versionadded:: 2.7 + + The following variable is provided for read-only access (you can mutate its value but should not rebind it): diff --git a/Include/dictobject.h b/Include/dictobject.h index b83cd0e893..06e0a7ebe7 100644 --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -111,6 +111,7 @@ PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp); PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key); PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash); PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused); +PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp); /* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */ PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other); diff --git a/Include/objimpl.h b/Include/objimpl.h index ef47218fce..55186b176b 100644 --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -285,6 +285,17 @@ extern PyGC_Head *_PyGC_generation0; g->gc.gc_next = NULL; \ } while (0); +/* True if the object is currently tracked by the GC. 
*/ +#define _PyObject_GC_IS_TRACKED(o) \ + ((_Py_AS_GC(o))->gc.gc_refs != _PyGC_REFS_UNTRACKED) + +/* True if the object may be tracked by the GC in the future, or already is. + This can be useful to implement some optimizations. */ +#define _PyObject_GC_MAY_BE_TRACKED(obj) \ + (PyObject_IS_GC(obj) && \ + (!PyTuple_CheckExact(obj) || _PyObject_GC_IS_TRACKED(obj))) + + PyAPI_FUNC(PyObject *) _PyObject_GC_Malloc(size_t); PyAPI_FUNC(PyObject *) _PyObject_GC_New(PyTypeObject *); PyAPI_FUNC(PyVarObject *) _PyObject_GC_NewVar(PyTypeObject *, Py_ssize_t); diff --git a/Include/tupleobject.h b/Include/tupleobject.h index 58479ee0ff..a5ab733208 100644 --- a/Include/tupleobject.h +++ b/Include/tupleobject.h @@ -44,6 +44,7 @@ PyAPI_FUNC(int) PyTuple_SetItem(PyObject *, Py_ssize_t, PyObject *); PyAPI_FUNC(PyObject *) PyTuple_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t); PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t); PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...); +PyAPI_FUNC(void) _PyTuple_MaybeUntrack(PyObject *); /* Macro, trading safety for speed */ #define PyTuple_GET_ITEM(op, i) (((PyTupleObject *)(op))->ob_item[i]) diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 0907744ab5..b73a53f580 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -569,6 +569,104 @@ class DictTest(unittest.TestCase): gc.collect() self.assert_(ref() is None, "Cycle was not collected") + def _not_tracked(self, t): + # Nested containers can take several collections to untrack + gc.collect() + gc.collect() + self.assertFalse(gc.is_tracked(t), t) + + def _tracked(self, t): + self.assertTrue(gc.is_tracked(t), t) + gc.collect() + gc.collect() + self.assertTrue(gc.is_tracked(t), t) + + def test_track_literals(self): + # Test GC-optimization of dict literals + x, y, z, w = 1.5, "a", (1, None), [] + + self._not_tracked({}) + self._not_tracked({x:(), y:x, z:1}) + self._not_tracked({1: "a", "b": 2}) + self._not_tracked({1: 2, (None, True, False, ()): int}) + 
self._not_tracked({1: object()}) + + # Dicts with mutable elements are always tracked, even if those + # elements are not tracked right now. + self._tracked({1: []}) + self._tracked({1: ([],)}) + self._tracked({1: {}}) + self._tracked({1: set()}) + + def test_track_dynamic(self): + # Test GC-optimization of dynamically-created dicts + class MyObject(object): + pass + x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject() + + d = dict() + self._not_tracked(d) + d[1] = "a" + self._not_tracked(d) + d[y] = 2 + self._not_tracked(d) + d[z] = 3 + self._not_tracked(d) + self._not_tracked(d.copy()) + d[4] = w + self._tracked(d) + self._tracked(d.copy()) + d[4] = None + self._not_tracked(d) + self._not_tracked(d.copy()) + + # dd isn't tracked right now, but it may mutate and therefore d + # which contains it must be tracked. + d = dict() + dd = dict() + d[1] = dd + self._not_tracked(dd) + self._tracked(d) + dd[1] = d + self._tracked(dd) + + d = dict.fromkeys([x, y, z]) + self._not_tracked(d) + dd = dict() + dd.update(d) + self._not_tracked(dd) + d = dict.fromkeys([x, y, z, o]) + self._tracked(d) + dd = dict() + dd.update(d) + self._tracked(dd) + + d = dict(x=x, y=y, z=z) + self._not_tracked(d) + d = dict(x=x, y=y, z=z, w=w) + self._tracked(d) + d = dict() + d.update(x=x, y=y, z=z) + self._not_tracked(d) + d.update(w=w) + self._tracked(d) + + d = dict([(x, y), (z, 1)]) + self._not_tracked(d) + d = dict([(x, y), (z, w)]) + self._tracked(d) + d = dict() + d.update([(x, y), (z, 1)]) + self._not_tracked(d) + d.update([(x, y), (z, w)]) + self._tracked(d) + + def test_track_subtypes(self): + # Dict subtypes are always tracked + class MyDict(dict): + pass + self._tracked(MyDict()) + from test import mapping_tests diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 6e2ea4198f..e9af550bf8 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -415,6 +415,37 @@ class GCTests(unittest.TestCase): self.assertEqual(gc.get_referents(1, 'a', 4j), []) + def 
test_is_tracked(self): + # Atomic built-in types are not tracked, user-defined objects and + # mutable containers are. + # NOTE: types with special optimizations (e.g. tuple) have tests + # in their own test files instead. + self.assertFalse(gc.is_tracked(None)) + self.assertFalse(gc.is_tracked(1)) + self.assertFalse(gc.is_tracked(1.0)) + self.assertFalse(gc.is_tracked(1.0 + 5.0j)) + self.assertFalse(gc.is_tracked(True)) + self.assertFalse(gc.is_tracked(False)) + self.assertFalse(gc.is_tracked("a")) + self.assertFalse(gc.is_tracked(u"a")) + self.assertFalse(gc.is_tracked(bytearray("a"))) + self.assertFalse(gc.is_tracked(type)) + self.assertFalse(gc.is_tracked(int)) + self.assertFalse(gc.is_tracked(object)) + self.assertFalse(gc.is_tracked(object())) + + class OldStyle: + pass + class NewStyle(object): + pass + self.assertTrue(gc.is_tracked(gc)) + self.assertTrue(gc.is_tracked(OldStyle)) + self.assertTrue(gc.is_tracked(OldStyle())) + self.assertTrue(gc.is_tracked(NewStyle)) + self.assertTrue(gc.is_tracked(NewStyle())) + self.assertTrue(gc.is_tracked([])) + self.assertTrue(gc.is_tracked(set())) + def test_bug1055820b(self): # Corresponds to temp2b.py in the bug report. 
diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py index 15bc2956cf..89cbe1d4c5 100644 --- a/Lib/test/test_tuple.py +++ b/Lib/test/test_tuple.py @@ -1,5 +1,7 @@ from test import test_support, seq_tests +import gc + class TupleTest(seq_tests.CommonTest): type2test = tuple @@ -82,6 +84,69 @@ class TupleTest(seq_tests.CommonTest): self.assertEqual(repr(a0), "()") self.assertEqual(repr(a2), "(0, 1, 2)") + def _not_tracked(self, t): + # Nested tuples can take several collections to untrack + gc.collect() + gc.collect() + self.assertFalse(gc.is_tracked(t), t) + + def _tracked(self, t): + self.assertTrue(gc.is_tracked(t), t) + gc.collect() + gc.collect() + self.assertTrue(gc.is_tracked(t), t) + + def test_track_literals(self): + # Test GC-optimization of tuple literals + x, y, z = 1.5, "a", [] + + self._not_tracked(()) + self._not_tracked((1,)) + self._not_tracked((1, 2)) + self._not_tracked((1, 2, "a")) + self._not_tracked((1, 2, (None, True, False, ()), int)) + self._not_tracked((object(),)) + self._not_tracked(((1, x), y, (2, 3))) + + # Tuples with mutable elements are always tracked, even if those + # elements are not tracked right now. + self._tracked(([],)) + self._tracked(([1],)) + self._tracked(({},)) + self._tracked((set(),)) + self._tracked((x, y, z)) + + def check_track_dynamic(self, tp, always_track): + x, y, z = 1.5, "a", [] + + check = self._tracked if always_track else self._not_tracked + check(tp()) + check(tp([])) + check(tp(set())) + check(tp([1, x, y])) + check(tp(obj for obj in [1, x, y])) + check(tp(set([1, x, y]))) + check(tp(tuple([obj]) for obj in [1, x, y])) + check(tuple(tp([obj]) for obj in [1, x, y])) + + self._tracked(tp([z])) + self._tracked(tp([[x, y]])) + self._tracked(tp([{x: y}])) + self._tracked(tp(obj for obj in [x, y, z])) + self._tracked(tp(tuple([obj]) for obj in [x, y, z])) + self._tracked(tuple(tp([obj]) for obj in [x, y, z])) + + def test_track_dynamic(self): + # Test GC-optimization of dynamically constructed tuples. 
+ self.check_track_dynamic(tuple, False) + + def test_track_subtypes(self): + # Tuple subtypes must always be tracked + class MyTuple(tuple): + pass + self.check_track_dynamic(MyTuple, True) + + def test_main(): test_support.run_unittest(TupleTest) @@ -12,6 +12,11 @@ What's New in Python 2.7 alpha 1 Core and Builtins ----------------- +- Issue #4688: Add a heuristic so that tuples and dicts containing only + untrackable objects are not tracked by the garbage collector. This can + reduce the size of collections and therefore the garbage collection overhead + on long-running programs, depending on their particular use of datatypes. + - Issue #5512: Rewrite PyLong long division algorithm (x_divrem) to improve its performance. Long divisions and remainder operations are now between 50% and 150% faster. diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 4d71591466..d9bea73c33 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -432,7 +432,13 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) (void) traverse(op, (visitproc)visit_reachable, (void *)young); - next = gc->gc.gc_next; + next = gc->gc.gc_next; + if (PyTuple_CheckExact(op)) { + _PyTuple_MaybeUntrack(op); + } + else if (PyDict_CheckExact(op)) { + _PyDict_MaybeUntrack(op); + } } else { /* This *may* be unreachable. 
To make progress, @@ -1264,6 +1270,26 @@ gc_get_objects(PyObject *self, PyObject *noargs) return result; } +PyDoc_STRVAR(gc_is_tracked__doc__, +"is_tracked(obj) -> bool\n" +"\n" +"Returns true if the object is tracked by the garbage collector.\n" +"Simple atomic objects will return false.\n" +); + +static PyObject * +gc_is_tracked(PyObject *self, PyObject *obj) +{ + PyObject *result; + + if (PyObject_IS_GC(obj) && IS_TRACKED(obj)) + result = Py_True; + else + result = Py_False; + Py_INCREF(result); + return result; +} + PyDoc_STRVAR(gc__doc__, "This module provides access to the garbage collector for reference cycles.\n" @@ -1278,6 +1304,7 @@ PyDoc_STRVAR(gc__doc__, "set_threshold() -- Set the collection thresholds.\n" "get_threshold() -- Return the current the collection thresholds.\n" "get_objects() -- Return a list of all objects tracked by the collector.\n" +"is_tracked() -- Returns true if a given object is tracked.\n" "get_referrers() -- Return the list of objects that refer to an object.\n" "get_referents() -- Return the list of objects that an object refers to.\n"); @@ -1293,6 +1320,7 @@ static PyMethodDef GcMethods[] = { {"collect", (PyCFunction)gc_collect, METH_VARARGS | METH_KEYWORDS, gc_collect__doc__}, {"get_objects", gc_get_objects,METH_NOARGS, gc_get_objects__doc__}, + {"is_tracked", gc_is_tracked, METH_O, gc_is_tracked__doc__}, {"get_referrers", gc_get_referrers, METH_VARARGS, gc_get_referrers__doc__}, {"get_referents", gc_get_referents, METH_VARARGS, diff --git a/Objects/dictobject.c b/Objects/dictobject.c index f4d86835e9..5069c76398 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -180,6 +180,24 @@ show_alloc(void) } #endif +/* Debug statistic to count GC tracking of dicts */ +#ifdef SHOW_TRACK_COUNT +static Py_ssize_t count_untracked = 0; +static Py_ssize_t count_tracked = 0; + +static void +show_track(void) +{ + fprintf(stderr, "Dicts created: %" PY_FORMAT_SIZE_T "d\n", + count_tracked + count_untracked); + fprintf(stderr, "Dicts 
tracked by the GC: %" PY_FORMAT_SIZE_T + "d\n", count_tracked); + fprintf(stderr, "%.2f%% dict tracking rate\n\n", + (100.0*count_tracked/(count_untracked+count_tracked))); +} +#endif + + /* Initialization macros. There are two ways to create a dict: PyDict_New() is the main C API function, and the tp_new slot maps to dict_new(). In the latter case we @@ -233,6 +251,9 @@ PyDict_New(void) #ifdef SHOW_ALLOC_COUNT Py_AtExit(show_alloc); #endif +#ifdef SHOW_TRACK_COUNT + Py_AtExit(show_track); +#endif } if (numfree) { mp = free_list[--numfree]; @@ -262,10 +283,12 @@ PyDict_New(void) #endif } mp->ma_lookup = lookdict_string; +#ifdef SHOW_TRACK_COUNT + count_untracked++; +#endif #ifdef SHOW_CONVERSION_COUNTS ++created; #endif - _PyObject_GC_TRACK(mp); return (PyObject *)mp; } @@ -433,6 +456,52 @@ lookdict_string(PyDictObject *mp, PyObject *key, register long hash) return 0; } +#ifdef SHOW_TRACK_COUNT +#define INCREASE_TRACK_COUNT \ + (count_tracked++, count_untracked--); +#define DECREASE_TRACK_COUNT \ + (count_tracked--, count_untracked++); +#else +#define INCREASE_TRACK_COUNT +#define DECREASE_TRACK_COUNT +#endif + +#define MAINTAIN_TRACKING(mp, key, value) \ + do { \ + if (!_PyObject_GC_IS_TRACKED(mp)) { \ + if (_PyObject_GC_MAY_BE_TRACKED(key) || \ + _PyObject_GC_MAY_BE_TRACKED(value)) { \ + _PyObject_GC_TRACK(mp); \ + INCREASE_TRACK_COUNT \ + } \ + } \ + } while(0) + +void +_PyDict_MaybeUntrack(PyObject *op) +{ + PyDictObject *mp; + PyObject *value; + Py_ssize_t mask, i; + PyDictEntry *ep; + + if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) + return; + + mp = (PyDictObject *) op; + ep = mp->ma_table; + mask = mp->ma_mask; + for (i = 0; i <= mask; i++) { + if ((value = ep[i].me_value) == NULL) + continue; + if (_PyObject_GC_MAY_BE_TRACKED(value) || + _PyObject_GC_MAY_BE_TRACKED(ep[i].me_key)) + return; + } + _PyObject_GC_UNTRACK(op); +} + + /* Internal routine to insert a new item into the table. 
Used both by the internal resize routine and by the public insert routine. @@ -453,6 +522,7 @@ insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value) Py_DECREF(value); return -1; } + MAINTAIN_TRACKING(mp, key, value); if (ep->me_value != NULL) { old_value = ep->me_value; ep->me_value = value; @@ -492,6 +562,7 @@ insertdict_clean(register PyDictObject *mp, PyObject *key, long hash, PyDictEntry *ep0 = mp->ma_table; register PyDictEntry *ep; + MAINTAIN_TRACKING(mp, key, value); i = hash & mask; ep = &ep0[i]; for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) { @@ -2202,9 +2273,18 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds) assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0); INIT_NONZERO_DICT_SLOTS(d); d->ma_lookup = lookdict_string; + /* The object has been implicitly tracked by tp_alloc */ + if (type == &PyDict_Type) + _PyObject_GC_UNTRACK(d); #ifdef SHOW_CONVERSION_COUNTS ++created; #endif +#ifdef SHOW_TRACK_COUNT + if (_PyObject_GC_IS_TRACKED(d)) + count_tracked++; + else + count_untracked++; +#endif } return self; } diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 74d392a4e5..644d8a92f6 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -23,11 +23,36 @@ Py_ssize_t fast_tuple_allocs; Py_ssize_t tuple_zero_allocs; #endif +/* Debug statistic to count GC tracking of tuples. + Please note that tuples are only untracked when considered by the GC, and + many of them will be dead before. Therefore, a tracking rate close to 100% + does not necessarily prove that the heuristic is inefficient. 
+*/ +#ifdef SHOW_TRACK_COUNT +static Py_ssize_t count_untracked = 0; +static Py_ssize_t count_tracked = 0; + +static void +show_track(void) +{ + fprintf(stderr, "Tuples created: %" PY_FORMAT_SIZE_T "d\n", + count_tracked + count_untracked); + fprintf(stderr, "Tuples tracked by the GC: %" PY_FORMAT_SIZE_T + "d\n", count_tracked); + fprintf(stderr, "%.2f%% tuple tracking rate\n\n", + (100.0*count_tracked/(count_untracked+count_tracked))); +} +#endif + + PyObject * PyTuple_New(register Py_ssize_t size) { register PyTupleObject *op; Py_ssize_t i; +#ifdef SHOW_TRACK_COUNT + count_tracked++; +#endif if (size < 0) { PyErr_BadInternalCall(); return NULL; } @@ -131,6 +156,32 @@ PyTuple_SetItem(register PyObject *op, register Py_ssize_t i, PyObject *newitem) return 0; } +void +_PyTuple_MaybeUntrack(PyObject *op) +{ + PyTupleObject *t; + Py_ssize_t i, n; + + if (!PyTuple_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) + return; + t = (PyTupleObject *) op; + n = Py_SIZE(t); + for (i = 0; i < n; i++) { + PyObject *elt = PyTuple_GET_ITEM(t, i); + /* Tuples with NULL elements aren't + fully constructed, don't untrack + them yet. */ + if (!elt || + _PyObject_GC_MAY_BE_TRACKED(elt)) + return; + } +#ifdef SHOW_TRACK_COUNT + count_tracked--; + count_untracked++; +#endif + _PyObject_GC_UNTRACK(op); +} + PyObject * PyTuple_Pack(Py_ssize_t n, ...) { @@ -880,6 +931,9 @@ PyTuple_Fini(void) (void)PyTuple_ClearFreeList(); #endif +#ifdef SHOW_TRACK_COUNT + show_track(); +#endif } /*********************** Tuple Iterator **************************/ |