diff options
author | Jason Madden <jamadden@gmail.com> | 2021-11-16 15:37:18 -0600 |
---|---|---|
committer | Jason Madden <jamadden@gmail.com> | 2021-11-16 15:37:18 -0600 |
commit | 34724e1c1880d02a69850160fef1abaa4305ccca (patch) | |
tree | 066ffdb7e9b395e565bb784b65cfaad3a5ba32a2 | |
parent | 1353150ac0f288099ab3b11f70cfae2dbd1d6f31 (diff) | |
download | greenlet-issue264.tar.gz |
Add a way to get how long it takes to cleanup using gc, and a way to disable it.issue264
Fixes #262.
-rw-r--r-- | src/greenlet/__init__.py | 5 | ||||
-rw-r--r-- | src/greenlet/greenlet.cpp | 67 | ||||
-rw-r--r-- | src/greenlet/greenlet_internal.hpp | 2 | ||||
-rw-r--r-- | src/greenlet/greenlet_thread_state.hpp | 38 | ||||
-rw-r--r-- | src/greenlet/tests/test_leaks.py | 31 |
5 files changed, 128 insertions, 15 deletions
diff --git a/src/greenlet/__init__.py b/src/greenlet/__init__.py index 9f2bcb2..2f671f2 100644 --- a/src/greenlet/__init__.py +++ b/src/greenlet/__init__.py @@ -61,3 +61,8 @@ except ImportError: from ._greenlet import GREENLET_USE_CONTEXT_VARS # pylint:disable=unused-import from ._greenlet import GREENLET_USE_GC # pylint:disable=unused-import from ._greenlet import GREENLET_USE_TRACING # pylint:disable=unused-import + +# Controlling the use of the gc module. +from ._greenlet import CLOCKS_PER_SEC # pylint:disable=unused-import +from ._greenlet import enable_optional_cleanup # pylint:disable=unused-import +from ._greenlet import get_clocks_used_doing_optional_cleanup # pylint:disable=unused-import diff --git a/src/greenlet/greenlet.cpp b/src/greenlet/greenlet.cpp index ab7321c..d915e13 100644 --- a/src/greenlet/greenlet.cpp +++ b/src/greenlet/greenlet.cpp @@ -2739,6 +2739,68 @@ mod_get_total_main_greenlets(PyObject* UNUSED(module)) return PyLong_FromSize_t(total_main_greenlets); } +PyDoc_STRVAR(mod_get_clocks_used_doing_optional_cleanup_doc, + "get_clocks_used_doing_optional_cleanup() -> Integer\n" + "\n" + "Get the number of clock ticks the program has used doing optional " + "greenlet cleanup.\n" + "Beginning in greenlet 2.0, greenlet tries to find and dispose of greenlets\n" + "that leaked after a thread exited. This requires invoking Python's garbage collector,\n" + "which may have a performance cost proportional to the number of live objects.\n" + "This function returns the amount of processor time\n" + "greenlet has used to do this. In programs that run with very large amounts of live\n" + "objects, this metric can be used to decide whether the cost of doing this cleanup\n" + "is worth the memory leak being corrected. If not, you can disable the cleanup\n" + "using ``enable_optional_cleanup(False)``.\n" + "The units are arbitrary and can only be compared to themselves (similarly to ``time.clock()``);\n" + "for example, to see how it scales with your heap. You can attempt to convert them into seconds\n" + "by dividing by the value of CLOCKS_PER_SEC." + "If cleanup has been disabled, returns None." + "\n" + "This is an implementation specific, provisional API. It may be changed or removed\n" + "in the future.\n" + ".. versionadded:: 2.0" + ); +static PyObject* +mod_get_clocks_used_doing_optional_cleanup(PyObject* UNUSED(module)) +{ + std::clock_t& clocks = ThreadState::clocks_used_doing_gc(); + + if (clocks == std::clock_t(-1)) { + Py_RETURN_NONE; + } + // This might not actually work on some implementations; clock_t + // is an opaque type. + return PyLong_FromSsize_t(clocks); +} + +PyDoc_STRVAR(mod_enable_optional_cleanup_doc, + "mod_enable_optional_cleanup(bool) -> None\n" + "\n" + "Enable or disable optional cleanup operations.\n" + "See ``get_clocks_used_doing_optional_cleanup()`` for details.\n" + ); +static PyObject* +mod_enable_optional_cleanup(PyObject* UNUSED(module), PyObject* flag) +{ + int is_true = PyObject_IsTrue(flag); + if (is_true == -1) { + return nullptr; + } + + std::clock_t& clocks = ThreadState::clocks_used_doing_gc(); + if (is_true) { + // If we already have a value, we don't want to lose it. + if (clocks == std::clock_t(-1)) { + clocks = 0; + } + } + else { + clocks = std::clock_t(-1); + } + Py_RETURN_NONE; +} + static PyMethodDef GreenMethods[] = { {"getcurrent", (PyCFunction)mod_getcurrent, @@ -2749,6 +2811,8 @@ static PyMethodDef GreenMethods[] = { {"set_thread_local", (PyCFunction)mod_set_thread_local, METH_VARARGS, mod_set_thread_local_doc}, {"get_pending_cleanup_count", (PyCFunction)mod_get_pending_cleanup_count, METH_NOARGS, mod_get_pending_cleanup_count_doc}, {"get_total_main_greenlets", (PyCFunction)mod_get_total_main_greenlets, METH_NOARGS, mod_get_total_main_greenlets_doc}, + {"get_clocks_used_doing_optional_cleanup", (PyCFunction)mod_get_clocks_used_doing_optional_cleanup, METH_NOARGS, mod_get_clocks_used_doing_optional_cleanup_doc}, + {"enable_optional_cleanup", (PyCFunction)mod_enable_optional_cleanup, METH_O, mod_enable_optional_cleanup_doc}, {NULL, NULL} /* Sentinel */ }; @@ -2800,6 +2864,9 @@ greenlet_internal_mod_init() G_NOEXCEPT m.PyAddObject("GREENLET_USE_CONTEXT_VARS", (long)GREENLET_PY37); m.PyAddObject("GREENLET_USE_STANDARD_THREADING", (long)G_USE_STANDARD_THREADING); + OwnedObject clocks_per_sec = OwnedObject::consuming(PyLong_FromSsize_t(CLOCKS_PER_SEC)); + m.PyAddObject("CLOCKS_PER_SEC", clocks_per_sec); + /* also publish module-level data as attributes of the greentype. */ // XXX: This is weird, and enables a strange pattern of // confusing the class greenlet with the module greenlet; with diff --git a/src/greenlet/greenlet_internal.hpp b/src/greenlet/greenlet_internal.hpp index fc25673..2892b75 100644 --- a/src/greenlet/greenlet_internal.hpp +++ b/src/greenlet/greenlet_internal.hpp @@ -28,8 +28,6 @@ namespace greenlet { class ThreadState; - typedef std::vector<PyGreenlet*, PythonAllocator<PyGreenlet*> > g_deleteme_t; - }; diff --git a/src/greenlet/greenlet_thread_state.hpp b/src/greenlet/greenlet_thread_state.hpp index fe1e160..c024cf3 100644 --- a/src/greenlet/greenlet_thread_state.hpp +++ b/src/greenlet/greenlet_thread_state.hpp @@ -1,6 +1,7 @@ #ifndef GREENLET_THREAD_STATE_HPP #define GREENLET_THREAD_STATE_HPP +#include <ctime> #include <stdexcept> #include "greenlet_internal.hpp" @@ -103,17 +104,21 @@ private: /* Strong reference to the trace function, if any. */ OwnedObject tracefunc; - /* A vector of PyGreenlet pointers representing things that need + typedef std::vector<PyGreenlet*, PythonAllocator<PyGreenlet*> > deleteme_t; + /* A vector of raw PyGreenlet pointers representing things that need deleted when this thread is running. The vector owns the references, but you need to manually INCREF/DECREF as you use - them. */ - greenlet::g_deleteme_t deleteme; + them. We don't use a vector<refs::OwnedGreenlet> because we + make copy of this vector, and that would become O(n) as all the + refcounts are incremented in the copy. + */ + deleteme_t deleteme; #ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED void* exception_state; #endif - + static std::clock_t _clocks_used_doing_gc; static ImmortalString get_referrers_name; static PythonAllocator<ThreadState> allocator; @@ -134,6 +139,7 @@ public: static void init() { ThreadState::get_referrers_name = "get_referrers"; + ThreadState::_clocks_used_doing_gc = 0; } ThreadState() @@ -256,9 +262,9 @@ private: // It's possible we could add items to this list while // running Python code if there's a thread switch, so we // need to defensively copy it before that can happen. - g_deleteme_t copy = this->deleteme; + deleteme_t copy = this->deleteme; this->deleteme.clear(); // in case things come back on the list - for(greenlet::g_deleteme_t::iterator it = copy.begin(), end = copy.end(); + for(deleteme_t::iterator it = copy.begin(), end = copy.end(); it != end; ++it ) { PyGreenlet* to_del = *it; @@ -320,6 +326,14 @@ public: this->deleteme.push_back(to_del); } + /** + * Set to std::clock_t(-1) to disable. + */ + inline static std::clock_t& clocks_used_doing_gc() + { + return ThreadState::_clocks_used_doing_gc; + } + ~ThreadState() { if (!PyInterpreterState_Head()) { @@ -348,11 +362,6 @@ public: // on the stack, somewhere uncollectable. Try to detect that. if (this->current_greenlet == this->main_greenlet && this->current_greenlet) { assert(this->current_greenlet->is_currently_running_in_some_thread()); - - // // Break a cycle we know about, the self reference - // // the main greenlet keeps. - // this->main_greenlet->main_greenlet.CLEAR(); - // Drop one reference we hold. this->current_greenlet.CLEAR(); assert(!this->current_greenlet); @@ -361,12 +370,14 @@ public: PyGreenlet* old_main_greenlet = this->main_greenlet.borrow(); Py_ssize_t cnt = this->main_greenlet.REFCNT(); this->main_greenlet.CLEAR(); - if (cnt == 2 && Py_REFCNT(old_main_greenlet) == 1) { + if (ThreadState::_clocks_used_doing_gc != std::clock_t(-1) + && cnt == 2 && Py_REFCNT(old_main_greenlet) == 1) { // Highly likely that the reference is somewhere on // the stack, not reachable by GC. Verify. // XXX: This is O(n) in the total number of objects. // TODO: Add a way to disable this at runtime, and // another way to report on it. + std::clock_t begin = std::clock(); NewReference gc(PyImport_ImportModule("gc")); if (gc) { OwnedObject get_referrers = gc.PyRequireAttr(ThreadState::get_referrers_name); @@ -412,6 +423,8 @@ public: } } } + std::clock_t end = std::clock(); + ThreadState::_clocks_used_doing_gc += (end - begin); } } } @@ -453,6 +466,7 @@ public: ImmortalString ThreadState::get_referrers_name(nullptr); PythonAllocator<ThreadState> ThreadState::allocator; +std::clock_t ThreadState::_clocks_used_doing_gc(0); template<typename Destructor> class ThreadStateCreator diff --git a/src/greenlet/tests/test_leaks.py b/src/greenlet/tests/test_leaks.py index 946425b..7ff4e59 100644 --- a/src/greenlet/tests/test_leaks.py +++ b/src/greenlet/tests/test_leaks.py @@ -116,6 +116,15 @@ class TestLeaks(TestCase): for g in gg: self.assertIsNone(g()) + def assertClocksUsed(self): + used = greenlet._greenlet.get_clocks_used_doing_optional_cleanup() + self.assertGreaterEqual(used, 0) + # we don't lose the value + greenlet._greenlet.enable_optional_cleanup(True) + used2 = greenlet._greenlet.get_clocks_used_doing_optional_cleanup() + self.assertEqual(used, used2) + self.assertGreater(greenlet._greenlet.CLOCKS_PER_SEC, 1) + def _check_issue251(self, manually_collect_background=True, explicit_reference_to_switch=False): @@ -216,7 +225,8 @@ class TestLeaks(TestCase): # the ``greenlet.switch`` method still on the stack that we # can't reach to clean up. The C code goes through terrific # lengths to clean that up. - if not explicit_reference_to_switch: + if not explicit_reference_to_switch and greenlet._greenlet.get_clocks_used_doing_optional_cleanup() is not None: + # If cleanup was disabled, though, we may not find it. self.assertEqual(greenlets_after, greenlets_before) if manually_collect_background: # TODO: Figure out how to make this work! @@ -237,9 +247,19 @@ class TestLeaks(TestCase): # done by leakcheck will find it. pass + if greenlet._greenlet.get_clocks_used_doing_optional_cleanup() is not None: + self.assertClocksUsed() + def test_issue251_killing_cross_thread_leaks_list(self): self._check_issue251() + def test_issue251_with_cleanup_disabled(self): + greenlet._greenlet.enable_optional_cleanup(False) + try: + self._check_issue251() + finally: + greenlet._greenlet.enable_optional_cleanup(True) + @fails_leakcheck def test_issue251_issue252_need_to_collect_in_background(self): # Between greenlet 1.1.2 and the next version, this was still @@ -262,6 +282,15 @@ class TestLeaks(TestCase): self._check_issue251(manually_collect_background=False) @fails_leakcheck + def test_issue251_issue252_need_to_collect_in_background_cleanup_disabled(self): + self.expect_greenlet_leak = True + greenlet._greenlet.enable_optional_cleanup(False) + try: + self._check_issue251(manually_collect_background=False) + finally: + greenlet._greenlet.enable_optional_cleanup(True) + + @fails_leakcheck def test_issue251_issue252_explicit_reference_not_collectable(self): self._check_issue251( manually_collect_background=False, |