summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Madden <jamadden@gmail.com>2021-11-16 15:37:18 -0600
committerJason Madden <jamadden@gmail.com>2021-11-16 15:37:18 -0600
commit34724e1c1880d02a69850160fef1abaa4305ccca (patch)
tree066ffdb7e9b395e565bb784b65cfaad3a5ba32a2
parent1353150ac0f288099ab3b11f70cfae2dbd1d6f31 (diff)
downloadgreenlet-issue264.tar.gz
Add a way to get how long it takes to cleanup using gc, and a way to disable it.issue264
Fixes #262.
-rw-r--r--src/greenlet/__init__.py5
-rw-r--r--src/greenlet/greenlet.cpp67
-rw-r--r--src/greenlet/greenlet_internal.hpp2
-rw-r--r--src/greenlet/greenlet_thread_state.hpp38
-rw-r--r--src/greenlet/tests/test_leaks.py31
5 files changed, 128 insertions, 15 deletions
diff --git a/src/greenlet/__init__.py b/src/greenlet/__init__.py
index 9f2bcb2..2f671f2 100644
--- a/src/greenlet/__init__.py
+++ b/src/greenlet/__init__.py
@@ -61,3 +61,8 @@ except ImportError:
from ._greenlet import GREENLET_USE_CONTEXT_VARS # pylint:disable=unused-import
from ._greenlet import GREENLET_USE_GC # pylint:disable=unused-import
from ._greenlet import GREENLET_USE_TRACING # pylint:disable=unused-import
+
+# Controlling the use of the gc module.
+from ._greenlet import CLOCKS_PER_SEC # pylint:disable=unused-import
+from ._greenlet import enable_optional_cleanup # pylint:disable=unused-import
+from ._greenlet import get_clocks_used_doing_optional_cleanup # pylint:disable=unused-import
diff --git a/src/greenlet/greenlet.cpp b/src/greenlet/greenlet.cpp
index ab7321c..d915e13 100644
--- a/src/greenlet/greenlet.cpp
+++ b/src/greenlet/greenlet.cpp
@@ -2739,6 +2739,68 @@ mod_get_total_main_greenlets(PyObject* UNUSED(module))
return PyLong_FromSize_t(total_main_greenlets);
}
+PyDoc_STRVAR(mod_get_clocks_used_doing_optional_cleanup_doc,
+ "get_clocks_used_doing_optional_cleanup() -> Integer\n"
+ "\n"
+ "Get the number of clock ticks the program has used doing optional "
+ "greenlet cleanup.\n"
+ "Beginning in greenlet 2.0, greenlet tries to find and dispose of greenlets\n"
+ "that leaked after a thread exited. This requires invoking Python's garbage collector,\n"
+ "which may have a performance cost proportional to the number of live objects.\n"
+ "This function returns the amount of processor time\n"
+ "greenlet has used to do this. In programs that run with very large amounts of live\n"
+ "objects, this metric can be used to decide whether the cost of doing this cleanup\n"
+ "is worth the memory leak being corrected. If not, you can disable the cleanup\n"
+ "using ``enable_optional_cleanup(False)``.\n"
+ "The units are arbitrary and can only be compared to themselves (similarly to ``time.clock()``);\n"
+ "for example, to see how it scales with your heap. You can attempt to convert them into seconds\n"
+ "by dividing by the value of CLOCKS_PER_SEC."
+ "If cleanup has been disabled, returns None."
+ "\n"
+ "This is an implementation specific, provisional API. It may be changed or removed\n"
+ "in the future.\n"
+ ".. versionadded:: 2.0"
+ );
+static PyObject*
+mod_get_clocks_used_doing_optional_cleanup(PyObject* UNUSED(module))
+{
+ std::clock_t& clocks = ThreadState::clocks_used_doing_gc();
+
+ if (clocks == std::clock_t(-1)) {
+ Py_RETURN_NONE;
+ }
+ // This might not actually work on some implementations; clock_t
+ // is an opaque type.
+ return PyLong_FromSsize_t(clocks);
+}
+
+PyDoc_STRVAR(mod_enable_optional_cleanup_doc,
+ "mod_enable_optional_cleanup(bool) -> None\n"
+ "\n"
+ "Enable or disable optional cleanup operations.\n"
+ "See ``get_clocks_used_doing_optional_cleanup()`` for details.\n"
+ );
+static PyObject*
+mod_enable_optional_cleanup(PyObject* UNUSED(module), PyObject* flag)
+{
+ int is_true = PyObject_IsTrue(flag);
+ if (is_true == -1) {
+ return nullptr;
+ }
+
+ std::clock_t& clocks = ThreadState::clocks_used_doing_gc();
+ if (is_true) {
+ // If we already have a value, we don't want to lose it.
+ if (clocks == std::clock_t(-1)) {
+ clocks = 0;
+ }
+ }
+ else {
+ clocks = std::clock_t(-1);
+ }
+ Py_RETURN_NONE;
+}
+
static PyMethodDef GreenMethods[] = {
{"getcurrent",
(PyCFunction)mod_getcurrent,
@@ -2749,6 +2811,8 @@ static PyMethodDef GreenMethods[] = {
{"set_thread_local", (PyCFunction)mod_set_thread_local, METH_VARARGS, mod_set_thread_local_doc},
{"get_pending_cleanup_count", (PyCFunction)mod_get_pending_cleanup_count, METH_NOARGS, mod_get_pending_cleanup_count_doc},
{"get_total_main_greenlets", (PyCFunction)mod_get_total_main_greenlets, METH_NOARGS, mod_get_total_main_greenlets_doc},
+ {"get_clocks_used_doing_optional_cleanup", (PyCFunction)mod_get_clocks_used_doing_optional_cleanup, METH_NOARGS, mod_get_clocks_used_doing_optional_cleanup_doc},
+ {"enable_optional_cleanup", (PyCFunction)mod_enable_optional_cleanup, METH_O, mod_enable_optional_cleanup_doc},
{NULL, NULL} /* Sentinel */
};
@@ -2800,6 +2864,9 @@ greenlet_internal_mod_init() G_NOEXCEPT
m.PyAddObject("GREENLET_USE_CONTEXT_VARS", (long)GREENLET_PY37);
m.PyAddObject("GREENLET_USE_STANDARD_THREADING", (long)G_USE_STANDARD_THREADING);
+ OwnedObject clocks_per_sec = OwnedObject::consuming(PyLong_FromSsize_t(CLOCKS_PER_SEC));
+ m.PyAddObject("CLOCKS_PER_SEC", clocks_per_sec);
+
/* also publish module-level data as attributes of the greentype. */
// XXX: This is weird, and enables a strange pattern of
// confusing the class greenlet with the module greenlet; with
diff --git a/src/greenlet/greenlet_internal.hpp b/src/greenlet/greenlet_internal.hpp
index fc25673..2892b75 100644
--- a/src/greenlet/greenlet_internal.hpp
+++ b/src/greenlet/greenlet_internal.hpp
@@ -28,8 +28,6 @@ namespace greenlet {
class ThreadState;
- typedef std::vector<PyGreenlet*, PythonAllocator<PyGreenlet*> > g_deleteme_t;
-
};
diff --git a/src/greenlet/greenlet_thread_state.hpp b/src/greenlet/greenlet_thread_state.hpp
index fe1e160..c024cf3 100644
--- a/src/greenlet/greenlet_thread_state.hpp
+++ b/src/greenlet/greenlet_thread_state.hpp
@@ -1,6 +1,7 @@
#ifndef GREENLET_THREAD_STATE_HPP
#define GREENLET_THREAD_STATE_HPP
+#include <ctime>
#include <stdexcept>
#include "greenlet_internal.hpp"
@@ -103,17 +104,21 @@ private:
/* Strong reference to the trace function, if any. */
OwnedObject tracefunc;
- /* A vector of PyGreenlet pointers representing things that need
+ typedef std::vector<PyGreenlet*, PythonAllocator<PyGreenlet*> > deleteme_t;
+ /* A vector of raw PyGreenlet pointers representing things that need
deleted when this thread is running. The vector owns the
references, but you need to manually INCREF/DECREF as you use
- them. */
- greenlet::g_deleteme_t deleteme;
+ them. We don't use a vector<refs::OwnedGreenlet> because we
+ make copy of this vector, and that would become O(n) as all the
+ refcounts are incremented in the copy.
+ */
+ deleteme_t deleteme;
#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED
void* exception_state;
#endif
-
+ static std::clock_t _clocks_used_doing_gc;
static ImmortalString get_referrers_name;
static PythonAllocator<ThreadState> allocator;
@@ -134,6 +139,7 @@ public:
static void init()
{
ThreadState::get_referrers_name = "get_referrers";
+ ThreadState::_clocks_used_doing_gc = 0;
}
ThreadState()
@@ -256,9 +262,9 @@ private:
// It's possible we could add items to this list while
// running Python code if there's a thread switch, so we
// need to defensively copy it before that can happen.
- g_deleteme_t copy = this->deleteme;
+ deleteme_t copy = this->deleteme;
this->deleteme.clear(); // in case things come back on the list
- for(greenlet::g_deleteme_t::iterator it = copy.begin(), end = copy.end();
+ for(deleteme_t::iterator it = copy.begin(), end = copy.end();
it != end;
++it ) {
PyGreenlet* to_del = *it;
@@ -320,6 +326,14 @@ public:
this->deleteme.push_back(to_del);
}
+ /**
+ * Set to std::clock_t(-1) to disable.
+ */
+ inline static std::clock_t& clocks_used_doing_gc()
+ {
+ return ThreadState::_clocks_used_doing_gc;
+ }
+
~ThreadState()
{
if (!PyInterpreterState_Head()) {
@@ -348,11 +362,6 @@ public:
// on the stack, somewhere uncollectable. Try to detect that.
if (this->current_greenlet == this->main_greenlet && this->current_greenlet) {
assert(this->current_greenlet->is_currently_running_in_some_thread());
-
- // // Break a cycle we know about, the self reference
- // // the main greenlet keeps.
- // this->main_greenlet->main_greenlet.CLEAR();
-
// Drop one reference we hold.
this->current_greenlet.CLEAR();
assert(!this->current_greenlet);
@@ -361,12 +370,14 @@ public:
PyGreenlet* old_main_greenlet = this->main_greenlet.borrow();
Py_ssize_t cnt = this->main_greenlet.REFCNT();
this->main_greenlet.CLEAR();
- if (cnt == 2 && Py_REFCNT(old_main_greenlet) == 1) {
+ if (ThreadState::_clocks_used_doing_gc != std::clock_t(-1)
+ && cnt == 2 && Py_REFCNT(old_main_greenlet) == 1) {
// Highly likely that the reference is somewhere on
// the stack, not reachable by GC. Verify.
// XXX: This is O(n) in the total number of objects.
// TODO: Add a way to disable this at runtime, and
// another way to report on it.
+ std::clock_t begin = std::clock();
NewReference gc(PyImport_ImportModule("gc"));
if (gc) {
OwnedObject get_referrers = gc.PyRequireAttr(ThreadState::get_referrers_name);
@@ -412,6 +423,8 @@ public:
}
}
}
+ std::clock_t end = std::clock();
+ ThreadState::_clocks_used_doing_gc += (end - begin);
}
}
}
@@ -453,6 +466,7 @@ public:
ImmortalString ThreadState::get_referrers_name(nullptr);
PythonAllocator<ThreadState> ThreadState::allocator;
+std::clock_t ThreadState::_clocks_used_doing_gc(0);
template<typename Destructor>
class ThreadStateCreator
diff --git a/src/greenlet/tests/test_leaks.py b/src/greenlet/tests/test_leaks.py
index 946425b..7ff4e59 100644
--- a/src/greenlet/tests/test_leaks.py
+++ b/src/greenlet/tests/test_leaks.py
@@ -116,6 +116,15 @@ class TestLeaks(TestCase):
for g in gg:
self.assertIsNone(g())
+ def assertClocksUsed(self):
+ used = greenlet._greenlet.get_clocks_used_doing_optional_cleanup()
+ self.assertGreaterEqual(used, 0)
+ # we don't lose the value
+ greenlet._greenlet.enable_optional_cleanup(True)
+ used2 = greenlet._greenlet.get_clocks_used_doing_optional_cleanup()
+ self.assertEqual(used, used2)
+ self.assertGreater(greenlet._greenlet.CLOCKS_PER_SEC, 1)
+
def _check_issue251(self,
manually_collect_background=True,
explicit_reference_to_switch=False):
@@ -216,7 +225,8 @@ class TestLeaks(TestCase):
# the ``greenlet.switch`` method still on the stack that we
# can't reach to clean up. The C code goes through terrific
# lengths to clean that up.
- if not explicit_reference_to_switch:
+ if not explicit_reference_to_switch and greenlet._greenlet.get_clocks_used_doing_optional_cleanup() is not None:
+ # If cleanup was disabled, though, we may not find it.
self.assertEqual(greenlets_after, greenlets_before)
if manually_collect_background:
# TODO: Figure out how to make this work!
@@ -237,9 +247,19 @@ class TestLeaks(TestCase):
# done by leakcheck will find it.
pass
+ if greenlet._greenlet.get_clocks_used_doing_optional_cleanup() is not None:
+ self.assertClocksUsed()
+
def test_issue251_killing_cross_thread_leaks_list(self):
self._check_issue251()
+ def test_issue251_with_cleanup_disabled(self):
+ greenlet._greenlet.enable_optional_cleanup(False)
+ try:
+ self._check_issue251()
+ finally:
+ greenlet._greenlet.enable_optional_cleanup(True)
+
@fails_leakcheck
def test_issue251_issue252_need_to_collect_in_background(self):
# Between greenlet 1.1.2 and the next version, this was still
@@ -262,6 +282,15 @@ class TestLeaks(TestCase):
self._check_issue251(manually_collect_background=False)
@fails_leakcheck
+ def test_issue251_issue252_need_to_collect_in_background_cleanup_disabled(self):
+ self.expect_greenlet_leak = True
+ greenlet._greenlet.enable_optional_cleanup(False)
+ try:
+ self._check_issue251(manually_collect_background=False)
+ finally:
+ greenlet._greenlet.enable_optional_cleanup(True)
+
+ @fails_leakcheck
def test_issue251_issue252_explicit_reference_not_collectable(self):
self._check_issue251(
manually_collect_background=False,