summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVictor Stinner <vstinner@python.org>2019-10-08 18:45:43 +0200
committerGitHub <noreply@github.com>2019-10-08 18:45:43 +0200
commit0ec618af98ac250a91ee9c91f8569e6df6772758 (patch)
tree91c21524bbedfeb7ad0f0ae9e3b50465a290751c
parente53c5800df3d4a07c9596ed933d431f7f77a2980 (diff)
downloadcpython-git-0ec618af98ac250a91ee9c91f8569e6df6772758.tar.gz
bpo-37531: regrtest ignores output on timeout (GH-16659)
bpo-37531, bpo-38207: On timeout, regrtest no longer attempts to call `popen.communicate() again: it can hang until all child processes using stdout and stderr pipes completes. Kill the worker process and ignores its output. Reenable test_regrtest.test_multiprocessing_timeout(). bpo-37531: Change also the faulthandler timeout of the main process from 1 minute to 5 minutes, for Python slowest buildbots.
-rw-r--r--Lib/test/libregrtest/runtest_mp.py64
-rw-r--r--Lib/test/test_regrtest.py1
-rw-r--r--Misc/NEWS.d/next/Tests/2019-10-08-16-42-05.bpo-37531.7v-_Ca.rst5
3 files changed, 42 insertions, 28 deletions
diff --git a/Lib/test/libregrtest/runtest_mp.py b/Lib/test/libregrtest/runtest_mp.py
index 79afa29fa0..71db8d6966 100644
--- a/Lib/test/libregrtest/runtest_mp.py
+++ b/Lib/test/libregrtest/runtest_mp.py
@@ -22,6 +22,12 @@ from test.libregrtest.utils import format_duration, print_warning
PROGRESS_UPDATE = 30.0 # seconds
assert PROGRESS_UPDATE >= PROGRESS_MIN_TIME
+# Kill the main process after 5 minutes. It is supposed to write an update
+# every PROGRESS_UPDATE seconds. Tolerate 5 minutes for Python slowest
+# buildbot workers.
+MAIN_PROCESS_TIMEOUT = 5 * 60.0
+assert MAIN_PROCESS_TIMEOUT >= PROGRESS_UPDATE
+
# Time to wait until a worker completes: should be immediate
JOIN_TIMEOUT = 30.0 # seconds
@@ -160,20 +166,6 @@ class TestWorkerProcess(threading.Thread):
result = TestResult(test_name, error_type, test_time, None)
return MultiprocessResult(result, stdout, stderr, err_msg)
- def _timedout(self, test_name):
- self._kill()
-
- stdout = stderr = ''
- popen = self._popen
- try:
- stdout, stderr = popen.communicate(timeout=JOIN_TIMEOUT)
- except (subprocess.TimeoutExpired, OSError) as exc:
- print_warning(f"Failed to read {self} output "
- f"(timeout={format_duration(JOIN_TIMEOUT)}): "
- f"{exc!r}")
-
- return self.mp_result_error(test_name, TIMEOUT, stdout, stderr)
-
def _run_process(self, test_name):
self.start_time = time.monotonic()
@@ -197,23 +189,32 @@ class TestWorkerProcess(threading.Thread):
try:
stdout, stderr = popen.communicate(timeout=self.timeout)
+ retcode = popen.returncode
+ assert retcode is not None
except subprocess.TimeoutExpired:
if self._stopped:
# kill() has been called: communicate() fails
# on reading closed stdout/stderr
raise ExitThread
- return self._timedout(test_name)
+ # On timeout, kill the process
+ self._kill()
+
+ # None means TIMEOUT for the caller
+ retcode = None
+ # bpo-38207: Don't attempt to call communicate() again: on it
+ # can hang until all child processes using stdout and stderr
+ # pipes completes.
+ stdout = stderr = ''
except OSError:
if self._stopped:
# kill() has been called: communicate() fails
# on reading closed stdout/stderr
raise ExitThread
raise
-
- retcode = popen.returncode
- stdout = stdout.strip()
- stderr = stderr.rstrip()
+ else:
+ stdout = stdout.strip()
+ stderr = stderr.rstrip()
return (retcode, stdout, stderr)
except:
@@ -225,13 +226,10 @@ class TestWorkerProcess(threading.Thread):
self.current_test_name = None
def _runtest(self, test_name):
- result = self._run_process(test_name)
-
- if isinstance(result, MultiprocessResult):
- # _timedout() case
- return result
+ retcode, stdout, stderr = self._run_process(test_name)
- retcode, stdout, stderr = result
+ if retcode is None:
+ return self.mp_result_error(test_name, TIMEOUT, stdout, stderr)
err_msg = None
if retcode != 0:
@@ -250,7 +248,8 @@ class TestWorkerProcess(threading.Thread):
err_msg = "Failed to parse worker JSON: %s" % exc
if err_msg is not None:
- return self.mp_result_error(test_name, CHILD_ERROR, stdout, stderr, err_msg)
+ return self.mp_result_error(test_name, CHILD_ERROR,
+ stdout, stderr, err_msg)
return MultiprocessResult(result, stdout, stderr, err_msg)
@@ -289,6 +288,16 @@ class TestWorkerProcess(threading.Thread):
f"{exc!r}")
def wait_stopped(self, start_time):
+ # bpo-38207: MultiprocessTestRunner.stop_workers() called self.stop()
+ # which killed the process. Sometimes, killing the process from the
+ # main thread does not interrupt popen.communicate() in
+ # TestWorkerProcess thread. This loop with a timeout is a workaround
+ # for that.
+ #
+ # Moreover, if this method fails to join the thread, it is likely
+ # that Python will hang at exit while calling threading._shutdown()
+ # which tries again to join the blocked thread. Regrtest.main()
+ # uses EXIT_TIMEOUT to workaround this second bug.
while True:
# Write a message every second
self.join(1.0)
@@ -355,7 +364,8 @@ class MultiprocessTestRunner:
timeout = PROGRESS_UPDATE
while True:
if use_faulthandler:
- faulthandler.dump_traceback_later(timeout * 2.0, exit=True)
+ faulthandler.dump_traceback_later(MAIN_PROCESS_TIMEOUT,
+ exit=True)
# wait for a thread
try:
diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py
index aaa97e0408..5df78861db 100644
--- a/Lib/test/test_regrtest.py
+++ b/Lib/test/test_regrtest.py
@@ -1153,7 +1153,6 @@ class ArgsTestCase(BaseTestCase):
env_changed=[testname],
fail_env_changed=True)
- @unittest.skipIf(True, 'bpo-37531, bpo-38207: test hangs randomly')
def test_multiprocessing_timeout(self):
code = textwrap.dedent(r"""
import time
diff --git a/Misc/NEWS.d/next/Tests/2019-10-08-16-42-05.bpo-37531.7v-_Ca.rst b/Misc/NEWS.d/next/Tests/2019-10-08-16-42-05.bpo-37531.7v-_Ca.rst
new file mode 100644
index 0000000000..78ab636516
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2019-10-08-16-42-05.bpo-37531.7v-_Ca.rst
@@ -0,0 +1,5 @@
+On timeout, regrtest no longer attempts to call ``popen.communicate()``
+again: it can hang until all child processes using stdout and stderr pipes
+completes. Kill the worker process and ignores its output. Change also the
+faulthandler timeout of the main process from 1 minute to 5 minutes, for Python
+slowest buildbots.