diff options
author | Josh Smith <joshsmith@codethink.co.uk> | 2018-07-23 11:33:31 +0100 |
---|---|---|
committer | Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> | 2018-07-27 13:41:26 +0900 |
commit | 9ebd7fe1182a4d6afa027b88fb3d096408d9a912 (patch) | |
tree | 5d646889a356fea05f1dccf9b42e3f1ad10b585b | |
parent | 8c9fd9e40f5ac2902a500c46d5321ab16f6e30b7 (diff) | |
download | buildstream-9ebd7fe1182a4d6afa027b88fb3d096408d9a912.tar.gz |
_exceptions.py: Modify BstError API to allow optional retry
job.py: Changes to the logic surrounding retry attempts and child process return codes
element.py, source.py: ElementError and SourceError also implement this change.
These exceptions now have an optional parameter of temporary which defaults to false. This will potentially break
backwards compatibility where exceptions were previously raised and a retry was intended.
To trigger a retry, one must now raise their SourceError or ElementError with temporary=True.
This aims to fix #397.
-rw-r--r-- | buildstream/_exceptions.py | 7 | ||||
-rw-r--r-- | buildstream/_scheduler/jobs/job.py | 36 | ||||
-rw-r--r-- | buildstream/element.py | 5 | ||||
-rw-r--r-- | buildstream/source.py | 5 |
4 files changed, 40 insertions, 13 deletions
diff --git a/buildstream/_exceptions.py b/buildstream/_exceptions.py index 8baf16706..ada697a53 100644 --- a/buildstream/_exceptions.py +++ b/buildstream/_exceptions.py @@ -99,7 +99,7 @@ class ErrorDomain(Enum): # class BstError(Exception): - def __init__(self, message, *, detail=None, domain=None, reason=None): + def __init__(self, message, *, detail=None, domain=None, reason=None, temporary=False): global _last_exception super().__init__(message) @@ -114,6 +114,11 @@ class BstError(Exception): # self.sandbox = None + # When this exception occurred during the handling of a job, indicate + # whether or not there is any point retrying the job. + # + self.temporary = temporary + # Error domain and reason # self.domain = domain diff --git a/buildstream/_scheduler/jobs/job.py b/buildstream/_scheduler/jobs/job.py index 6d4b685af..c339a333b 100644 --- a/buildstream/_scheduler/jobs/job.py +++ b/buildstream/_scheduler/jobs/job.py @@ -35,6 +35,12 @@ from ..._exceptions import ImplError, BstError, set_last_task_error from ..._message import Message, MessageType, unconditional_messages from ... import _signals, utils +# Return code values shutdown of job handling child processes +# +RC_OK = 0 +RC_FAIL = 1 +RC_PERM_FAIL = 2 + # Used to distinguish between status messages and return values class Envelope(): @@ -111,6 +117,10 @@ class Job(): self._max_retries = max_retries # Maximum number of automatic retries self._result = None # Return value of child action in the parent self._tries = 0 # Try count, for retryable jobs + + # If False, a retry will not be attempted regardless of whether _tries is less than _max_retries. + # + self._retry_flag = True self._logfile = logfile self._task_id = None @@ -388,8 +398,9 @@ class Job(): result = self.child_process() except BstError as e: elapsed = datetime.datetime.now() - starttime + self._retry_flag = e.temporary - if self._tries <= self._max_retries: + if self._retry_flag and (self._tries <= self._max_retries): self.message(MessageType.FAIL, "Try #{} failed, retrying".format(self._tries), elapsed=elapsed) @@ -402,7 +413,10 @@ class Job(): # Report the exception to the parent (for internal testing purposes) self._child_send_error(e) - self._child_shutdown(1) + + # Set return code based on whether or not the error was temporary. + # + self._child_shutdown(RC_FAIL if self._retry_flag else RC_PERM_FAIL) except Exception as e: # pylint: disable=broad-except @@ -416,7 +430,7 @@ class Job(): self.message(MessageType.BUG, self.action_name, elapsed=elapsed, detail=detail, logfile=filename) - self._child_shutdown(1) + self._child_shutdown(RC_FAIL) else: # No exception occurred in the action @@ -430,7 +444,7 @@ class Job(): # Shutdown needs to stay outside of the above context manager, # make sure we dont try to handle SIGTERM while the process # is already busy in sys.exit() - self._child_shutdown(0) + self._child_shutdown(RC_OK) # _child_send_error() # @@ -495,7 +509,8 @@ class Job(): message.action_name = self.action_name message.task_id = self._task_id - if message.message_type == MessageType.FAIL and self._tries <= self._max_retries: + if (message.message_type == MessageType.FAIL and + self._tries <= self._max_retries and self._retry_flag): # Job will be retried, display failures as warnings in the frontend message.message_type = MessageType.WARN @@ -529,12 +544,17 @@ class Job(): def _parent_child_completed(self, pid, returncode): self._parent_shutdown() - if returncode != 0 and self._tries <= self._max_retries: + # We don't want to retry if we got OK or a permanent fail. + # This is set in _child_action but must also be set for the parent. + # + self._retry_flag = returncode not in (RC_OK, RC_PERM_FAIL) + + if self._retry_flag and (self._tries <= self._max_retries): self.spawn() return - self.parent_complete(returncode == 0, self._result) - self._scheduler.job_completed(self, returncode == 0) + self.parent_complete(returncode == RC_OK, self._result) + self._scheduler.job_completed(self, returncode == RC_OK) # _parent_process_envelope() # diff --git a/buildstream/element.py b/buildstream/element.py index e9cbdb63e..962ba1c78 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -140,9 +140,10 @@ class ElementError(BstError): message (str): The error message to report to the user detail (str): A possibly multiline, more detailed error message reason (str): An optional machine readable reason string, used for test cases + temporary(bool): An indicator to whether the error may occur if the operation was run again. (*Since: 1.4*) """ - def __init__(self, message, *, detail=None, reason=None): - super().__init__(message, detail=detail, domain=ErrorDomain.ELEMENT, reason=reason) + def __init__(self, message, *, detail=None, reason=None, temporary=False): + super().__init__(message, detail=detail, domain=ErrorDomain.ELEMENT, reason=reason, temporary=temporary) class Element(Plugin): diff --git a/buildstream/source.py b/buildstream/source.py index ec38ae8f2..c7c515e12 100644 --- a/buildstream/source.py +++ b/buildstream/source.py @@ -108,9 +108,10 @@ class SourceError(BstError): message (str): The breif error description to report to the user detail (str): A possibly multiline, more detailed error message reason (str): An optional machine readable reason string, used for test cases + temporary(bool): An indicator to whether the error may occur if the operation was run again. (*Since: 1.4*) """ - def __init__(self, message, *, detail=None, reason=None): - super().__init__(message, detail=detail, domain=ErrorDomain.SOURCE, reason=reason) + def __init__(self, message, *, detail=None, reason=None, temporary=False): + super().__init__(message, detail=detail, domain=ErrorDomain.SOURCE, reason=reason, temporary=temporary) class Source(Plugin): |