summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKyle Mckay <kyle.mckay@codethink.co.uk>2022-01-05 13:49:27 +0000
committerKyle Mckay <kyle.mckay@codethink.co.uk>2022-01-12 11:10:05 +0000
commit1a1acf5ca41c4ffca5a9f94d770f852335111048 (patch)
treeecff3c56c2b61f77cd65bfd99859e47f253d210b
parent286b50ee969bad45a9f11df96c6a36d513310a51 (diff)
downloadlorry-controller-1a1acf5ca41c4ffca5a9f94d770f852335111048.tar.gz
Fix job queue getting stuck on repo preparation
- When a downstream type failed to prepare a repository, the loop over lorries would abort unexpectedly, meaning any following lorries were never added into the minion's queue. Such cases are now simply skipped over so that lorry controller does not get stuck. - Catching Exception doesn't feel nice or correct, but we don't want any unexpected exceptions to cause the same problem of getting stuck, so I don't see any obvious alternative in the short term. Perhaps the overall code can be reworked to avoid needing to do this. Log exceptions when skipping lorries When a repository fails to prepare, ensure there is a log of the exception that occurred and the associated lorry. Update skipped lorry last run and exit data - Update last run to prevent frequent repeated failure - Update exit for user visibility on status page
-rw-r--r--lorrycontroller/givemejob.py24
1 files changed, 21 insertions, 3 deletions
diff --git a/lorrycontroller/givemejob.py b/lorrycontroller/givemejob.py
index 2f7580f..973e141 100644
--- a/lorrycontroller/givemejob.py
+++ b/lorrycontroller/givemejob.py
@@ -16,6 +16,7 @@
import json
import logging
import re
+import traceback
import urllib.parse
import bottle
@@ -39,17 +40,34 @@ class GiveMeJob(lorrycontroller.LorryControllerRoute):
now = statedb.get_current_time()
for lorry_info in lorry_infos:
if self.ready_to_run(lorry_info, now):
+ lorry_path = lorry_info['path']
metadata = self.get_repo_metadata(statedb, lorry_info)
downstream_type = lorrycontroller.downstream_types[
self.app_settings['git-server-type']]
- downstream_type(self.app_settings) \
- .prepare_repo(lorry_info['path'], metadata)
+
+ # Skip over any repos that fail to prepare
+ # otherwise job queue will get stuck here
+ try:
+ downstream_type(self.app_settings).prepare_repo(
+ lorry_path, metadata)
+ # Catching base Exception because we don't want
+ # unexpected exception types to block the queue
+ except Exception:
+ logging.exception(
+ 'Skipping lorry %s due to an encountered exception',
+ lorry_path)
+ # Failure should be visible to user on status page
+ statedb.set_lorry_last_run_exit_and_output(
+ lorry_path, '1', traceback.format_exc())
+ # Prevent repeating failure
+ statedb.set_lorry_last_run(lorry_path, int(now))
+ continue
self.give_job_to_minion(statedb, lorry_info, now)
logging.info(
'Giving job %s to lorry %s to MINION %s:%s',
lorry_info['job_id'],
- lorry_info['path'],
+ lorry_path,
bottle.request.forms.host,
bottle.request.forms.pid)
return lorry_info