path: root/buildstream/_scheduler/queues/queue.py

#
#  Copyright (C) 2016 Codethink Limited
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU Lesser General Public
#  License as published by the Free Software Foundation; either
#  version 2 of the License, or (at your option) any later version.
#
#  This library is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public
#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
#
#  Authors:
#        Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
#        Jürg Billeter <juerg.billeter@codethink.co.uk>

# System imports
import os
from collections import deque
from enum import Enum
import traceback

# Local imports
from ..jobs import ElementJob
from ..resources import ResourceType

# BuildStream toplevel imports
from ..._exceptions import BstError, set_last_task_error
from ..._message import Message, MessageType


# Queue status for a given element
#
class QueueStatus(Enum):
    # The element is waiting for dependencies.
    WAIT = 1

    # The element can skip this queue.
    SKIP = 2

    # The element is ready for processing in this queue.
    READY = 3


# Queue()
#
# Args:
#    scheduler (Scheduler): The Scheduler
#
class Queue():

    # These should be overridden as class data of concrete Queue implementations
    action_name = None
    complete_name = None
    resources = []                     # Resources this queue's jobs want

    def __init__(self, scheduler):

        #
        # Public members
        #
        self.failed_elements = []      # List of failed elements, for the frontend
        self.processed_elements = []   # List of processed elements, for the frontend
        self.skipped_elements = []     # List of skipped elements, for the frontend

        #
        # Private members
        #
        self._scheduler = scheduler
        self._wait_queue = deque()
        self._done_queue = deque()
        self._max_retries = 0

        # Assert the subclass has setup class data
        assert self.action_name is not None
        assert self.complete_name is not None

        if ResourceType.UPLOAD in self.resources or ResourceType.DOWNLOAD in self.resources:
            self._max_retries = scheduler.context.sched_network_retries

    #####################################################
    #     Abstract Methods for Queue implementations    #
    #####################################################

    # process()
    #
    # Abstract method for processing an element
    #
    # Args:
    #    element (Element): An element to process
    #
    # Returns:
    #    (any): An optional result to be returned
    #           for every element successfully processed
    #
    def process(self, element):
        pass

    # status()
    #
    # Abstract method for reporting the status of an element.
    #
    # Args:
    #    element (Element): An element to process
    #
    # Returns:
    #    (QueueStatus): The element status
    #
    def status(self, element):
        return QueueStatus.READY

    # prepare()
    #
    # Abstract method for handling job preparation in the main process.
    #
    # Args:
    #    element (Element): The element which is scheduled
    #
    def prepare(self, element):
        pass

    # done()
    #
    # Abstract method for handling a successful job completion.
    #
    # Args:
    #    job (Job): The job which completed processing
    #    element (Element): The element which completed processing
    #    result (any): The return value of the process() implementation
    #    success (bool): True if the process() implementation did not
    #                    raise any exception
    #
    def done(self, job, element, result, success):
        pass
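
    # Example: a minimal concrete Queue implementation
    #
    # This is an illustrative sketch only; the class name and the
    # element methods it calls (element._do_work(), element._work_done())
    # are hypothetical placeholders rather than real Element API.
    #
    #   class ExampleQueue(Queue):
    #
    #       action_name = "example"
    #       complete_name = "examples done"
    #       resources = [ResourceType.DOWNLOAD]
    #
    #       def process(self, element):
    #           # Runs in the job's child process; the return value
    #           # is handed back to done() as `result`.
    #           return element._do_work()
    #
    #       def status(self, element):
    #           if element._work_done():
    #               return QueueStatus.SKIP
    #           return QueueStatus.READY
    #
    #       def done(self, job, element, result, success):
    #           # Runs in the main process once the job completes.
    #           pass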

    #####################################################
    #          Scheduler / Pipeline facing APIs         #
    #####################################################

    # enqueue()
    #
    # Enqueues some elements
    #
    # Args:
    #    elts (list): A list of Elements
    #
    def enqueue(self, elts):
        if not elts:
            return

        # Note: The internal lists work with jobs. This is not
        #       reflected in any external methods (except
        #       pop/peek_ready_jobs).
        def create_job(element):
            logfile = self._element_log_path(element)
            return ElementJob(self._scheduler, self.action_name,
                              logfile, element=element, queue=self,
                              resources=self.resources,
                              action_cb=self.process,
                              complete_cb=self._job_done,
                              max_retries=self._max_retries)

        # Place skipped elements directly on the done queue
        jobs = [create_job(elt) for elt in elts]
        skip = [job for job in jobs if self.status(job.element) == QueueStatus.SKIP]
        wait = [job for job in jobs if job not in skip]

        self._wait_queue.extend(wait)
        self._done_queue.extend(skip)
        self.skipped_elements.extend([job.element for job in skip])

    # dequeue()
    #
    # A generator which dequeues the elements which
    # are ready to exit the queue.
    #
    # Yields:
    #    (Element): Elements being dequeued
    #
    def dequeue(self):
        while self._done_queue:
            yield self._done_queue.popleft().element

    # dequeue_ready()
    #
    # Reports whether there are any elements to dequeue
    #
    # Returns:
    #    (bool): Whether there are elements to dequeue
    #
    def dequeue_ready(self):
        return any(self._done_queue)

    # pop_ready_jobs()
    #
    # Process elements in the wait queue, moving elements which can be
    # skipped directly to the done queue and returning the jobs for
    # elements which are ready to be processed.
    #
    # This will have different results for elements depending
    # on the Queue.status() implementation:
    #
    #   o Elements which are QueueStatus.WAIT will not be affected
    #
    #   o Elements which are QueueStatus.SKIP will move directly
    #     to the done queue
    #
    #   o For elements which are QueueStatus.READY, their Job will be
    #     returned to the caller, provided that the scheduler allows
    #     the Queue enough resources for the given job
    #
    # Returns:
    #     ([Job]): A list of jobs to run
    #
    def pop_ready_jobs(self):
        unready = []
        ready = []

        while self._wait_queue:
            job = self._wait_queue.popleft()
            element = job.element

            status = self.status(element)
            if status == QueueStatus.WAIT:
                unready.append(job)
                continue
            elif status == QueueStatus.SKIP:
                self._done_queue.append(job)
                self.skipped_elements.append(element)
                continue

            self.prepare(element)
            ready.append(job)

        # These were not ready, but they were at the front of the queue;
        # give them first priority again next time around
        self._wait_queue.extendleft(unready)

        return ready

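    # peek_ready_jobs()
    #
    # Reports the jobs in the wait queue which are ready to be
    # processed, without removing them from the queue.
    #
    # Yields:
    #    (Job): Jobs which are ready to run
    #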
    def peek_ready_jobs(self):
        def ready(job):
            return self.status(job.element) == QueueStatus.READY

        yield from (job for job in self._wait_queue if ready(job))
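
    # A typical pattern for driving a Queue from the scheduler side
    # (an illustrative sketch only, not the actual Scheduler code;
    # spawn_job() and next_queue are hypothetical placeholders):
    #
    #   queue.enqueue(elements)
    #
    #   # On each scheduling tick:
    #   for job in queue.pop_ready_jobs():
    #       spawn_job(job)    # the job calls back into _job_done() when finished
    #
    #   if queue.dequeue_ready():
    #       for element in queue.dequeue():
    #           next_queue.enqueue([element])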

    #####################################################
    #                 Private Methods                   #
    #####################################################

    # _update_workspaces()
    #
    # Updates and possibly saves the workspaces in the
    # main data model in the main process after a job completes.
    #
    # Args:
    #    element (Element): The element which completed
    #    job (Job): The job which completed
    #
    def _update_workspaces(self, element, job):
        workspace_dict = None
        if job.child_data:
            workspace_dict = job.child_data.get('workspace', None)

        # Handle any workspace modifications now
        #
        if workspace_dict:
            context = element._get_context()
            workspaces = context.get_workspaces()
            if workspaces.update_workspace(element._get_full_name(), workspace_dict):
                try:
                    workspaces.save_config()
                except BstError as e:
                    self._message(element, MessageType.ERROR, "Error saving workspaces", detail=str(e))
                except Exception as e:   # pylint: disable=broad-except
                    self._message(element, MessageType.BUG,
                                  "Unhandled exception while saving workspaces",
                                  detail=traceback.format_exc())

    # _job_done()
    #
    # A callback reported by the Job() when a job completes
    #
    # This will call the Queue implementation specific Queue.done()
    # implementation and trigger the scheduler to reschedule.
    #
    # See the Job object for an explanation of the call signature
    #
    def _job_done(self, job, element, success, result):
        element._update_state()

        # Update values that need to be synchronized in the main task
        # before calling any queue implementation
        self._update_workspaces(element, job)

        # Give the result of the job to the Queue implementor,
        # and determine if it should be considered as processed
        # or skipped.
        try:
            self.done(job, element, result, success)
        except BstError as e:

            # Report error and mark as failed
            #
            self._message(element, MessageType.ERROR, "Post processing error", detail=str(e))
            self.failed_elements.append(element)

            # Treat this as a task error as it's related to a task
            # even though it did not occur in the task context
            #
            # This simply gives us stronger testing capabilities
            #
            set_last_task_error(e.domain, e.reason)

        except Exception as e:   # pylint: disable=broad-except

            # Report unhandled exceptions and mark as failed
            #
            self._message(element, MessageType.BUG,
                          "Unhandled exception in post processing",
                          detail=traceback.format_exc())
            self.failed_elements.append(element)
        else:
            #
            # No exception occurred in post processing
            #

            # All jobs get placed on the done queue for later processing.
            self._done_queue.append(job)

            # A job can be skipped whether or not it has failed; we only
            # want to bookkeep elements as processed or failed if they
            # were not skipped.
            if job.skipped:
                self.skipped_elements.append(element)
            elif success:
                self.processed_elements.append(element)
            else:
                self.failed_elements.append(element)

    # Convenience wrapper for Queue implementations to send
    # a message for the element they are processing
    def _message(self, element, message_type, brief, **kwargs):
        context = element._get_context()
        message = Message(element._get_unique_id(), message_type, brief, **kwargs)
        context.message(message)

    def _element_log_path(self, element):
        project = element._get_project()
        key = element._get_display_key()[1]
        action = self.action_name.lower()
        logfile = "{key}-{action}".format(key=key, action=action)

        return os.path.join(project.name, element.normal_name, logfile)