# _with_gc_disabled()
#
# Decorate a function to disable the garbage collector across its execution.
#
# In general, disabling the garbage collector should be considered to be an
# extreme action. Only use this in carefully selected subsets of the code
# where we generally create a lot more objects than we throw away. For example
# in loading the stream.
#
# The collector state found on entry is restored on exit: if the collector
# was already disabled when the decorated callable was invoked (for example
# by an enclosing decorated call), it is left disabled and no collection is
# forced, so that only the outermost scope re-enables and cleans up.
#
# Args:
#    func (callable): The callable to disable the GC for
#
# Returns:
#    (callable): The decorated callable
#
def _with_gc_disabled(func):
    @functools.wraps(func)
    def _gc_disabled(*args, **kwargs):
        # Remember whether the collector was running so that we never switch
        # it back on behind the back of a caller which had disabled it.
        was_enabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if was_enabled:
                gc.enable()
                # Clean up to ensure we don't grow any more, freeing up room
                # to be used by other objects during the course of running
                # BuildStream.
                gc.collect()
    return _gc_disabled