From 90da23867767905c39bf8199c2b8a4d6e13a5c95 Mon Sep 17 00:00:00 2001 From: Benjamin Schubert Date: Fri, 26 Jul 2019 14:47:08 +0100 Subject: types: Add a 'FastEnum' implementation and replace Enum by it 'Enum' has a big performance impact on the running code. Replacing it with a safe subset of functionality removes lots of this overhead without removing the benefits of using enums (safe comparisions, uniqueness) --- src/buildstream/_scheduler/jobs/job.py | 18 ++++--- src/buildstream/_scheduler/queues/queue.py | 4 +- src/buildstream/_types.pyx | 79 ++++++++++++++++++++++++++++++ src/buildstream/storage/directory.py | 4 +- src/buildstream/types.py | 54 ++++++++++++++++++-- 5 files changed, 144 insertions(+), 15 deletions(-) create mode 100644 src/buildstream/_types.pyx (limited to 'src') diff --git a/src/buildstream/_scheduler/jobs/job.py b/src/buildstream/_scheduler/jobs/job.py index 7975488ed..c651d5206 100644 --- a/src/buildstream/_scheduler/jobs/job.py +++ b/src/buildstream/_scheduler/jobs/job.py @@ -20,7 +20,6 @@ # Tristan Maat # System imports -import enum import os import sys import signal @@ -32,6 +31,7 @@ import multiprocessing # BuildStream toplevel imports from ..._exceptions import ImplError, BstError, set_last_task_error, SkipJob from ..._message import Message, MessageType, unconditional_messages +from ...types import FastEnum from ... import _signals, utils from .jobpickler import pickle_child_job @@ -39,8 +39,7 @@ from .jobpickler import pickle_child_job # Return code values shutdown of job handling child processes # -@enum.unique -class _ReturnCode(enum.IntEnum): +class _ReturnCode(FastEnum): OK = 0 FAIL = 1 PERM_FAIL = 2 @@ -52,8 +51,7 @@ class _ReturnCode(enum.IntEnum): # The job completion status, passed back through the # complete callbacks. # -@enum.unique -class JobStatus(enum.Enum): +class JobStatus(FastEnum): # Job succeeded OK = 0 @@ -80,8 +78,7 @@ class Process(multiprocessing.Process): self._sentinel = self._popen.sentinel -@enum.unique -class _MessageType(enum.Enum): +class _MessageType(FastEnum): LOG_MESSAGE = 1 ERROR = 2 RESULT = 3 @@ -443,6 +440,11 @@ class Job(): def _parent_child_completed(self, pid, returncode): self._parent_shutdown() + try: + returncode = _ReturnCode(returncode) + except KeyError: # An unexpected return code was returned, let's fail permanently + returncode = _ReturnCode.PERM_FAIL + # We don't want to retry if we got OK or a permanent fail. retry_flag = returncode == _ReturnCode.FAIL @@ -834,7 +836,7 @@ class ChildJob(): def _child_shutdown(self, exit_code): self._queue.close() assert isinstance(exit_code, _ReturnCode) - sys.exit(int(exit_code)) + sys.exit(exit_code.value) # _child_message_handler() # diff --git a/src/buildstream/_scheduler/queues/queue.py b/src/buildstream/_scheduler/queues/queue.py index 538b2b9d1..79f1fa44f 100644 --- a/src/buildstream/_scheduler/queues/queue.py +++ b/src/buildstream/_scheduler/queues/queue.py @@ -21,7 +21,6 @@ # System imports import os from collections import deque -from enum import Enum import heapq import traceback @@ -32,12 +31,13 @@ from ..resources import ResourceType # BuildStream toplevel imports from ..._exceptions import BstError, ImplError, set_last_task_error from ..._message import Message, MessageType +from ...types import FastEnum # Queue status for a given element # # -class QueueStatus(Enum): +class QueueStatus(FastEnum): # The element is not yet ready to be processed in the queue. PENDING = 1 diff --git a/src/buildstream/_types.pyx b/src/buildstream/_types.pyx new file mode 100644 index 000000000..671f0bd2f --- /dev/null +++ b/src/buildstream/_types.pyx @@ -0,0 +1,79 @@ +# +# Copyright (C) 2018 Bloomberg LP +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see . +# +# Authors: +# Tristan Van Berkom +# Jim MacArthur +# Benjamin Schubert + +# MetaFastEnum() +# +# This is a reemplementation of MetaEnum, in order to get a faster implementation of Enum. +# +# Enum turns out to be very slow and would add a noticeable slowdown when we try to add them to the codebase. +# We therefore reimplement a subset of the `Enum` functionality that keeps it compatible with normal `Enum`. +# That way, any place in the code base that access a `FastEnum`, can normally also accept an `Enum`. The reverse +# is not correct, since we only implement a subset of `Enum`. +class MetaFastEnum(type): + def __new__(mcs, name, bases, dct): + if name == "FastEnum": + return type.__new__(mcs, name, bases, dct) + + assert len(bases) == 1, "Multiple inheritance with Fast enums is not currently supported." + + dunder_values = {} + normal_values = {} + + parent_keys = bases[0].__dict__.keys() + + assert "__class__" not in dct.keys(), "Overriding '__class__' is not allowed on 'FastEnum' classes" + + for key, value in dct.items(): + if key.startswith("__") and key.endswith("__"): + dunder_values[key] = value + else: + assert key not in parent_keys, "Overriding 'FastEnum.{}' is not allowed. ".format(key) + normal_values[key] = value + + kls = type.__new__(mcs, name, bases, dunder_values) + mcs.set_values(kls, normal_values) + + return kls + + @classmethod + def set_values(mcs, kls, data): + value_to_entry = {} + + assert len(set(data.values())) == len(data.values()), "Values for {} are not unique".format(kls) + assert len(set(type(value) for value in data.values())) <= 1, \ + "Values of {} are of heterogeneous types".format(kls) + + for key, value in data.items(): + new_value = object.__new__(kls) + object.__setattr__(new_value, "value", value) + object.__setattr__(new_value, "name", key) + + type.__setattr__(kls, key, new_value) + + value_to_entry[value] = new_value + + type.__setattr__(kls, "_value_to_entry", value_to_entry) + + def __repr__(self): + return "".format(self.__name__) + + def __setattr__(self, key, value): + raise ValueError("Adding new values dynamically is not supported") diff --git a/src/buildstream/storage/directory.py b/src/buildstream/storage/directory.py index d32ac0063..5039e0af9 100644 --- a/src/buildstream/storage/directory.py +++ b/src/buildstream/storage/directory.py @@ -31,9 +31,9 @@ See also: :ref:`sandboxing`. """ -from enum import Enum from .._exceptions import BstError, ErrorDomain +from ..types import FastEnum from ..utils import BST_ARBITRARY_TIMESTAMP @@ -196,7 +196,7 @@ class Directory(): # # Type of file or directory entry. # -class _FileType(Enum): +class _FileType(FastEnum): # Directory DIRECTORY = 1 diff --git a/src/buildstream/types.py b/src/buildstream/types.py index 6f6262e41..dc40bd16f 100644 --- a/src/buildstream/types.py +++ b/src/buildstream/types.py @@ -25,10 +25,58 @@ Foundation types """ -from enum import Enum +from ._types import MetaFastEnum -class Scope(Enum): +class FastEnum(metaclass=MetaFastEnum): + """ + A reimplementation of a subset of the `Enum` functionality, which is far quicker than `Enum`. + + :class:`enum.Enum` attributes accesses can be really slow, and slow down the execution noticeably. + This reimplementation doesn't suffer the same problems, but also does not reimplement everything. + """ + + name = None + """The name of the current Enum entry, same as :func:`enum.Enum.name` + """ + + value = None + """The value of the current Enum entry, same as :func:`enum.Enum.value` + """ + + _value_to_entry = dict() # A dict of all values mapping to the entries in the enum + + def __new__(cls, value): + try: + return cls._value_to_entry[value] + except KeyError: + if type(value) is cls: # pylint: disable=unidiomatic-typecheck + return value + raise ValueError("Unknown enum value: {}".format(value)) + + def __eq__(self, other): + if self.__class__ is not other.__class__: + raise ValueError("Unexpected comparison between {} and {}".format(self, repr(other))) + # Enums instances are unique, so creating an instance with the same value as another will just + # send back the other one, hence we can use an identity comparison, which is much faster than '==' + return self is other + + def __ne__(self, other): + if self.__class__ is not other.__class__: + raise ValueError("Unexpected comparison between {} and {}".format(self, repr(other))) + return self is not other + + def __hash__(self): + return hash(id(self)) + + def __str__(self): + return "{}.{}".format(self.__class__.__name__, self.name) + + def __reduce__(self): + return self.__class__, (self.value,) + + +class Scope(FastEnum): """Defines the scope of dependencies to include for a given element when iterating over the dependency graph in APIs like :func:`Element.dependencies() ` @@ -116,7 +164,7 @@ class CoreWarnings(): # # Strength of cache key # -class _KeyStrength(Enum): +class _KeyStrength(FastEnum): # Includes strong cache keys of all build dependencies and their # runtime dependencies. -- cgit v1.2.1