From 1625de0e932fa8419d7da079c206cf6bf6c71559 Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Wed, 10 Oct 2018 23:08:42 +0900 Subject: doc/source/conf.py: Added `images` directory for statically included files --- doc/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 5aaaed280..269053675 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -160,7 +160,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['badges'] +html_static_path = ['badges', 'images'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied -- cgit v1.2.1 From 38507b4e1532273c13018472080c3fca3897fd92 Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Sat, 13 Oct 2018 15:42:10 +0900 Subject: doc/source/format_intro.rst: Adding link anchor for include directives --- doc/source/format_intro.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/format_intro.rst b/doc/source/format_intro.rst index 23c37aeae..86a3d336a 100644 --- a/doc/source/format_intro.rst +++ b/doc/source/format_intro.rst @@ -290,6 +290,9 @@ free form and not validated. (=): - cp src/program %{bindir} + +.. _format_directives_include: + (@) Include ~~~~~~~~~~~ Indicates that content should be loaded from files. 
-- cgit v1.2.1 From 26164bcdb2d89ec91828acbdfd3f569b770498ee Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Mon, 29 Oct 2018 22:57:11 +0900 Subject: MANIFEST.in: Include SVG and ODG files in source distributions --- MANIFEST.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MANIFEST.in b/MANIFEST.in index 5c001616a..41c09d74e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -16,6 +16,8 @@ recursive-include doc/source *.rst recursive-include doc/source *.py recursive-include doc/source *.in recursive-include doc/source *.html +recursive-include doc/source *.odg +recursive-include doc/source *.svg recursive-include doc/examples * # Tests -- cgit v1.2.1 From 3a23c8f6ecb38a513b26b381b38bc344dfe36486 Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Wed, 10 Oct 2018 23:19:47 +0900 Subject: doc: Adding new architecture document with initial "Overview of modules" section --- doc/source/arch_overview.rst | 8 + doc/source/image-sources/arch-overview.odg | Bin 0 -> 20118 bytes doc/source/images/arch-overview.svg | 2058 ++++++++++++++++++++++++++++ doc/source/index.rst | 1 + doc/source/main_architecture.rst | 11 + 5 files changed, 2078 insertions(+) create mode 100644 doc/source/arch_overview.rst create mode 100644 doc/source/image-sources/arch-overview.odg create mode 100644 doc/source/images/arch-overview.svg create mode 100644 doc/source/main_architecture.rst diff --git a/doc/source/arch_overview.rst b/doc/source/arch_overview.rst new file mode 100644 index 000000000..f01dec42f --- /dev/null +++ b/doc/source/arch_overview.rst @@ -0,0 +1,8 @@ + + +Overview of modules +=================== +Below is a basic overview of the modules, what they are for, and generally +what their stacking order is internally in BuildStream. + +.. 
image:: images/arch-overview.svg diff --git a/doc/source/image-sources/arch-overview.odg b/doc/source/image-sources/arch-overview.odg new file mode 100644 index 000000000..2f342987b Binary files /dev/null and b/doc/source/image-sources/arch-overview.odg differ diff --git a/doc/source/images/arch-overview.svg b/doc/source/images/arch-overview.svg new file mode 100644 index 000000000..69c837465 --- /dev/null +++ b/doc/source/images/arch-overview.svgrontend + + + + + + + + + + + + + Main Entry Point (cli.py)Implements command line interface + + + + + + + + + + + + + Main Application State (App)Initializes the “Stream”, handles logging and user interactions + + + + + + + + + + + + + + + + + + + + Loggerwidget.py + + + + + + + + + + + + + Status Barstatus.py + + + + + + + + + + + + + Completionscomplete.py + + + + + + + + + + + + + Color profilesprofile.py + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Core, Frontend facing API + + + + + + + + + + + + + ContextUser configuration, CLI arguments + + + + + + + + + + + + + + + + + + + + ProjectLoaded project.conf configuration + + + + + + StreamLoading of the project, implementation of all main user facing commands in theore Internals + + + + + + + + + + + + + PlatformAbstract class providing platform specific routines. + + + + + + The Frontend implements the main command line interface, status bar, master log formatting, and any interactivity with the user. This is cleanly separated from the core BuildStream application. + + + + + + + + + + + + + Scheduler(Scheduling of element processing jobs)The Scheduler is in charge of processing elements in job queues. 
+ + + + + + + + + + + + + Job(Abstract tasks) + + + + + + + + + + + + + Queue(Abstract job queues) + + + + + + + + + + + + + Loader(Load the elements in a project)The loader loads elements and sources from the project directory, transforming them into data structures ready for Element and Source instantiation + + + + + + + + + + + + + LoadElement(Intermediate element) + + + + + + + + + + + + + MetaElement & MetaSource(Loader output) + + + + + + The Frontend creates the toplevel Project, Context and Stream, and receives Elements and Sources by way of loading the Stream. + + + + + + These represent the main active internal components which sit below the frontend facing business logic in terms of module stacking orderptionsLoads and resolves project options, and processes conditional statements + + + + + + + + + + + + + VariablesResolves a collection of variables, and performs substitutions on strings containing variables + + + + + + + + + + + + + IncludesProcesses include statements and composites the resulting dictionaries + + + + + + YAML Parsing Utilities + + + + + + + + + + + + + YAMLLow level utilities for parsing YAML, storing the provenance of loaded values for error reporting, composition of dictionaries and validation of loaded valuesutils.pyVarious miscellaneous utilities, includes the directory staging mechanics + + + + + + + + + + + + + signals.pyUtilities and context managers for dealing with UNIX signal handling + + + + + + + + + + + + + utils.pyVarious miscellaneous utilities, includes the directory staging mechanics + + + + + + + + + + + + + WorkspacesHelper object for (de)serialization of the workspace configuration + + + + + + Low Level Subsystems + + + + + + + + + + + + + Element, Source, and PluginData Model + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CAS Server + + + 
+ + + + + + + + + + CasServerContent Addressable Storage Server + + + + + + The CAS Server only needs to interact with Core Internals and lower levels of the stack. + + + + + + + + + + + + + CasCacheContent Addressable Storage implementation, used by the artifact cache and CAS server + + + + + + + + + + + + + FUSE / SafeHardlinksThe Fuse layer implements a copy on write hardlink experience for the Sandbox implementations + + + + + + + + + + + + + SourceFactory / ElementFactoryLow level factory for instantiating Source and Element plugins, one plugin namespace per project + + + + + + + + + + + + + ArtifactCacheArtifact Storage Module + + + + + + + + + + + + + SandboxAbstract Class for running commands in the isolated build environment + + + + + + + \ No newline at end of file diff --git a/doc/source/index.rst b/doc/source/index.rst index 494e90c67..4d5fe9ac2 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -20,6 +20,7 @@ Later sections provide detailed information on BuildStream internals. main_using main_core CONTRIBUTING + main_architecture For any other information, including `how to install BuildStream `_, refer to `the BuildStream website `_. diff --git a/doc/source/main_architecture.rst b/doc/source/main_architecture.rst new file mode 100644 index 000000000..2d407e73f --- /dev/null +++ b/doc/source/main_architecture.rst @@ -0,0 +1,11 @@ + + +Architecture +============ +This section provides details on the overall BuildStream architecture. + + +.. 
toctree:: + :maxdepth: 2 + + arch_overview -- cgit v1.2.1 From 41d370f66116dd3bc3d20e86ee2a550c821a1eaa Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Fri, 12 Oct 2018 03:07:22 +0900 Subject: doc: Adding new architecture document describing the data model --- doc/source/arch_data_model.rst | 157 +++++++ .../image-sources/arch-datamodel-context.odg | Bin 0 -> 17591 bytes .../arch-datamodel-element-composition.odg | Bin 0 -> 17831 bytes .../image-sources/arch-datamodel-element.odg | Bin 0 -> 14318 bytes .../arch-datamodel-source-composition.odg | Bin 0 -> 16877 bytes doc/source/image-sources/arch-datamodel-source.odg | Bin 0 -> 11040 bytes doc/source/images/arch-datamodel-context.svg | 211 ++++++++++ .../images/arch-datamodel-element-composition.svg | 463 +++++++++++++++++++++ doc/source/images/arch-datamodel-element.svg | 183 ++++++++ .../images/arch-datamodel-source-composition.svg | 255 ++++++++++++ doc/source/images/arch-datamodel-source.svg | 167 ++++++++ doc/source/main_architecture.rst | 1 + 12 files changed, 1437 insertions(+) create mode 100644 doc/source/arch_data_model.rst create mode 100644 doc/source/image-sources/arch-datamodel-context.odg create mode 100644 doc/source/image-sources/arch-datamodel-element-composition.odg create mode 100644 doc/source/image-sources/arch-datamodel-element.odg create mode 100644 doc/source/image-sources/arch-datamodel-source-composition.odg create mode 100644 doc/source/image-sources/arch-datamodel-source.odg create mode 100644 doc/source/images/arch-datamodel-context.svg create mode 100644 doc/source/images/arch-datamodel-element-composition.svg create mode 100644 doc/source/images/arch-datamodel-element.svg create mode 100644 doc/source/images/arch-datamodel-source-composition.svg create mode 100644 doc/source/images/arch-datamodel-source.svg diff --git a/doc/source/arch_data_model.rst b/doc/source/arch_data_model.rst new file mode 100644 index 000000000..467859a7d --- /dev/null +++ 
b/doc/source/arch_data_model.rst @@ -0,0 +1,157 @@ + + +Data model +========== +This section details the data model on which the BuildStream core operates. This +includes an overview of the project data model which is BuildStream's main input, +the user preferences, and local state. + + +Project +------- +The ``Project`` object is the main component of a given BuildStream *project*, and +is responsible for loading and validating the :ref:`project.conf `, and +providing this loaded *project data* in a convenient way to the BuildStream core. + +Conceptually, the *project* is a container for the :mod:`Elements `, +which are declared within a user's project, and as such acts as a factory for instantiating +elements at load time. + + +Element +------- +:mod:`Elements ` are the main processing unit in a pipeline. These +are the loaded representation of the ``.bst`` files loaded from the :ref:`project's element path +`. + +The *Element* is an abstract base class which cannot do anything on its own; its +concrete class is defined by *plugins* which are either included in the BuildStream +:ref:`core set of plugins ` or loaded from external sources :ref:`defined by the project +` + +The responsibilities of an element include: + +* Loading the element's configuration from the core provided dictionary +* Providing a unique key for any element specific configuration which might + affect the output produced by the element +* Configuring the sandbox +* Staging the data into the sandbox, which might include Sources and + the outputs of previous elements +* Assembling the output *artifact* + + +Element data structure +~~~~~~~~~~~~~~~~~~~~~~ +The properties of an element are a composition of what the BuildStream core understands, +the configurations exposed by the Element plugin, and free form data which allows +annotations and configurations which can be read back by reverse dependencies during +processing, as illustrated here: + +.. 
image:: images/arch-datamodel-element.svg + :align: center + + +Element composition +~~~~~~~~~~~~~~~~~~~ +The element is composed of configurations which are sourced from various entry +points using the low level YAML utilities. + +This composition takes place after :ref:`includes ` and +:ref:`conditional ` directives are processed, while +:ref:`list composition ` directives are processed +as a result of this composition. + +Here is a diagram showing which sources take precedence in the composition process +which results in the final element configuration being resolved: + +.. image:: images/arch-datamodel-element-composition.svg + :align: center + +Note that not all *BuildStream Core Data* is understood by the *Element*, but a great +deal of configurations understood by the *Element* is also understood by the core and +has default configurations built into BuildStream and configurable with the project +configuration. These include values such as *variables*, *environment*, *sandbox*, etc. + +As shown above, composition is performed in two stages, as we only need to composite +the data from the toplevel element declaration against the composition of previous +stages every time we instantiate an element. + + +Source +------ +:mod:`Sources ` are the abstract objects which are responsible +for obtaining remote source code or data to import into the build environment, and +ensuring that it is done in a bit-for-bit reproducible way without any contamination +of the host or build environment. + +This is to say that: + +* User configuration on the host, or filesystem outside of BuildStream designated + directories, must never be modified as a side effect of running BuildStream. + +* When the Source uses host tools, host side configurations must never result in + deviations of what is staged to a build directory. The Source must behave exactly + the same way regardless of host side configurations. 
+ +The responsibilities of a source include: + +* Loading the source's configuration from the core provided dictionary +* Providing a unique key for any source specific configuration which might + affect the staged source +* Implementing discovery of new versions of the source upstream (referred to as *"tracking"*) +* Staging the unpacked source to a given directory +* Preparing workspaces + + +Source data structure +~~~~~~~~~~~~~~~~~~~~~ +Similar to the *Element*, the properties of a source are a composition of what +the BuildStream core understands and the configurations exposed by the Source +plugin: + +.. image:: images/arch-datamodel-source.svg + :align: center + +.. note:: + + In .bst files, the BuildStream core configurations and Source specific configurations + share the same dictionary. + + Strictly speaking this is limiting, but provides a measure of convenience as .bst + files are a bit less wordy to express. + + +Source composition +~~~~~~~~~~~~~~~~~~ +Source composition is much simpler than Element composition, because defaults +cannot be specified at the project level, except for Source type specific +value overrides. + +.. image:: images/arch-datamodel-source-composition.svg + :align: center + + +Context +------- +The Context object is a very central part of the BuildStream data model, and is +not a part of the Project data described above but rather is where we load and +store all of the user preferences. + +User preferences are sourced from various locations, but usually have a default, +an option in the user configuration file, and an option to override it on the +command line. + +.. image:: images/arch-datamodel-context.svg + :align: center + +Aside from being a focal point for loading and storing all user configuration, +the Context object also plays a central role in the logging framework. + + +Workspaces +---------- +The Workspaces object is yet another kind of state. 
Unlike the Context and +the Project data model, the Workspaces object loads, saves and stores in +memory the local state regarding a user's active and open workspaces. + +These are stored in the local state ``.bst/`` subdirectory of users projects. diff --git a/doc/source/image-sources/arch-datamodel-context.odg b/doc/source/image-sources/arch-datamodel-context.odg new file mode 100644 index 000000000..db4d62137 Binary files /dev/null and b/doc/source/image-sources/arch-datamodel-context.odg differ diff --git a/doc/source/image-sources/arch-datamodel-element-composition.odg b/doc/source/image-sources/arch-datamodel-element-composition.odg new file mode 100644 index 000000000..b1bcfbf13 Binary files /dev/null and b/doc/source/image-sources/arch-datamodel-element-composition.odg differ diff --git a/doc/source/image-sources/arch-datamodel-element.odg b/doc/source/image-sources/arch-datamodel-element.odg new file mode 100644 index 000000000..f1300aada Binary files /dev/null and b/doc/source/image-sources/arch-datamodel-element.odg differ diff --git a/doc/source/image-sources/arch-datamodel-source-composition.odg b/doc/source/image-sources/arch-datamodel-source-composition.odg new file mode 100644 index 000000000..b61718550 Binary files /dev/null and b/doc/source/image-sources/arch-datamodel-source-composition.odg differ diff --git a/doc/source/image-sources/arch-datamodel-source.odg b/doc/source/image-sources/arch-datamodel-source.odg new file mode 100644 index 000000000..ceb9610f6 Binary files /dev/null and b/doc/source/image-sources/arch-datamodel-source.odg differ diff --git a/doc/source/images/arch-datamodel-context.svg b/doc/source/images/arch-datamodel-context.svg new file mode 100644 index 000000000..b22fbf33e --- /dev/null +++ b/doc/source/images/arch-datamodel-context.svg @@ -0,0 +1,211 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Command Line ArgumentsArguments specified on the command line have the final say for any user preference.User ConfigurationThe user configuration file overrides any of the previous settings.Project RecommendationsWhile we try to keep project data and user preferences as separate as possible, in some cases it is convenient for the project to suggest a value, such as which artifact server to use.Default ConfigurationBuildStream ships an internal YAML file with the same structure as the user configuration file, this file specifies all of the default configurations. + + + + + + + + + + + + + Default Configuration + + + + + + + + + + + + + Project Recommend + + + + + + + + + + + + + User Configuration + + + + + + + + + + + + + Command Line Arguments + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/source/images/arch-datamodel-element-composition.svg b/doc/source/images/arch-datamodel-element-composition.svg new file mode 100644 index 000000000..985fe7213 --- /dev/null +++ b/doc/source/images/arch-datamodel-element-composition.svg @@ -0,0 +1,463 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BuildStream Core DataPublic Domain Data + + + + + + + + + + + + + Core Defaults + + + + + + + + + + + + + Element Declaration + + + + + + + + + + + + + Project Configuration + + + + + + + + + + + + + Element Defaults + + + + + + + + + + + + + Project Overrides + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Element Configuration Data + + + + + + + + + + + + + Element 
Declaration + + + + + + + + + + + + + Element Defaults + + + + + + + + + + + + + Project Overrides + + + + + + + + + + + + + + + + + + + + Element Configuration Data is composited in the same way as Core Data and Public Data, except that it is not understood by the project configuration so there are no defaults. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Composited the first time that a Element of a given “kind” is instantiated.This is cached on the given Element’s class data for later reuse. + + + + + + Composited every time against the cached class data below every time an Element is instantiated. + + + + + + + \ No newline at end of file diff --git a/doc/source/images/arch-datamodel-element.svg b/doc/source/images/arch-datamodel-element.svg new file mode 100644 index 000000000..ab75414da --- /dev/null +++ b/doc/source/images/arch-datamodel-element.svg @@ -0,0 +1,183 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BuildStream Core DataCore data is understood by the BuildStream core. This includes the list of source declarations which are components of the Element.Element Configuration DataElement Configuration Data is defined by Element plugin classes. This data is internal to a given plugin.Public Domain DataPublic Domain Data is free form and not validated, this provides a way to declare data that is intended to be read back and processed by reverse dependencies. 
+ + + + + + Elementkind: autotoolsdescription: Optional description about foodepends:- gtk+.bst- clutter.bstsources:- <source declarations here>config: configure-commands: (<): - cp %{datadir}/automake-*/config.{sub,guess} .public: bst: integration-commands: - update-flying-pony-cache -f ${datadir}/ponies + + + + + + + \ No newline at end of file diff --git a/doc/source/images/arch-datamodel-source-composition.svg b/doc/source/images/arch-datamodel-source-composition.svg new file mode 100644 index 000000000..60de23620 --- /dev/null +++ b/doc/source/images/arch-datamodel-source-composition.svg @@ -0,0 +1,255 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BuildStream Core DataSource Configuration Data + + + + + + + + + + + + + Source Declaration + + + + + + + + + + + + + Project Overrides + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Composited the first time that a Source of a given “kind” is instantiated.This is cached on the given Source’s class data for later reuse.Note: In the case of Sources, defaults are provided in code. + + + + + + Composited every time against the cached class data below every time an Source is instantiated. 
+ + + + + + + + + + + + + Source Defaults + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/source/images/arch-datamodel-source.svg b/doc/source/images/arch-datamodel-source.svg new file mode 100644 index 000000000..55e303311 --- /dev/null +++ b/doc/source/images/arch-datamodel-source.svg @@ -0,0 +1,167 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BuildStream Core DataCore data understood by BuildStream.Source Configuration DataSource Configuration Data is defined by Source plugin classes. + + + + + + Sourcekind: gitdirectory: sub/diruri: upstream:foo.gittrack: masterref: bbf775301a08b9a578ef7f647bc35fe13e816241 + + + + + + + \ No newline at end of file diff --git a/doc/source/main_architecture.rst b/doc/source/main_architecture.rst index 2d407e73f..6ca2167b1 100644 --- a/doc/source/main_architecture.rst +++ b/doc/source/main_architecture.rst @@ -9,3 +9,4 @@ This section provides details on the overall BuildStream architecture. 
:maxdepth: 2 arch_overview + arch_data_model -- cgit v1.2.1 From bed7f7ac6411b3b4b6b34986da4855905cdbb9b8 Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Fri, 12 Oct 2018 03:10:08 +0900 Subject: doc: Adding new architecture document describing the dependency model --- doc/source/arch_dependency_model.rst | 71 +++++ .../image-sources/arch-dependency-model-build.odg | Bin 0 -> 14060 bytes .../arch-dependency-model-runtime.odg | Bin 0 -> 14123 bytes doc/source/image-sources/arch-dependency-model.odg | Bin 0 -> 13850 bytes doc/source/images/arch-dependency-model-build.svg | 267 ++++++++++++++++++ .../images/arch-dependency-model-runtime.svg | 266 ++++++++++++++++++ doc/source/images/arch-dependency-model.svg | 299 +++++++++++++++++++++ doc/source/main_architecture.rst | 1 + 8 files changed, 904 insertions(+) create mode 100644 doc/source/arch_dependency_model.rst create mode 100644 doc/source/image-sources/arch-dependency-model-build.odg create mode 100644 doc/source/image-sources/arch-dependency-model-runtime.odg create mode 100644 doc/source/image-sources/arch-dependency-model.odg create mode 100644 doc/source/images/arch-dependency-model-build.svg create mode 100644 doc/source/images/arch-dependency-model-runtime.svg create mode 100644 doc/source/images/arch-dependency-model.svg diff --git a/doc/source/arch_dependency_model.rst b/doc/source/arch_dependency_model.rst new file mode 100644 index 000000000..ff802660c --- /dev/null +++ b/doc/source/arch_dependency_model.rst @@ -0,0 +1,71 @@ + + +Dependency model +================ +Elements in the data model are related by their *dependencies*. In BuildStream, there +are two types of relationship that an Element may have with a *dependency*, +:ref:`build and runtime dependencies `. More often than not, +an element will require its dependency both to *build* and also at *runtime*. 
+ +Consider a simple build scenario where you want to build an application, which +requires a service be present in order to function properly at *runtime*, a +compiler that need only be present at *build time*, and a runtime environment +or base system which is required all the time: + +.. image:: images/arch-dependency-model.svg + +Note that in BuildStream we are only concerned with element level granularity +in our dependency model, and there is no way to depend on only a part of an element's +output *artifact*. Instead we can employ :mod:`compose ` and +:mod:`filter ` elements in conjunction with :ref:`split rules ` +to achieve sub artifact granularity at build and deploy time. + +When developing BuildStream, it is important to understand the distinction +between dependency types and element :class:`Scope `, +which acts as a selector of which elements to consider in the dependency +graph of a given element when performing recursive activities. + + +Scope +~~~~~ + +* **Scope.ALL** + + In the :func:`Scope.ALL ` scope, all elements + are considered. + + This is used in some cases to forcefully fetch, pull or build all dependencies + of a given element, even when not all of them are needed. + + This scope simply includes all of the dependencies, including the element itself. + +* **Scope.RUN** + + In the :func:`Scope.RUN ` scope, only elements + which are required to run are considered, including the element itself. + + This is used when, for example, launching a ``bst shell`` environment + for the purpose of running, or in any case where we need to consider which + elements are required to run. + + .. image:: images/arch-dependency-model-runtime.svg + :align: center + +* **Scope.BUILD** + + In the :func:`Scope.BUILD ` scope, only + elements which are required to build are considered, *excluding* the + element we intend to build. + + .. 
image:: images/arch-dependency-model-build.svg + :align: center + + Note that build type dependencies are not transient, which is why the + *Bootstrap* element is not selected when pulling in the *Compiler* to + build the *Application*. + + Further, note that we still follow the *Compiler* dependency on the + *Base Runtime*, this is because when we depend on an element for the + purpose of *building*, we expect that element to *run* and as such + we include all of the *runtime dependencies* of *build dependencies* + when selecting the *Scope.BUILD* elements. diff --git a/doc/source/image-sources/arch-dependency-model-build.odg b/doc/source/image-sources/arch-dependency-model-build.odg new file mode 100644 index 000000000..9a8fe69cb Binary files /dev/null and b/doc/source/image-sources/arch-dependency-model-build.odg differ diff --git a/doc/source/image-sources/arch-dependency-model-runtime.odg b/doc/source/image-sources/arch-dependency-model-runtime.odg new file mode 100644 index 000000000..b771f8269 Binary files /dev/null and b/doc/source/image-sources/arch-dependency-model-runtime.odg differ diff --git a/doc/source/image-sources/arch-dependency-model.odg b/doc/source/image-sources/arch-dependency-model.odg new file mode 100644 index 000000000..0292db350 Binary files /dev/null and b/doc/source/image-sources/arch-dependency-model.odg differ diff --git a/doc/source/images/arch-dependency-model-build.svg b/doc/source/images/arch-dependency-model-build.svg new file mode 100644 index 000000000..67bc5b4d2 --- /dev/null +++ b/doc/source/images/arch-dependency-model-build.svg @@ -0,0 +1,267 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Build scope for the Application element + + + + + + + + + + + + + Application + + + + + + + + + + + + + ServiceRequired at runtime + + + + + + + + 
+ + + + + CompilerRequired at build time + + + + + + + + + + + + + Base RuntimeRequired at all times + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Service DefinitionXML files needed to build the Service + + + + + + + + + + + + + + + + + + + + BootstrapSelf Hosting Compiler + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/source/images/arch-dependency-model-runtime.svg b/doc/source/images/arch-dependency-model-runtime.svg new file mode 100644 index 000000000..dc758c844 --- /dev/null +++ b/doc/source/images/arch-dependency-model-runtime.svg @@ -0,0 +1,266 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Runtime scope for the Application element + + + + + + + + + + + + + Application + + + + + + + + + + + + + ServiceRequired at runtime + + + + + + + + + + + + + CompilerRequired at build time + + + + + + + + + + + + + Base RuntimeRequired at all times + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Service DefinitionXML files needed to build the Service + + + + + + + + + + + + + + + + + + + + BootstrapSelf Hosting Compiler + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/source/images/arch-dependency-model.svg b/doc/source/images/arch-dependency-model.svg new file mode 100644 index 000000000..3c755b023 --- /dev/null +++ b/doc/source/images/arch-dependency-model.svg @@ -0,0 +1,299 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Application + + + + + + + + + + + + + 
ServiceRequired at runtime + + + + + + + + + + + + + CompilerRequired at build time + + + + + + + + + + + + + Base RuntimeRequired at all times + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + LegendBuild DependencyRuntime DependencyBuild & Runtime Dependency + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Service DefinitionXML files needed to build the Service + + + + + + + + + + + + + + + + + + + + BootstrapSelf Hosting Compiler + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/source/main_architecture.rst b/doc/source/main_architecture.rst index 6ca2167b1..f31ffdbca 100644 --- a/doc/source/main_architecture.rst +++ b/doc/source/main_architecture.rst @@ -10,3 +10,4 @@ This section provides details on the overall BuildStream architecture. arch_overview arch_data_model + arch_dependency_model -- cgit v1.2.1 From 9bdf7f403a2ecf27ac191ba11b953191e05ca498 Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Sat, 13 Oct 2018 18:29:59 +0900 Subject: doc: Adding new architecture document about highlevel program flow --- doc/source/arch_program_flow.rst | 11 + doc/source/image-sources/arch-program-flow.odg | Bin 0 -> 17495 bytes doc/source/images/arch-program-flow.svg | 1819 ++++++++++++++++++++++++ doc/source/main_architecture.rst | 1 + 4 files changed, 1831 insertions(+) create mode 100644 doc/source/arch_program_flow.rst create mode 100644 doc/source/image-sources/arch-program-flow.odg create mode 100644 doc/source/images/arch-program-flow.svg diff --git a/doc/source/arch_program_flow.rst b/doc/source/arch_program_flow.rst new file mode 100644 index 000000000..9198ee114 --- /dev/null +++ b/doc/source/arch_program_flow.rst @@ -0,0 +1,11 @@ + + +Overview of program flow +======================== +Here is a little chart to show the approximate highlevel program flow of +BuildStream. 
This is in no way a complete flow chart of BuildStream, but +should provide some highlevel insight into how the program operates in +general. + +.. image:: images/arch-program-flow.svg + :align: center diff --git a/doc/source/image-sources/arch-program-flow.odg b/doc/source/image-sources/arch-program-flow.odg new file mode 100644 index 000000000..9ad972572 Binary files /dev/null and b/doc/source/image-sources/arch-program-flow.odg differ diff --git a/doc/source/images/arch-program-flow.svg b/doc/source/images/arch-program-flow.svg new file mode 100644 index 000000000..ba196a90d --- /dev/null +++ b/doc/source/images/arch-program-flow.svg @@ -0,0 +1,1819 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Parse Command Line + + + + + + + + + + + + + + + + + + + + Load Context + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enter Application Context + + + + + + + + + + + + + + + + + + + + Load Platform + + + + + + + + + + + + + + + + + + + + ArtifactCache Preflight + + + + + + + + + + + + + + + + + + + + Initialize Logging + + + + + + + + + + + + + + + + + + + + + + + + + + + Exit with error + + + + + + + + + + + + + Load Toplevel Project + + + + + + + + + + + + + + + + + + + + Initialize Stream + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Initialize Applicationun Stream API + + + + + + + + + + + + + Load Elements + + + + + + + + + + + + + + + + + + + + Resolve Keys + + + + + + + + + + + + + + + + + + + + Resolve Cached State + + + + + + + + + + + + + + + + + + + + Construct Scheduler Queues + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EventSwitch + + + + + + + + + + + + + + + + + + + + Log Line + + + + + + + + + + + + + + + + + + + + Logging Event + + + + + + Interrupt or Error Event + + + + + + + + App isInteractive + + + + + + + + + + + + + Run SchedulerRun the event loop, and process the elements until they have passed through all of the queues, or until there is a terminate condition. 
+ + + + + + + + + + + + + Interactive + + + + + + + + + + + + + Pause Scheduler + + + + + + + + Prompt User + + + + + + + + + + + + + + + + + + + + Resume Scheduler + + + + + + + + + + + + + Set Terminate Condition + + + + + + + + + + + + + + + + + + + + Non Interactive + + + + + + Continue + + + + + + + + + + + + + Set Terminate Condition + + + + + + + + + + + + + Terminate + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Log Session Summary + + + + + + + + + + + + + Clean Exit + + + + + + + + + + + + + Exception + + + + + + Event + + + + + + + \ No newline at end of file diff --git a/doc/source/main_architecture.rst b/doc/source/main_architecture.rst index f31ffdbca..6e284f31e 100644 --- a/doc/source/main_architecture.rst +++ b/doc/source/main_architecture.rst @@ -9,5 +9,6 @@ This section provides details on the overall BuildStream architecture. :maxdepth: 2 arch_overview + arch_program_flow arch_data_model arch_dependency_model -- cgit v1.2.1 From 6b8bdf35bbf3f06b5af3558abf70ae19812a22f7 Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Sat, 13 Oct 2018 20:25:48 +0900 Subject: doc: Adding new architecture document about how the scheduler works. 
--- doc/source/arch_scheduler.rst | 132 ++++ doc/source/image-sources/arch-scheduler-job.odg | Bin 0 -> 15465 bytes .../image-sources/arch-scheduler-queue-ports.odg | Bin 0 -> 16179 bytes doc/source/image-sources/arch-scheduler-queues.odg | Bin 0 -> 15591 bytes doc/source/image-sources/arch-scheduler-run.odg | Bin 0 -> 14172 bytes doc/source/images/arch-scheduler-job.svg | 750 +++++++++++++++++++++ doc/source/images/arch-scheduler-queue-ports.svg | 318 +++++++++ doc/source/images/arch-scheduler-queues.svg | 488 ++++++++++++++ doc/source/images/arch-scheduler-run.svg | 437 ++++++++++++ doc/source/main_architecture.rst | 1 + 10 files changed, 2126 insertions(+) create mode 100644 doc/source/arch_scheduler.rst create mode 100644 doc/source/image-sources/arch-scheduler-job.odg create mode 100644 doc/source/image-sources/arch-scheduler-queue-ports.odg create mode 100644 doc/source/image-sources/arch-scheduler-queues.odg create mode 100644 doc/source/image-sources/arch-scheduler-run.odg create mode 100644 doc/source/images/arch-scheduler-job.svg create mode 100644 doc/source/images/arch-scheduler-queue-ports.svg create mode 100644 doc/source/images/arch-scheduler-queues.svg create mode 100644 doc/source/images/arch-scheduler-run.svg diff --git a/doc/source/arch_scheduler.rst b/doc/source/arch_scheduler.rst new file mode 100644 index 000000000..bc1a3efcd --- /dev/null +++ b/doc/source/arch_scheduler.rst @@ -0,0 +1,132 @@ + + +Scheduler +========= +The *Scheduler* is what is responsible for running the main event loop and +dispatching *Jobs* to complete tasks on behalf of *Queues*. + + +Jobs +~~~~ +The basic functionality of spawning tasks is implemented by the base Job +class, which is derived in a few ways but for now we'll only talk about the +ElementJob type since that is the most central. + +The Job class has the following responsibilities: + +* Spawning the given job as a subprocess. 
+ +* Offering an abstract method for subclasses to handle the outcome of + a job when it completes. + +* Forcefully terminating its subprocess. + +* Suspending and resuming its subprocess. + +* Declaring the types of resources it will require, and which resources + it will require exclusively. + + +Below is a rough outline of the interactions between the main process +and job specific child process: + +.. image:: images/arch-scheduler-job.svg + :align: center + + +Resources +~~~~~~~~~ +To understand how we manage load balancing in the scheduler it is important +to understand *resources*. + +For the scheduler, *resources* are domains which a Job can request which represent +physical resources, such as the CPU or some network bandwidth, or the local +artifact cache. + +This is used by the Scheduler when consuming Jobs from Queues and deciding +how many jobs can be run at a given time. + + +Queues +~~~~~~ +The various Queue implementations in the Scheduler can be combined such that +parallelism is maximized. For example one can *Track* and *Build* inside the +same session, in this way one does not need to wait for a tracking session to +complete in order to start building. + +The input elements to the scheduler are expected to be sorted in depth first +order whenever the order is important, again allowing maximum parallelism +at build time. + +.. image:: images/arch-scheduler-queues.svg + :align: center + +The Queue implementations are: + +* **Track** + + The tracking queue must always come first if it is used in the given session. + This is because the Source *"ref"*, and consequently the cache key of any elements + which have been requested for tracking, cannot be known until tracking is complete. + +* **Pull** + + The pull queue tries to obtain a built artifact from a remote artifact server, + it should be placed in advance of the fetch queue if used, since we can safely + avoid fetching if fetching is not imperative, and we already have a cached + artifact. 
+ +* **Fetch** + + The fetch queue attempts to download source code to build the given element, + this activity is sometimes skipped if the artifact is already present, or + if the exact source code is already present. + +* **Build** + + The build queue attempts to build the element if its artifact is not locally + present. + +* **Push** + + The push queue attempts to push the resulting artifact to a remote artifact + server. + + +Queue internals +~~~~~~~~~~~~~~~ +Internally, the queue has an input queue and an output queue. + +.. image:: images/arch-scheduler-queue-ports.svg + :align: center + +When elements are on the input queue they get queried for their *QueueStatus* +in order to determine which elements should be processed or moved from the input +queue to the output queue. When elements are on the output queue, they are ready +to be consumed by the scheduler and moved on to the next queue; however each +queue holds on to the result status of every element which passed through for later +reference and reports to the user. + + +Scheduler +~~~~~~~~~ +The scheduler itself has the main responsibility of popping off jobs from +the existing queues it was given, and running the jobs as long as elements +remain to be processed. + +A huge simplification of this can be visualized as follows: + +.. image:: images/arch-scheduler-run.svg + :align: center + +This is implemented by iterating over the Queues given to the scheduler +and processing any *"Ready"* elements so long as there are sufficient resource +tokens available for the ready elements to run, and by moving the *"Done"* +elements onto the subsequent queues in the list of queues. + +.. note:: + + When looking for *"Ready"* elements in the queues, we iterate over the + queue list in *reverse order*. This is important to allow elements to + get as far as they can in the queue list as fast as possible, and helps + to prevent resource starvation. 
diff --git a/doc/source/image-sources/arch-scheduler-job.odg b/doc/source/image-sources/arch-scheduler-job.odg new file mode 100644 index 000000000..0171ad553 Binary files /dev/null and b/doc/source/image-sources/arch-scheduler-job.odg differ diff --git a/doc/source/image-sources/arch-scheduler-queue-ports.odg b/doc/source/image-sources/arch-scheduler-queue-ports.odg new file mode 100644 index 000000000..1a13ed316 Binary files /dev/null and b/doc/source/image-sources/arch-scheduler-queue-ports.odg differ diff --git a/doc/source/image-sources/arch-scheduler-queues.odg b/doc/source/image-sources/arch-scheduler-queues.odg new file mode 100644 index 000000000..792890edf Binary files /dev/null and b/doc/source/image-sources/arch-scheduler-queues.odg differ diff --git a/doc/source/image-sources/arch-scheduler-run.odg b/doc/source/image-sources/arch-scheduler-run.odg new file mode 100644 index 000000000..4a596e413 Binary files /dev/null and b/doc/source/image-sources/arch-scheduler-run.odg differ diff --git a/doc/source/images/arch-scheduler-job.svg b/doc/source/images/arch-scheduler-job.svg new file mode 100644 index 000000000..9a50135d8 --- /dev/null +++ b/doc/source/images/arch-scheduler-job.svgetup signal handlers + + + + + + + + + + + + + Block signals + + + + + + + + + + + + + fork() + + + + + + + + + + + + + Unblock signals + + + + + + + + + + + + + + + + + + + + + + + + + + + Main BuildStream Process + + + + + + Job Process + + + + + + + + + + + + + Initialize subprocess logging redirection + + + + + + + + + + + + + + + + + + + + Unblock selected signals (ready) + + + + + + + + + + + + + + + + + + + + Run the job payload + + + + + + + + + + + + + + + + + + + + Log messages + + + + + + + + + + + + + + + + + + + Start New Job + + + + + + + + + + + + Receive IPC Messages + + + + + + + + + + + + + Report job result and any other state + + + + + + + + + + + + + Collect results on job instance + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Collect 
exit status + + + + + + + + + + + + + Exit with symbolic exit status + + + + + + + + + + + + Interruptible Job Harness + + + + + + + + + + + + + Inform the job owner that a job completed, hand over the result + + + + + + + + + + + + + + + + + + + + + + + + + + Job ControlSuspend (SIGTSTP)Resume (SIGCONT)Terminate (SIGTERM)Kill (SIGKILL) + + + + + + + + + + + + + Job Exited + + + + + + + + + + + + + This process is a fork() of the main process without execve().This means it has access to the main process data model at the time of the fork(), any modifications made after this point are copy-on-write, and any side effects of the job need to be manually propagated back to the main process through the IPC. + + + + + + + \ No newline at end of file diff --git a/doc/source/images/arch-scheduler-queue-ports.svg b/doc/source/images/arch-scheduler-queue-ports.svg new file mode 100644 index 000000000..2466551bf --- /dev/null +++ b/doc/source/images/arch-scheduler-queue-ports.svg @@ -0,0 +1,318 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Processed + + + + + + + + + + + + + Failed + + + + + + + + + + + + + Skipped + + + + + + + + + + + + + + + + + + + Ready + + + + + + + + + + + + + Skip + + + + + + Queue implementations report a “QueueStatus” for all of the elements which are in the input queue at all times.Skip elements go directly to the output queue without processing, and the scheduler only ever processes the Ready elements. 
+ + + + + + + + + + + + + Wait + + + + + + + + + + + + + Elements In + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Element Processing + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + After elements are either processed or skipped, they move to the Queue’s output queue where the scheduler can pick them up and and move them along to the next Queue.Elements which have passed through a Queue are also kept in status lists for bookkeeping purposes. + + + + + + Input Queue + + + + + + Output Queue + + + + + + + \ No newline at end of file diff --git a/doc/source/images/arch-scheduler-queues.svg b/doc/source/images/arch-scheduler-queues.svg new file mode 100644 index 000000000..8732b9fc4 --- /dev/null +++ b/doc/source/images/arch-scheduler-queues.svg @@ -0,0 +1,488 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + Pull + + + + + + + + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + Fetch + + + + + + + + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + Build + + + + + + + + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + Track + + + + + + + + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + + + + + + + + Element + + + + + + Push + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Elements In + + + + + + + + + + + + + + + + + + + + Elements Out + + + + + + + + + + + + + 
+ \ No newline at end of file diff --git a/doc/source/images/arch-scheduler-run.svg b/doc/source/images/arch-scheduler-run.svg new file mode 100644 index 000000000..e031aa57e --- /dev/null +++ b/doc/source/images/arch-scheduler-run.svgcheduler.run() + + + + + + + + + + + + + Wait + + + + + + Run Event Loop + + + + + + + + + + + + + Job Done + + + + + + + + ReadyElementsRemain + + + + + + + + + + + + + QueueJobs + + + + + + + + + + + + + + + + + + + + Yes + + + + + + + + + + + + + + + + + + + + Return + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + No + + + + + + + \ No newline at end of file diff --git a/doc/source/main_architecture.rst b/doc/source/main_architecture.rst index 6e284f31e..7c13e41d6 100644 --- a/doc/source/main_architecture.rst +++ b/doc/source/main_architecture.rst @@ -12,3 +12,4 @@ This section provides details on the overall BuildStream architecture. arch_program_flow arch_data_model arch_dependency_model + arch_scheduler -- cgit v1.2.1 From b1fadaa525ff8d046ac766acd1275bf56dd15ae6 Mon Sep 17 00:00:00 2001 From: Tristan Van Berkom Date: Mon, 29 Oct 2018 22:32:49 +0900 Subject: doc: Moved cachekey and sandboxing writings into the architecture section --- doc/source/additional_cachekeys.rst | 96 ----------------- doc/source/additional_sandboxing.rst | 200 ----------------------------------- doc/source/arch_cachekeys.rst | 96 +++++++++++++++++ doc/source/arch_sandboxing.rst | 200 +++++++++++++++++++++++++++++++++++ doc/source/core_additional.rst | 2 - doc/source/main_architecture.rst | 2 + 6 files changed, 298 insertions(+), 298 deletions(-) delete mode 100644 doc/source/additional_cachekeys.rst delete mode 100644 doc/source/additional_sandboxing.rst create mode 100644 doc/source/arch_cachekeys.rst create mode 100644 doc/source/arch_sandboxing.rst diff --git a/doc/source/additional_cachekeys.rst b/doc/source/additional_cachekeys.rst deleted file mode 100644 index f0df796c5..000000000 --- a/doc/source/additional_cachekeys.rst +++ 
/dev/null @@ -1,96 +0,0 @@ - -.. _cachekeys: - - -Cache keys -========== - -Cache keys for artifacts are generated from the inputs of the build process -for the purpose of reusing artifacts in a well-defined, predictable way. - -Structure ---------- -Cache keys are SHA256 hash values generated from a pickled Python dict that -includes: - -* Environment (e.g., project configuration and variables) -* Element configuration (details depend on element kind, ``Element.get_unique_key()``) -* Sources (``Source.get_unique_key()``) -* Dependencies (depending on cache key type, see below) -* Public data - -Cache key types ---------------- -There are two types of cache keys in BuildStream, ``strong`` and ``weak``. - -The purpose of a ``strong`` cache key is to capture the state of as many aspects -as possible that can have an influence on the build output. The aim is that -builds will be fully reproducible as long as the cache key doesn't change, -with suitable module build systems that don't embed timestamps, for example. - -A ``strong`` cache key includes the strong cache key of each build dependency -(and their runtime dependencies) of the element as changes in build dependencies -(or their runtime dependencies) can result in build differences in reverse -dependencies. This means that whenever the strong cache key of a dependency -changes, the strong cache key of its reverse dependencies will change as well. - -A ``weak`` cache key has an almost identical structure, however, it includes -only the names of build dependencies, not their cache keys or their runtime -dependencies. A weak cache key will thus still change when the element itself -or the environment changes but it will not change when a dependency is updated. - -For elements without build dependencies the ``strong`` cache key is identical -to the ``weak`` cache key. - -Strict build plan ------------------ -This is the default build plan that exclusively uses ``strong`` cache keys -for the core functionality. 
An element's cache key can be calculated when -the cache keys of the element's build dependencies (and their runtime -dependencies) have been calculated and either tracking is not enabled or it -has already completed for this element, i.e., the ``ref`` is available. -This means that with tracking disabled the cache keys of all elements could be -calculated right at the start of a build session. - -While BuildStream only uses ``strong`` cache keys with the strict build plan -for the actual staging and build process, it will still calculate ``weak`` -cache keys for each element. This allows BuildStream to store the artifact -in the cache with both keys, reducing rebuilds when switching between strict -and non-strict build plans. If the artifact cache already contains an -artifact with the same ``weak`` cache key, it's replaced. Thus, non-strict -builds always use the latest artifact available for a given ``weak`` cache key. - -Non-strict build plan ---------------------- -The non-strict build plan disables the time-consuming automatic rebuild of -reverse dependencies at the cost of dropping the reproducibility benefits. -It uses the ``weak`` cache keys for the core staging and build process. -I.e., if an artifact is available with the calculated ``weak`` cache key, -it will be reused for staging instead of being rebuilt. ``weak`` cache keys -can be calculated early in the build session. After tracking, similar to -when ``strong`` cache keys can be calculated with a strict build plan. - -Similar to how strict build plans also calculate ``weak`` cache keys, non-strict -build plans also calculate ``strong`` cache keys. However, this is slightly -more complex. To calculate the ``strong`` cache key of an element, BuildStream -requires the ``strong`` cache keys of the build dependencies (and their runtime -dependencies). - -The build dependencies of an element may have been updated since the artifact -was built. 
With the non-strict build plan the artifact will still be reused. -However, this means that we cannot use a ``strong`` cache key calculated purely -based on the element definitions. We need a cache key that matches the -environment at the time the artifact was built, not the current definitions. - -The only way to get the correct ``strong`` cache key is by retrieving it from -the metadata stored in the artifact. As artifacts may need to be pulled from a -remote artifact cache, the ``strong`` cache key is not readily available early -in the build session. However, it can always be retrieved when an element is -about to be built, as the dependencies are guaranteed to be in the local -artifact cache at that point. - -``Element._get_cache_key_from_artifact()`` extracts the ``strong`` cache key -from an artifact in the local cache. ``Element._get_cache_key_for_build()`` -calculates the ``strong`` cache key that is used for a particular build job. -This is used for the embedded metadata and also as key to store the artifact in -the cache. diff --git a/doc/source/additional_sandboxing.rst b/doc/source/additional_sandboxing.rst deleted file mode 100644 index 531a3e5d5..000000000 --- a/doc/source/additional_sandboxing.rst +++ /dev/null @@ -1,200 +0,0 @@ - -.. _sandboxing: - - -Sandboxing -========== - -Introduction ------------- - -BuildStream assembles each element in a *sandbox*. The sandbox is a container -environment which serves two purposes: giving BuildStream control over -all build aspects in order to ensure reproducibility of build results, -and providing safety guarantees for the host system that BuildStream is -running on. - -The exact implementation of the sandbox varies depending on which platform you -are running BuildStream. See below for backend-specific details. 
- -There are several factors that affect the build output and must therefore be -under BuildStream's control: - -* Filesystem contents and metadata -* The user and permissions model -* Network access -* Device access - -Each of these is detailed below. - -For safety reasons, BuildStream also controls the following things: - -* Access to files outside of the sandbox directory -* Access to certain kernel-specific syscalls - -Creating a sandbox can require special priviliges. This is a safety concern too -because bugs in the `bst` program can cause damage to a host if the program is -running with extra privileges. The exact priviliges that are required depend on -your platform and backend. - -Element plugins can run arbitary commands within the sandbox using the -:mod:`sandbox API `. - -What elements can and can't do in the sandbox ---------------------------------------------- - -This section specifies how BuildStream sandboxes are intended to work. A -specific sandbox provider may not necessarily be able to achieve all of the -requirements listed below so be sure to read the "platform notes" section as -well. - -Filesystem access -~~~~~~~~~~~~~~~~~ - -The filesystem inside sandboxes should be read only during element assembly, -except for certain directories which element plugins can mark as being -read/write. Most elements plugins derive from :mod:`BuildElement -`, which marks ``%{build-root}`` and -``%{install-root}`` as read/write. - -When running integration commands or `bst shell`, the sandbox should have a -fully read-write filesystem. The changes made here do not need to persist -beyond the lifetime of that sandbox, and **must not** affect the contents of -artifacts stored in the cache. - -Certain top level directories should be treated specially in all sandboxes: - -* The ``/dev`` directory should contain device nodes, which are described in - a separate section. - -* The ``/proc`` directory should have a UNIX 'procfs' style filesystem mounted. 
- It should not expose any information about processes running outside of the - sandbox. - -* The ``/tmp`` directory should be writable. - -Filesystem metadata -~~~~~~~~~~~~~~~~~~~ - -The writable areas inside a BuildStream sandbox are limited in what metadata -can be written and stored. - -* All files must be owned by UID 0 and GID 0 -* No files may have the setuid or setgid bits set -* Extended file attributes (xattrs) cannot be written to or read. -* Hardlinks to other files can be created, but the information about which - files are hardlinked to each other will not be stored in the artifact - that is created from the sandbox. - -These restrictions are due to technical limitations. In future we hope to -support a wider range of filesystem metadata operations. See `issue #38 -`_ for more details. - -User and permissions model -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -All commands inside the sandbox run with user ID 0 and group ID 0. It should -not be possible to become any other user ID. - -Network access -~~~~~~~~~~~~~~ - -Builds should not be able to access the network at all from the sandbox. All -remote resources needed to build an element must be specified in the element's -``sources`` list so that BuildStream is able to see when they have changed. - -A sandbox opened by `bst shell` should allow network access. - -Device access -~~~~~~~~~~~~~ - -Builds should not be able to access any hardware devices at all. - -A few standard UNIX device files are needed, the whitelist is: - -* ``/dev/full`` -* ``/dev/null`` -* ``/dev/urandom`` -* ``/dev/random`` -* ``/dev/zero`` - -It may seem odd that we have sources of randomness in the sandbox, but a lot of -tools do expect them to exist. We take the view that it's up to integrators to -ensure that elements do not deliberately include randomness in their output. - -A sandbox opened by `bst shell` can make any devices available. There needs to -be a console device so that it can be used interactively. 
- -Platform notes --------------- - -BuildStream currently only carries first-class support for modern Linux-based -operating systems. - -There is also a "fallback" backend which aims to make BuildStream usable on any -POSIX-compatible operating system. The POSIX standard does not provide good -support for creating containers so this implementation makes a number of -unfortunate compromises. - -Linux -~~~~~ - -On Linux we use the following isolation and sandboxing primitives: - -* bind mounts -* FUSE -* Mount namespaces -* Network namespaces -* PID (process ID) namespaces -* User namespaces (if available) -* seccomp - -We access all of these features through a sandboxing tool named `Bubblewrap -`_. - -User namespaces are not enabled by default in all Linux distributions. -BuildStream still runs on such systems but will give a big warning on startup -and will refuse to push any artifacts built on such a system to a remote cache. -For more information, see `issue #92 -`_. - -The Linux platform can operate as a standard user provided user namespace -support is available. If user namespace support is not available you have the -option of installing bubblewrap as a setuid binary to avoid needing to run the -entire ``bst`` process as the ``root`` user. - -The artifact cache on Linux systems is implemented using `OSTree -`_, which can allow us to stage artifacts -using hardlinks instead of copying them. To avoid cache corruption it is -vital that hardlinked files cannot be overwritten. In cases where the root -filesystem inside the sandbox needs to be writable, a custom FUSE filesystem -named SafeHardlinks is used which provides a copy-on-write layer. - -Some of the operations on filesystem metadata listed above are not prohibited -by the sandbox, but will instead be silently dropped when an artifact is -created. For more details see `issue #38 -`_. - -Some details of the host machine are currently leaked by this platform backend. -For more details, see `issue #262 -`_. 
- -Fallback (POSIX) -~~~~~~~~~~~~~~~~ - -The fallback backend aims to be usable on a wide range of operating systems. -Any OS that implements the POSIX specification and the ``chroot()`` syscall -can be expected to work. There are no real isolation or sandboxing primitives -that work across multiple operating systems, so the protection provided by -this backend is minimal. It would be much safer to use a platform-specific -backend. - -Filesystem isolation is done using the chroot() system call. This system call -requires special privileges to use so ``bst`` usually needs to be run as the -``root`` user when using this backend. - -Network access is not blocked in the sandbox. However since there is unlikely -to be a correct `/etc/resolv.conf` file, any network access that depends on -name resolution will most likely fail anyway. - -Builds inside the sandbox execute as the ``root`` user. diff --git a/doc/source/arch_cachekeys.rst b/doc/source/arch_cachekeys.rst new file mode 100644 index 000000000..f0df796c5 --- /dev/null +++ b/doc/source/arch_cachekeys.rst @@ -0,0 +1,96 @@ + +.. _cachekeys: + + +Cache keys +========== + +Cache keys for artifacts are generated from the inputs of the build process +for the purpose of reusing artifacts in a well-defined, predictable way. + +Structure +--------- +Cache keys are SHA256 hash values generated from a pickled Python dict that +includes: + +* Environment (e.g., project configuration and variables) +* Element configuration (details depend on element kind, ``Element.get_unique_key()``) +* Sources (``Source.get_unique_key()``) +* Dependencies (depending on cache key type, see below) +* Public data + +Cache key types +--------------- +There are two types of cache keys in BuildStream, ``strong`` and ``weak``. + +The purpose of a ``strong`` cache key is to capture the state of as many aspects +as possible that can have an influence on the build output. 
The aim is that +builds will be fully reproducible as long as the cache key doesn't change, +with suitable module build systems that don't embed timestamps, for example. + +A ``strong`` cache key includes the strong cache key of each build dependency +(and their runtime dependencies) of the element as changes in build dependencies +(or their runtime dependencies) can result in build differences in reverse +dependencies. This means that whenever the strong cache key of a dependency +changes, the strong cache key of its reverse dependencies will change as well. + +A ``weak`` cache key has an almost identical structure, however, it includes +only the names of build dependencies, not their cache keys or their runtime +dependencies. A weak cache key will thus still change when the element itself +or the environment changes but it will not change when a dependency is updated. + +For elements without build dependencies the ``strong`` cache key is identical +to the ``weak`` cache key. + +Strict build plan +----------------- +This is the default build plan that exclusively uses ``strong`` cache keys +for the core functionality. An element's cache key can be calculated when +the cache keys of the element's build dependencies (and their runtime +dependencies) have been calculated and either tracking is not enabled or it +has already completed for this element, i.e., the ``ref`` is available. +This means that with tracking disabled the cache keys of all elements could be +calculated right at the start of a build session. + +While BuildStream only uses ``strong`` cache keys with the strict build plan +for the actual staging and build process, it will still calculate ``weak`` +cache keys for each element. This allows BuildStream to store the artifact +in the cache with both keys, reducing rebuilds when switching between strict +and non-strict build plans. If the artifact cache already contains an +artifact with the same ``weak`` cache key, it's replaced. 
Thus, non-strict +builds always use the latest artifact available for a given ``weak`` cache key. + +Non-strict build plan +--------------------- +The non-strict build plan disables the time-consuming automatic rebuild of +reverse dependencies at the cost of dropping the reproducibility benefits. +It uses the ``weak`` cache keys for the core staging and build process. +I.e., if an artifact is available with the calculated ``weak`` cache key, +it will be reused for staging instead of being rebuilt. ``weak`` cache keys +can be calculated early in the build session, namely after tracking, similar to +when ``strong`` cache keys can be calculated with a strict build plan. + +Similar to how strict build plans also calculate ``weak`` cache keys, non-strict +build plans also calculate ``strong`` cache keys. However, this is slightly +more complex. To calculate the ``strong`` cache key of an element, BuildStream +requires the ``strong`` cache keys of the build dependencies (and their runtime +dependencies). + +The build dependencies of an element may have been updated since the artifact +was built. With the non-strict build plan the artifact will still be reused. +However, this means that we cannot use a ``strong`` cache key calculated purely +based on the element definitions. We need a cache key that matches the +environment at the time the artifact was built, not the current definitions. + +The only way to get the correct ``strong`` cache key is by retrieving it from +the metadata stored in the artifact. As artifacts may need to be pulled from a +remote artifact cache, the ``strong`` cache key is not readily available early +in the build session. However, it can always be retrieved when an element is +about to be built, as the dependencies are guaranteed to be in the local +artifact cache at that point. + +``Element._get_cache_key_from_artifact()`` extracts the ``strong`` cache key +from an artifact in the local cache.
``Element._get_cache_key_for_build()`` +calculates the ``strong`` cache key that is used for a particular build job. +This is used for the embedded metadata and also as the key to store the artifact in +the cache. diff --git a/doc/source/arch_sandboxing.rst b/doc/source/arch_sandboxing.rst new file mode 100644 index 000000000..531a3e5d5 --- /dev/null +++ b/doc/source/arch_sandboxing.rst @@ -0,0 +1,200 @@ + +.. _sandboxing: + + +Sandboxing +========== + +Introduction +------------ + +BuildStream assembles each element in a *sandbox*. The sandbox is a container +environment which serves two purposes: giving BuildStream control over +all build aspects in order to ensure reproducibility of build results, +and providing safety guarantees for the host system that BuildStream is +running on. + +The exact implementation of the sandbox varies depending on the platform on which you +are running BuildStream. See below for backend-specific details. + +There are several factors that affect the build output and must therefore be +under BuildStream's control: + +* Filesystem contents and metadata +* The user and permissions model +* Network access +* Device access + +Each of these is detailed below. + +For safety reasons, BuildStream also controls the following things: + +* Access to files outside of the sandbox directory +* Access to certain kernel-specific syscalls + +Creating a sandbox can require special privileges. This is a safety concern too +because bugs in the `bst` program can cause damage to a host if the program is +running with extra privileges. The exact privileges that are required depend on +your platform and backend. + +Element plugins can run arbitrary commands within the sandbox using the +:mod:`sandbox API <buildstream.sandbox>`. + +What elements can and can't do in the sandbox +--------------------------------------------- + +This section specifies how BuildStream sandboxes are intended to work.
A +specific sandbox provider may not necessarily be able to achieve all of the +requirements listed below so be sure to read the "platform notes" section as +well. + +Filesystem access +~~~~~~~~~~~~~~~~~ + +The filesystem inside sandboxes should be read only during element assembly, +except for certain directories which element plugins can mark as being +read/write. Most element plugins derive from :mod:`BuildElement +<buildstream.buildelement>`, which marks ``%{build-root}`` and +``%{install-root}`` as read/write. + +When running integration commands or `bst shell`, the sandbox should have a +fully read-write filesystem. The changes made here do not need to persist +beyond the lifetime of that sandbox, and **must not** affect the contents of +artifacts stored in the cache. + +Certain top level directories should be treated specially in all sandboxes: + +* The ``/dev`` directory should contain device nodes, which are described in + a separate section. + +* The ``/proc`` directory should have a UNIX 'procfs' style filesystem mounted. + It should not expose any information about processes running outside of the + sandbox. + +* The ``/tmp`` directory should be writable. + +Filesystem metadata +~~~~~~~~~~~~~~~~~~~ + +The writable areas inside a BuildStream sandbox are limited in what metadata +can be written and stored. + +* All files must be owned by UID 0 and GID 0 +* No files may have the setuid or setgid bits set +* Extended file attributes (xattrs) cannot be written to or read. +* Hardlinks to other files can be created, but the information about which + files are hardlinked to each other will not be stored in the artifact + that is created from the sandbox. + +These restrictions are due to technical limitations. In future we hope to +support a wider range of filesystem metadata operations. See `issue #38 +<https://gitlab.com/BuildStream/buildstream/issues/38>`_ for more details. + +User and permissions model +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All commands inside the sandbox run with user ID 0 and group ID 0.
It should +not be possible to become any other user ID. + +Network access +~~~~~~~~~~~~~~ + +Builds should not be able to access the network at all from the sandbox. All +remote resources needed to build an element must be specified in the element's +``sources`` list so that BuildStream is able to see when they have changed. + +A sandbox opened by `bst shell` should allow network access. + +Device access +~~~~~~~~~~~~~ + +Builds should not be able to access any hardware devices at all. + +A few standard UNIX device files are needed; the whitelist is: + +* ``/dev/full`` +* ``/dev/null`` +* ``/dev/urandom`` +* ``/dev/random`` +* ``/dev/zero`` + +It may seem odd that we have sources of randomness in the sandbox, but a lot of +tools do expect them to exist. We take the view that it's up to integrators to +ensure that elements do not deliberately include randomness in their output. + +A sandbox opened by `bst shell` can make any devices available. There needs to +be a console device so that it can be used interactively. + +Platform notes +-------------- + +BuildStream currently only carries first-class support for modern Linux-based +operating systems. + +There is also a "fallback" backend which aims to make BuildStream usable on any +POSIX-compatible operating system. The POSIX standard does not provide good +support for creating containers so this implementation makes a number of +unfortunate compromises. + +Linux +~~~~~ + +On Linux we use the following isolation and sandboxing primitives: + +* bind mounts +* FUSE +* Mount namespaces +* Network namespaces +* PID (process ID) namespaces +* User namespaces (if available) +* seccomp + +We access all of these features through a sandboxing tool named `Bubblewrap +<https://github.com/projectatomic/bubblewrap/>`_. + +User namespaces are not enabled by default in all Linux distributions. +BuildStream still runs on such systems but will give a big warning on startup +and will refuse to push any artifacts built on such a system to a remote cache.
+For more information, see `issue #92 +<https://gitlab.com/BuildStream/buildstream/issues/92>`_. + +The Linux platform can operate as a standard user provided user namespace +support is available. If user namespace support is not available you have the +option of installing bubblewrap as a setuid binary to avoid needing to run the +entire ``bst`` process as the ``root`` user. + +The artifact cache on Linux systems is implemented using `OSTree +<https://github.com/ostreedev/ostree>`_, which can allow us to stage artifacts +using hardlinks instead of copying them. To avoid cache corruption it is +vital that hardlinked files cannot be overwritten. In cases where the root +filesystem inside the sandbox needs to be writable, a custom FUSE filesystem +named SafeHardlinks is used which provides a copy-on-write layer. + +Some of the operations on filesystem metadata listed above are not prohibited +by the sandbox, but will instead be silently dropped when an artifact is +created. For more details see `issue #38 +<https://gitlab.com/BuildStream/buildstream/issues/38>`_. + +Some details of the host machine are currently leaked by this platform backend. +For more details, see `issue #262 +<https://gitlab.com/BuildStream/buildstream/issues/262>`_. + +Fallback (POSIX) +~~~~~~~~~~~~~~~~ + +The fallback backend aims to be usable on a wide range of operating systems. +Any OS that implements the POSIX specification and the ``chroot()`` syscall +can be expected to work. There are no real isolation or sandboxing primitives +that work across multiple operating systems, so the protection provided by +this backend is minimal. It would be much safer to use a platform-specific +backend. + +Filesystem isolation is done using the ``chroot()`` system call. This system call +requires special privileges to use so ``bst`` usually needs to be run as the +``root`` user when using this backend. + +Network access is not blocked in the sandbox. However, since there is unlikely +to be a correct `/etc/resolv.conf` file, any network access that depends on +name resolution will most likely fail anyway. + +Builds inside the sandbox execute as the ``root`` user.
diff --git a/doc/source/core_additional.rst b/doc/source/core_additional.rst index 08c445630..878745ba2 100644 --- a/doc/source/core_additional.rst +++ b/doc/source/core_additional.rst @@ -6,6 +6,4 @@ Additional writings .. toctree:: :maxdepth: 2 - additional_cachekeys - additional_sandboxing additional_docker diff --git a/doc/source/main_architecture.rst b/doc/source/main_architecture.rst index 7c13e41d6..55b607dae 100644 --- a/doc/source/main_architecture.rst +++ b/doc/source/main_architecture.rst @@ -13,3 +13,5 @@ This section provides details on the overall BuildStream architecture. arch_data_model arch_dependency_model arch_scheduler + arch_cachekeys + arch_sandboxing -- cgit v1.2.1