summaryrefslogtreecommitdiff
path: root/morphlib/artifactsplitrule.py
diff options
context:
space:
mode:
Diffstat (limited to 'morphlib/artifactsplitrule.py')
-rw-r--r--morphlib/artifactsplitrule.py303
1 files changed, 303 insertions, 0 deletions
diff --git a/morphlib/artifactsplitrule.py b/morphlib/artifactsplitrule.py
new file mode 100644
index 00000000..246691d8
--- /dev/null
+++ b/morphlib/artifactsplitrule.py
@@ -0,0 +1,303 @@
+# Copyright (C) 2013-2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import collections
+import itertools
+import re
+
+import morphlib
+
+
class Rule(object):
    '''Common interface shared by every matching rule.

    A rule is handed a candidate and answers whether that candidate
    matches. The same machinery serves files and artifacts: file rules
    receive only the path, while artifact rules receive the artifact
    name together with the name of the source it came from.

    '''

    def match(self, *args):
        '''Accept any candidate; subclasses narrow this down.'''
        matched = True
        return matched
+
+
class FileMatch(Rule):
    '''Rule accepting a path that matches at least one pattern.

    The patterns are regular expressions applied with re.match(), so
    each one is tried from the start of the path. A single successful
    pattern is enough for the path to count as a match.

    '''

    def __init__(self, regexes):
        # Possible optimisation: compile regexes as one pattern
        self._regexes = list(map(re.compile, regexes))

    def match(self, path):
        '''Return True if any of the compiled patterns matches path.'''
        for pattern in self._regexes:
            if pattern.match(path):
                return True
        return False
+
+
class ArtifactMatch(Rule):
    '''Match an artifact's name against a list of regular expressions.

    The rule is given a (source name, artifact name) pair. Only the
    artifact name is tested; it is a match if any of the regular
    expressions matches it from the start (re.match semantics).

    '''

    def __init__(self, regexes):
        # Possible optimisation: compile regexes as one pattern
        self._regexes = [re.compile(r) for r in regexes]

    def match(self, key):
        '''Return True if the artifact name in key matches any pattern.

        key is a (source_name, artifact_name) pair.

        '''

        # Unpack in the body rather than in the signature: tuple
        # parameters are Python 2 only syntax (removed by PEP 3113).
        _source_name, artifact_name = key
        return any(r.match(artifact_name) for r in self._regexes)
+
+
class ArtifactAssign(Rule):
    '''Match only artifacts with the specified source and artifact names.

    This is a valid match if the source and artifact names exactly match.
    This is used for explicit artifact assignment e.g. chunk artifact
    foo-bins which comes from chunk source foo goes into stratum
    bar-runtime.

    '''

    def __init__(self, source_name, artifact_name):
        self._key = (source_name, artifact_name)

    def match(self, key):
        '''Return True if key names exactly this source and artifact.

        key is a (source_name, artifact_name) pair.

        '''

        # Unpack in the body rather than in the signature: tuple
        # parameters are Python 2 only syntax (removed by PEP 3113).
        # Rebuilding the tuple keeps the comparison independent of the
        # sequence type the caller passed.
        source_name, artifact_name = key
        return (source_name, artifact_name) == self._key
+
+
class SourceAssign(Rule):
    '''Match only artifacts which come from the specified source.

    This is a valid match only if the artifact comes from the specified
    source. e.g. all artifacts produced by source bar-runtime go into
    system baz

    '''

    def __init__(self, source_name):
        self._source = source_name

    def match(self, key):
        '''Return True if the source name in key is this rule's source.

        key is a (source_name, artifact_name) pair; the artifact name
        is ignored.

        '''

        # Unpack in the body rather than in the signature: tuple
        # parameters are Python 2 only syntax (removed by PEP 3113).
        source_name, _artifact_name = key
        return source_name == self._source
+
+
try:
    # The ABCs live in collections.abc on Python 3; the aliases in the
    # collections namespace itself were removed in Python 3.10.
    from collections.abc import Iterable as _Iterable
except ImportError:
    # Python 2 has no collections.abc module.
    from collections import Iterable as _Iterable


class SplitRules(_Iterable):
    '''Rules engine for splitting a source's artifacts.

    Rules are added with the .add(artifact, rule) method, though another
    SplitRules may be created by passing a SplitRules to the constructor.

    .match(path|(source, artifact)) and .partition(iterable) are used
    to determine if an artifact matches the rules. Rules are processed
    in order, so more specific matches first can be followed by more
    generic catch-all matches.

    Iterating a SplitRules yields its (artifact, rule) pairs in the
    order they were added.

    '''

    def __init__(self, *args):
        # list() takes zero or one iterable, so SplitRules() starts
        # empty and SplitRules(pairs) copies the given (artifact, rule)
        # pairs.
        self._rules = list(*args)

    def __iter__(self):
        return iter(self._rules)

    def add(self, artifact, rule):
        '''Append a rule which assigns what it matches to artifact.'''
        self._rules.append((artifact, rule))

    @property
    def artifacts(self):
        '''Get names of all artifacts in the rule set.

        Returns artifact names in the order they were added to the rules,
        and not repeating the artifact.

        '''

        seen = set()
        result = []
        for artifact_name, _rule in self._rules:
            if artifact_name not in seen:
                seen.add(artifact_name)
                result.append(artifact_name)
        return result

    def match(self, *args):
        '''Return all artifact names the given argument matches.

        It's returned in match order as a list, so it's possible to
        detect overlapping matches, even though most of the time, the
        only used entry will be the first.

        '''

        return [a for a, r in self._rules if r.match(*args)]

    def partition(self, iterable):
        '''Match many files or artifacts.

        It's the common case to take a bunch of filenames and determine
        which artifact each should go to, so rather than implement this
        logic in multiple places, it's here as a convenience method.

        Returns a (matches, overlaps, unmatched) tuple:

        * matches maps each artifact name to the list of items assigned
          to it, an item being assigned to the first artifact it matched;
        * overlaps maps each item that matched more than one rule to
          the set of artifact names it matched;
        * unmatched is the set of items that matched no rule.

        '''

        matches = collections.defaultdict(list)
        overlaps = collections.defaultdict(set)
        unmatched = set()

        for arg in iterable:
            matched = self.match(arg)
            if not matched:
                unmatched.add(arg)
                continue
            if len(matched) > 1:
                overlaps[arg].update(matched)
            # The first matching rule wins.
            matches[matched[0]].append(arg)

        return matches, overlaps, unmatched
+
+
# TODO: Work out a good way to feed new defaults in. This is good for
#       the usual Linux userspace, but we may find issues and need a
#       migration path to a more useful set, or develop a system with
#       a different layout, like Android.
#
# Each entry is (artifact name suffix, list of path regexes). Entries
# are listed from most specific to the '-misc' catch-all. Every pattern
# accepts the directory both directly at the root and under usr/.
DEFAULT_CHUNK_RULES = [
    ('-bins', [r"(usr/)?s?bin/.*"]),
    ('-libs', [
        r"(usr/)?lib(32|64)?/lib[^/]*\.so(\.\d+)*",
        r"(usr/)?libexec/.*"]),
    ('-devel', [
        r"(usr/)?include/.*",
        r"(usr/)?lib(32|64)?/lib.*\.a",
        r"(usr/)?lib(32|64)?/lib.*\.la",
        r"(usr/)?(lib(32|64)?|share)/pkgconfig/.*\.pc"]),
    ('-doc', [
        r"(usr/)?share/doc/.*",
        r"(usr/)?share/man/.*",
        r"(usr/)?share/info/.*"]),
    ('-locale', [
        r"(usr/)?share/locale/.*",
        r"(usr/)?share/i18n/.*",
        r"(usr/)?share/zoneinfo/.*"]),
    ('-misc', [r".*"]),
]
+
+
# Each entry is (stratum artifact name suffix, list of regexes tried
# against chunk artifact names). '-devel' is listed ahead of '-runtime',
# whose final pattern matches any name.
DEFAULT_STRATUM_RULES = [
    (
        '-devel',
        [r'.*-devel', r'.*-debug', r'.*-doc'],
    ),
    (
        '-runtime',
        [r'.*-bins', r'.*-libs', r'.*-locale', r'.*-misc', r'.*'],
    ),
]
+
+
def unify_chunk_matches(morphology):
    '''Create split rules including defaults and per-chunk rules.

    With rules specified in the morphology's 'products' field and the
    default rules for chunks, generate rules to match the files produced
    by building the chunk to the chunk artifact they should be put in.

    Explicit rules come first, in the order given. Each default rule
    is then appended unless the morphology already defines rules for
    the artifact of that name.

    '''

    split_rules = SplitRules()

    for product in morphology['products']:
        split_rules.add(product['artifact'], FileMatch(product['include']))

    # Names configured explicitly by the morphology, captured before any
    # default rule is added.
    explicit = set(split_rules.artifacts)

    name = morphology['name']
    for suffix, patterns in DEFAULT_CHUNK_RULES:
        ca_name = name + suffix
        # Default rules are replaced by explicit ones. Only the
        # overridden default is skipped: stopping the loop here would
        # also drop every later default, including the -misc catch-all.
        if ca_name in explicit:
            continue
        split_rules.add(ca_name, FileMatch(patterns))

    return split_rules
+
+
def unify_stratum_matches(morphology):
    '''Create split rules including defaults and per-stratum rules.

    With rules specified in the chunk spec's 'artifacts' fields, the
    stratum's 'products' field and the default rules for strata, generate
    rules to match the artifacts produced by building the chunks in the
    strata to the stratum artifact they should be put in.

    The returned rules list the explicit per-chunk assignments first,
    then the 'products' match rules, then any default rule whose
    artifact name is not already used by a 'products' rule.

    '''

    assignment_split_rules = SplitRules()
    for spec in morphology['chunks']:
        source_name = spec['name']
        # items() rather than iteritems(): the latter only exists on
        # Python 2, and the result is sorted into a list either way.
        for ca_name, sta_name in sorted(spec.get('artifacts', {}).items()):
            assignment_split_rules.add(sta_name,
                                       ArtifactAssign(source_name, ca_name))

    # Construct match rules separately, so we can use the SplitRules object's
    # own knowledge of which rules already exist to determine whether
    # to include the default rule.
    # Rather than use the existing SplitRules, use a new one, since
    # match rules supplement assignment rules, rather than replace.
    match_split_rules = SplitRules()
    for product in morphology.get('products', []):
        match_split_rules.add(product['artifact'],
                              ArtifactMatch(product['include']))

    # Names configured explicitly through 'products', captured before
    # any default rule is added.
    explicit = set(match_split_rules.artifacts)

    for suffix, patterns in DEFAULT_STRATUM_RULES:
        sta_name = morphology['name'] + suffix
        # Skip only the default that was overridden: stopping the loop
        # here would also drop every later default rule.
        if sta_name in explicit:
            continue
        match_split_rules.add(sta_name, ArtifactMatch(patterns))

    # Construct a new SplitRules with the assignments before matches
    return SplitRules(itertools.chain(assignment_split_rules,
                                      match_split_rules))
+
+
def unify_system_matches(morphology):
    '''Create split rules assigning stratum artifacts to a system.

    Every rule targets the single system artifact '<name>-rootfs'.
    For each entry in the morphology's 'strata' field:

    * if the entry has no 'artifacts' (or it is None), everything
      from that stratum source is assigned to the system artifact;
    * otherwise only the listed stratum artifacts are assigned.

    The stratum source is identified by the entry's 'name', falling
    back to its 'morph' field when 'name' is absent.

    '''

    name = morphology['name'] + '-rootfs'
    split_rules = SplitRules()

    for spec in morphology['strata']:
        # Look up 'morph' only when 'name' is missing. dict.get() with
        # spec['morph'] as the default evaluates it eagerly and raises
        # KeyError for specs that provide only 'name'.
        source_name = spec['name'] if 'name' in spec else spec['morph']
        artifacts = spec.get('artifacts')
        if artifacts is None:
            split_rules.add(name, SourceAssign(source_name))
            continue
        for sta_name in artifacts:
            split_rules.add(name, ArtifactAssign(source_name, sta_name))

    return split_rules
+
+
def unify_cluster_matches(_):
    '''Return None whatever argument is given.

    The single positional argument is accepted and ignored; no split
    rules object is built.

    '''
    return None