diff options
Diffstat (limited to 'morphlib/artifactsplitrule.py')
-rw-r--r-- | morphlib/artifactsplitrule.py | 303 |
1 files changed, 303 insertions, 0 deletions
# Copyright (C) 2013-2014 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import collections
import itertools
import re

import morphlib

try:
    # Python 3.3+: the ABCs live in collections.abc; the aliases in the
    # top-level collections module were removed in Python 3.10.
    from collections.abc import Iterable as _Iterable
except ImportError:
    # Python 2
    _Iterable = collections.Iterable


class Rule(object):
    '''Rule base class.

    Rules are passed an object and are expected to determine whether
    it matches. It's roughly the same machinery for matching files
    as artifacts, it's just that Files are given just the path, while
    Artifact matches are given the artifact name and the name of the
    source it came from.

    The base implementation matches everything.

    '''

    def match(self, *args):
        '''Return True regardless of the arguments.'''
        return True


class FileMatch(Rule):
    '''Match a file path against a list of regular expressions.

    If the path matches any of the regular expressions, then the file
    is counted as a valid match.

    Patterns are applied with re.match, so they are anchored at the
    start of the path but need not consume all of it.

    '''

    def __init__(self, regexes):
        # Possible optimisation: compile regexes as one pattern
        self._regexes = [re.compile(r) for r in regexes]

    def match(self, path):
        '''Return True if any pattern matches the start of path.'''
        return any(r.match(path) for r in self._regexes)


class ArtifactMatch(Rule):
    '''Match an artifact's name against a list of regular expressions.

    The rule is given a (source_name, artifact_name) pair; only the
    artifact name is tested against the patterns. Patterns are applied
    with re.match, so they are anchored at the start of the name only.

    '''

    def __init__(self, regexes):
        # Possible optimisation: compile regexes as one pattern
        self._regexes = [re.compile(r) for r in regexes]

    def match(self, source_and_artifact):
        '''Return True if any pattern matches the artifact name.

        source_and_artifact is a (source_name, artifact_name) pair.

        '''

        # Unpacked explicitly rather than in the signature: tuple
        # parameters are a syntax error on Python 3 (PEP 3113).
        source_name, artifact_name = source_and_artifact
        return any(r.match(artifact_name) for r in self._regexes)


class ArtifactAssign(Rule):
    '''Match only artifacts with the specified source and artifact names.

    This is a valid match if the source and artifact names exactly match.
    This is used for explicit artifact assignment e.g. chunk artifact
    foo-bins which comes from chunk source foo goes into stratum
    bar-runtime.

    '''

    def __init__(self, source_name, artifact_name):
        self._key = (source_name, artifact_name)

    def match(self, source_and_artifact):
        '''Return True if the pair equals the one given at construction.

        source_and_artifact is a (source_name, artifact_name) pair.

        '''

        source_name, artifact_name = source_and_artifact
        return (source_name, artifact_name) == self._key


class SourceAssign(Rule):
    '''Match only artifacts which come from the specified source.

    This is a valid match only if the artifact comes from the specified
    source. e.g. all artifacts produced by source bar-runtime go into
    system baz

    '''

    def __init__(self, source_name):
        self._source = source_name

    def match(self, source_and_artifact):
        '''Return True if the pair's source name is the expected one.

        source_and_artifact is a (source_name, artifact_name) pair.

        '''

        source_name, artifact_name = source_and_artifact
        return source_name == self._source


class SplitRules(_Iterable):
    '''Rules engine for splitting a source's artifacts.

    Rules are added with the .add(artifact, rule) method, though another
    SplitRules may be created by passing a SplitRules to the constructor.

    .match(path|(source, artifact)) and .partition(iterable) are used
    to determine if an artifact matches the rules. Rules are processed
    in order, so more specific matches first can be followed by more
    generic catch-all matches.

    '''

    def __init__(self, *args):
        # Accepts either nothing or a single iterable of (artifact, rule)
        # pairs, exactly like the list() constructor.
        self._rules = list(*args)

    def __iter__(self):
        '''Iterate over the (artifact, rule) pairs in insertion order.'''
        return iter(self._rules)

    def add(self, artifact, rule):
        '''Append a rule that assigns whatever it matches to artifact.'''
        self._rules.append((artifact, rule))

    @property
    def artifacts(self):
        '''Get names of all artifacts in the rule set.

        Returns artifact names in the order they were added to the rules,
        and not repeating the artifact.

        '''

        seen = set()
        result = []
        for artifact_name, rule in self._rules:
            if artifact_name not in seen:
                seen.add(artifact_name)
                result.append(artifact_name)
        return result

    def match(self, *args):
        '''Return all artifact names the given argument matches.

        It's returned in match order as a list, so it's possible to
        detect overlapping matches, even though most of the time, the
        only used entry will be the first.

        '''

        return [a for a, r in self._rules if r.match(*args)]

    def partition(self, iterable):
        '''Match many files or artifacts.

        It's the common case to take a bunch of filenames and determine
        which artifact each should go to, so rather than implement this
        logic in multiple places, it's here as a convenience method.

        Returns a (matches, overlaps, unmatched) tuple:

        * matches maps an artifact name to the list of items whose first
          matching rule belongs to that artifact;
        * overlaps maps each item matched by more than one rule to the
          set of artifact names that matched it;
        * unmatched is the set of items no rule matched.

        Items must be hashable, since they are used as set members and
        dictionary keys.

        '''

        matches = collections.defaultdict(list)
        overlaps = collections.defaultdict(set)
        unmatched = set()

        for arg in iterable:
            matched = self.match(arg)
            if not matched:
                unmatched.add(arg)
                continue
            if len(matched) > 1:
                overlaps[arg].update(matched)
            # The first matching rule wins.
            matches[matched[0]].append(arg)

        return matches, overlaps, unmatched


# TODO: Work out a good way to feed new defaults in. This is good for
#       the usual Linux userspace, but we may find issues and need a
#       migration path to a more useful set, or develop a system with
#       a different layout, like Android.
# Default chunk split rules: (artifact name suffix, file path patterns).
# Order matters, the first matching rule wins and '-misc' is the catch-all.
DEFAULT_CHUNK_RULES = [
    ('-bins', [r"(usr/)?s?bin/.*"]),
    ('-libs', [
        r"(usr/)?lib(32|64)?/lib[^/]*\.so(\.\d+)*",
        # The usr/ prefix is optional here, as in every other pattern.
        r"(usr/)?libexec/.*"]),
    ('-devel', [
        r"(usr/)?include/.*",
        r"(usr/)?lib(32|64)?/lib.*\.a",
        r"(usr/)?lib(32|64)?/lib.*\.la",
        r"(usr/)?(lib(32|64)?|share)/pkgconfig/.*\.pc"]),
    ('-doc', [
        r"(usr/)?share/doc/.*",
        r"(usr/)?share/man/.*",
        r"(usr/)?share/info/.*"]),
    ('-locale', [
        r"(usr/)?share/locale/.*",
        r"(usr/)?share/i18n/.*",
        r"(usr/)?share/zoneinfo/.*"]),
    ('-misc', [r".*"]),
]


# Default stratum split rules: (artifact name suffix, chunk artifact name
# patterns). Order matters, the first matching rule wins.
DEFAULT_STRATUM_RULES = [
    ('-devel', [
        r'.*-devel',
        r'.*-debug',
        r'.*-doc']),
    ('-runtime', [
        r'.*-bins',
        r'.*-libs',
        r'.*-locale',
        r'.*-misc',
        r'.*']),
]


def unify_chunk_matches(morphology):
    '''Create split rules including defaults and per-chunk rules.

    With rules specified in the morphology's 'products' field and the
    default rules for chunks, generate rules to match the files produced
    by building the chunk to the chunk artifact they should be put in.

    Explicit rules come first, so they take priority. A default rule is
    only added when no explicit rule already targets the same artifact.

    '''

    split_rules = SplitRules()

    for product in morphology['products']:
        split_rules.add(product['artifact'], FileMatch(product['include']))

    # Default suffixes are distinct, so the set of explicitly configured
    # artifacts can be computed once up front.
    explicit = set(split_rules.artifacts)

    name = morphology['name']
    for suffix, patterns in DEFAULT_CHUNK_RULES:
        ca_name = name + suffix
        # Default rules are replaced by explicit ones. Only the
        # overridden default is skipped; stopping at the first override
        # would also drop every later default, including the catch-all.
        if ca_name in explicit:
            continue
        split_rules.add(ca_name, FileMatch(patterns))

    return split_rules


def unify_stratum_matches(morphology):
    '''Create split rules including defaults and per-stratum rules.

    With rules specified in the chunk spec's 'artifacts' fields, the
    stratum's 'products' field and the default rules for strata, generate
    rules to match the artifacts produced by building the chunks in the
    strata to the stratum artifact they should be put in.

    The returned rules list explicit assignments first, then the
    pattern rules from 'products', then any default rule whose artifact
    was not already covered by 'products'.

    '''

    assignment_split_rules = SplitRules()
    for spec in morphology['chunks']:
        source_name = spec['name']
        # Sorted so the resulting rule order is deterministic.
        for ca_name, sta_name in sorted(spec.get('artifacts', {}).items()):
            assignment_split_rules.add(sta_name,
                                       ArtifactAssign(source_name, ca_name))

    # Construct match rules separately, so we can use the SplitRules object's
    # own knowledge of which rules already exist to determine whether
    # to include the default rule.
    # Rather than use the existing SplitRules, use a new one, since
    # match rules supplement assignment rules, rather than replace.
    match_split_rules = SplitRules()
    for product in morphology.get('products', ()):
        match_split_rules.add(product['artifact'],
                              ArtifactMatch(product['include']))

    explicit = set(match_split_rules.artifacts)

    for suffix, patterns in DEFAULT_STRATUM_RULES:
        sta_name = morphology['name'] + suffix
        # Skip only the default that has been overridden explicitly;
        # the remaining defaults are still needed.
        if sta_name in explicit:
            continue
        match_split_rules.add(sta_name, ArtifactMatch(patterns))

    # Construct a new SplitRules with the assignments before matches
    return SplitRules(itertools.chain(assignment_split_rules,
                                      match_split_rules))


def unify_system_matches(morphology):
    '''Create split rules assigning stratum artifacts to a system.

    Every rule targets the single artifact '<system name>-rootfs'.
    For each entry in the morphology's 'strata' field:

    * if the entry has no 'artifacts' (or it is None), every artifact
      from that stratum source is assigned;
    * otherwise only the listed stratum artifacts are assigned.

    The stratum source name is the entry's 'name' if present, else its
    'morph' field.

    '''

    name = morphology['name'] + '-rootfs'
    split_rules = SplitRules()

    for spec in morphology['strata']:
        # Only fall back to 'morph' when 'name' is absent, so an entry
        # that has 'name' but no 'morph' does not raise KeyError.
        source_name = spec['name'] if 'name' in spec else spec['morph']
        artifacts = spec.get('artifacts')
        if artifacts is None:
            split_rules.add(name, SourceAssign(source_name))
            continue
        for sta_name in artifacts:
            split_rules.add(name, ArtifactAssign(source_name, sta_name))

    return split_rules


def unify_cluster_matches(_):
    '''Return None: no split rules are generated for clusters.'''
    return None