summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sam Thursfield <sam.thursfield@codethink.co.uk> 2014-07-07 18:06:28 +0100
committer: Sam Thursfield <sam.thursfield@codethink.co.uk> 2014-07-07 18:06:28 +0100
commit: 1b08ccc17070c4c2ff4b7d1b064f1e31419001e1 (patch)
tree: 46977e34a02dc1c947ac9cfd74462d974675f8c1
parent: c1cbbdd036fb50e2a8cac466bc6b332250297d64 (diff)
parent: 9fa220986784bd9e4d63343ed0e22a2a8559ead7 (diff)
download: morph-1b08ccc17070c4c2ff4b7d1b064f1e31419001e1.tar.gz
Merge branch 'sam/splitting-fixes'
Reviewed-By: Richard Maw <richard.maw@codethink.co.uk>
Reviewed-By: Lars Wirzenius <lars.wirzenius@codethink.co.uk>
-rw-r--r-- morphlib/artifactsplitrule.py 24
1 files changed, 14 insertions, 10 deletions
diff --git a/morphlib/artifactsplitrule.py b/morphlib/artifactsplitrule.py
index bc92e5fb..125f5b93 100644
--- a/morphlib/artifactsplitrule.py
+++ b/morphlib/artifactsplitrule.py
@@ -150,9 +150,10 @@ class SplitRules(collections.Iterable):
def partition(self, iterable):
'''Match many files or artifacts.
- It's the common case to take a bunch of filenames and determine
- which artifact each should go to, so rather than implement this
- logic in multiple places, it's here as a convenience method.
+ This function takes an iterable of file names, and groups them
+ using the rules that have been added to this object.
+
+ This is a convenience function that uses the match() method internally.
'''
@@ -230,10 +231,11 @@ def unify_chunk_matches(morphology):
name = morphology['name']
for suffix, patterns in DEFAULT_CHUNK_RULES:
ca_name = name + suffix
- # Default rules are replaced by explicit ones
- if ca_name in split_rules.artifacts:
- break
- split_rules.add(ca_name, FileMatch(patterns))
+ # Explicit rules override the default rules. This is an all-or-nothing
+ # override: there is no way to extend the default split rules right now
+ # without duplicating them in the chunk morphology.
+ if ca_name not in split_rules.artifacts:
+ split_rules.add(ca_name, FileMatch(patterns))
return split_rules
@@ -267,9 +269,11 @@ def unify_stratum_matches(morphology):
for suffix, patterns in DEFAULT_STRATUM_RULES:
sta_name = morphology['name'] + suffix
- if sta_name in match_split_rules.artifacts:
- break
- match_split_rules.add(sta_name, ArtifactMatch(patterns))
+ # Explicit rules override the default rules. This is an all-or-nothing
+ # override: there is no way to extend the default split rules right now
+ # without duplicating them in the stratum morphology.
+ if sta_name not in match_split_rules.artifacts:
+ match_split_rules.add(sta_name, ArtifactMatch(patterns))
# Construct a new SplitRules with the assignments before matches
return SplitRules(itertools.chain(assignment_split_rules,