summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Maw <richard.maw@codethink.co.uk> 2014-07-30 12:30:30 +0000
committer: Richard Maw <richard.maw@codethink.co.uk> 2014-07-30 12:43:43 +0000
commit: 391d8cec9ae74a5f1d3b4101f4542a81f63d9860 (patch)
tree: be34cff00a09e3baba0e0045ebaa7004faa7781d
parent: f860d3e7b0e14b01b31d4f3d63a8c37572cd9fdc (diff)
download: morph-391d8cec9ae74a5f1d3b4101f4542a81f63d9860.tar.gz
Dump multi-line strings in yaml documents in '|' form
branch: baserock/richardmaw/bugfix/yaml-multi-line-dump
This prevents the description fields of morphologies from being mangled. This does not so much preserve the original formatting as happen to dump it in the same way we wrote it, but given that we chose that form because we think it looks the nicest, that's not a problem.
-rw-r--r-- morphlib/morphloader.py 31
-rw-r--r-- morphlib/morphloader_tests.py 30
-rw-r--r-- morphlib/yamlparse.py 2
3 files changed, 60 insertions, 3 deletions
diff --git a/morphlib/morphloader.py b/morphlib/morphloader.py
index 45416a19..21e10827 100644
--- a/morphlib/morphloader.py
+++ b/morphlib/morphloader.py
@@ -224,7 +224,7 @@ class DuplicateDeploymentNameError(MorphologyValidationError):
% (cluster_filename, '\n ' + '\n '.join(duplicates)))
-class OrderedDumper(yaml.SafeDumper):
+class MorphologyDumper(yaml.SafeDumper):
keyorder = (
'name',
'kind',
@@ -274,9 +274,36 @@ class OrderedDumper(yaml.SafeDumper):
return dumper.represent_mapping('tag:yaml.org,2002:map',
cls._iter_in_global_order(mapping))
+ @classmethod
+ def _represent_str(cls, dumper, orig_data):
+ fallback_representer = yaml.representer.SafeRepresenter.represent_str
+ try:
+ data = unicode(orig_data, 'ascii')
+ if data.count('\n') == 0:
+ return fallback_representer(dumper, orig_data)
+ except UnicodeDecodeError:
+ try:
+ data = unicode(orig_data, 'utf-8')
+ if data.count('\n') == 0:
+ return fallback_representer(dumper, orig_data)
+ except UnicodeDecodeError:
+ return fallback_representer(dumper, orig_data)
+ return dumper.represent_scalar(u'tag:yaml.org,2002:str',
+ data, style='|')
+
+ @classmethod
+ def _represent_unicode(cls, dumper, data):
+ if data.count('\n') == 0:
+ return yaml.representer.SafeRepresenter.represent_unicode(dumper,
+ data)
+ return dumper.represent_scalar(u'tag:yaml.org,2002:str',
+ data, style='|')
+
def __init__(self, *args, **kwargs):
yaml.SafeDumper.__init__(self, *args, **kwargs)
self.add_representer(dict, self._represent_dict)
+ self.add_representer(str, self._represent_str)
+ self.add_representer(unicode, self._represent_unicode)
class MorphologyLoader(object):
@@ -394,7 +421,7 @@ class MorphologyLoader(object):
def save_to_string(self, morphology):
'''Return normalised textual form of morphology.'''
- return yaml.dump(morphology.data, Dumper=OrderedDumper,
+ return yaml.dump(morphology.data, Dumper=MorphologyDumper,
default_flow_style=False)
def save_to_file(self, filename, morphology):
diff --git a/morphlib/morphloader_tests.py b/morphlib/morphloader_tests.py
index 82663298..f4d2f9b6 100644
--- a/morphlib/morphloader_tests.py
+++ b/morphlib/morphloader_tests.py
@@ -912,3 +912,33 @@ build-system: dummy
# deployment keys field order
self.assertLess(s.find('type'), s.find('location'))
self.assertLess(s.find('location'), s.find('HOSTNAME'))
+
+ def test_multi_line_round_trip(self):
+ s = ('name: foo\n'
+ 'kind: bar\n'
+ 'description: |\n'
+ ' 1 2 3\n'
+ ' 4 5 6\n'
+ ' 7 8 9\n')
+ m = self.loader.parse_morphology_text(s, 'string')
+ self.assertEqual(s, self.loader.save_to_string(m))
+
+ def test_smoketest_multi_line_unicode(self):
+ m = morphlib.morph3.Morphology(
+ name=u'foo',
+ description=u'1 2 3\n4 5 6\n7 8 9\n',
+ )
+ s = self.loader.save_to_string(m)
+
+ def test_smoketest_multi_line_unicode_encoded(self):
+ m = morphlib.morph3.Morphology(
+ name=u'foo \u263A'.encode('utf-8'),
+ description=u'1 \u263A\n2 \u263A\n3 \u263A\n'.encode('utf-8'),
+ )
+ s = self.loader.save_to_string(m)
+
+ def test_smoketest_binary_garbage(self):
+ m = morphlib.morph3.Morphology(
+ description='\x92',
+ )
+ s = self.loader.save_to_string(m)
diff --git a/morphlib/yamlparse.py b/morphlib/yamlparse.py
index 726b4181..6f139304 100644
--- a/morphlib/yamlparse.py
+++ b/morphlib/yamlparse.py
@@ -29,7 +29,7 @@ if morphlib.got_yaml: # pragma: no cover
def dump(*args, **kwargs):
if 'default_flow_style' not in kwargs:
kwargs['default_flow_style'] = False
- return yaml.dump(Dumper=morphlib.morphloader.OrderedDumper,
+ return yaml.dump(Dumper=morphlib.morphloader.MorphologyDumper,
*args, **kwargs)
else: # pragma: no cover