From 391d8cec9ae74a5f1d3b4101f4542a81f63d9860 Mon Sep 17 00:00:00 2001 From: Richard Maw Date: Wed, 30 Jul 2014 12:30:30 +0000 Subject: Dump multi-line strings in yaml documents in '|' form This prevents the description fields of morphologies being mangled. This does not preserve the original formatting, so much as happen to dump it in the same way we wrote it, but given we chose that form because we think it looks the nicest, that's not a problem. --- morphlib/morphloader.py | 31 +++++++++++++++++++++++++++++-- morphlib/morphloader_tests.py | 30 ++++++++++++++++++++++++++++++ morphlib/yamlparse.py | 2 +- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/morphlib/morphloader.py b/morphlib/morphloader.py index 45416a19..21e10827 100644 --- a/morphlib/morphloader.py +++ b/morphlib/morphloader.py @@ -224,7 +224,7 @@ class DuplicateDeploymentNameError(MorphologyValidationError): % (cluster_filename, '\n ' + '\n '.join(duplicates))) -class OrderedDumper(yaml.SafeDumper): +class MorphologyDumper(yaml.SafeDumper): keyorder = ( 'name', 'kind', @@ -274,9 +274,36 @@ class OrderedDumper(yaml.SafeDumper): return dumper.represent_mapping('tag:yaml.org,2002:map', cls._iter_in_global_order(mapping)) + @classmethod + def _represent_str(cls, dumper, orig_data): + fallback_representer = yaml.representer.SafeRepresenter.represent_str + try: + data = unicode(orig_data, 'ascii') + if data.count('\n') == 0: + return fallback_representer(dumper, orig_data) + except UnicodeDecodeError: + try: + data = unicode(orig_data, 'utf-8') + if data.count('\n') == 0: + return fallback_representer(dumper, orig_data) + except UnicodeDecodeError: + return fallback_representer(dumper, orig_data) + return dumper.represent_scalar(u'tag:yaml.org,2002:str', + data, style='|') + + @classmethod + def _represent_unicode(cls, dumper, data): + if data.count('\n') == 0: + return yaml.representer.SafeRepresenter.represent_unicode(dumper, + data) + return dumper.represent_scalar(u'tag:yaml.org,2002:str', + data, style='|') + def __init__(self, *args, **kwargs): yaml.SafeDumper.__init__(self, *args, **kwargs) self.add_representer(dict, self._represent_dict) + self.add_representer(str, self._represent_str) + self.add_representer(unicode, self._represent_unicode) class MorphologyLoader(object): @@ -394,7 +421,7 @@ class MorphologyLoader(object): def save_to_string(self, morphology): '''Return normalised textual form of morphology.''' - return yaml.dump(morphology.data, Dumper=OrderedDumper, + return yaml.dump(morphology.data, Dumper=MorphologyDumper, default_flow_style=False) def save_to_file(self, filename, morphology): diff --git a/morphlib/morphloader_tests.py b/morphlib/morphloader_tests.py index 82663298..f4d2f9b6 100644 --- a/morphlib/morphloader_tests.py +++ b/morphlib/morphloader_tests.py @@ -912,3 +912,33 @@ build-system: dummy # deployment keys field order self.assertLess(s.find('type'), s.find('location')) self.assertLess(s.find('location'), s.find('HOSTNAME')) + + def test_multi_line_round_trip(self): + s = ('name: foo\n' + 'kind: bar\n' + 'description: |\n' + ' 1 2 3\n' + ' 4 5 6\n' + ' 7 8 9\n') + m = self.loader.parse_morphology_text(s, 'string') + self.assertEqual(s, self.loader.save_to_string(m)) + + def test_smoketest_multi_line_unicode(self): + m = morphlib.morph3.Morphology( + name=u'foo', + description=u'1 2 3\n4 5 6\n7 8 9\n', + ) + s = self.loader.save_to_string(m) + + def test_smoketest_multi_line_unicode_encoded(self): + m = morphlib.morph3.Morphology( + name=u'foo \u263A'.encode('utf-8'), + description=u'1 \u263A\n2 \u263A\n3 \u263A\n'.encode('utf-8'), + ) + s = self.loader.save_to_string(m) + + def test_smoketest_binary_garbage(self): + m = morphlib.morph3.Morphology( + description='\x92', + ) + s = self.loader.save_to_string(m) diff --git a/morphlib/yamlparse.py b/morphlib/yamlparse.py index 726b4181..6f139304 100644 --- a/morphlib/yamlparse.py +++ b/morphlib/yamlparse.py @@ -29,7 +29,7 @@ if morphlib.got_yaml: # pragma: no cover def dump(*args, **kwargs): if 'default_flow_style' not in kwargs: kwargs['default_flow_style'] = False - return yaml.dump(Dumper=morphlib.morphloader.OrderedDumper, + return yaml.dump(Dumper=morphlib.morphloader.MorphologyDumper, *args, **kwargs) else: # pragma: no cover -- cgit v1.2.1