diff options
author | Peter Murphy <peterkmurphy@gmail.com> | 2017-05-08 16:39:26 +1000 |
---|---|---|
committer | Peter Murphy <peterkmurphy@gmail.com> | 2017-05-08 16:39:26 +1000 |
commit | cf1c86cb86db74206085e6f83e4586ddc7db9ac2 (patch) | |
tree | 438cc3c30c8afafee3a68b421830a10583439f64 | |
parent | a06c1f644b0ba3298efb7b6ec928aaa9221b52b8 (diff) | |
download | pyyaml-git-cf1c86cb86db74206085e6f83e4586ddc7db9ac2.tar.gz |
First attempt at fixing "pyyaml does not support literals in unicode over codepoint 0xffff" (#25)
-rw-r--r-- | lib/yaml/emitter.py | 10 | ||||
-rw-r--r-- | lib/yaml/reader.py | 10 | ||||
-rw-r--r-- | lib3/yaml/emitter.py | 6 | ||||
-rw-r--r-- | lib3/yaml/reader.py | 3 |
4 files changed, 18 insertions, 11 deletions
diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py index e5bcdcc..5fb4179 100644 --- a/lib/yaml/emitter.py +++ b/lib/yaml/emitter.py @@ -8,9 +8,13 @@ __all__ = ['Emitter', 'EmitterError'] +import sys + from error import YAMLError from events import * +has_ucs4 = sys.maxunicode > 0xffff + class EmitterError(YAMLError): pass @@ -674,7 +678,7 @@ class Emitter(object): # Check for indicators. if index == 0: # Leading indicators are special characters. - if ch in u'#,[]{}&*!|>\'\"%@`': + if ch in u'#,[]{}&*!|>\'\"%@`': flow_indicators = True block_indicators = True if ch in u'?:': @@ -701,7 +705,8 @@ class Emitter(object): line_breaks = True if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF': + or u'\uE000' <= ch <= u'\uFFFD' + or ((not has_ucs4) or (u'\U00010000' <= ch < u'\U0010ffff'))) and ch != u'\uFEFF': unicode_characters = True if not self.allow_unicode: special_characters = True @@ -1137,4 +1142,3 @@ class Emitter(object): spaces = (ch == u' ') breaks = (ch in u'\n\x85\u2028\u2029') end += 1 - diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py index 3249e6b..56a12f4 100644 --- a/lib/yaml/reader.py +++ b/lib/yaml/reader.py @@ -19,7 +19,9 @@ __all__ = ['Reader', 'ReaderError'] from error import YAMLError, Mark -import codecs, re +import codecs, re, sys + +has_ucs4 = sys.maxunicode > 0xffff class ReaderError(YAMLError): @@ -134,7 +136,10 @@ class Reader(object): self.encoding = 'utf-8' self.update(1) - NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + if has_ucs4: + NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]') + else: + NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') def check_printable(self, data): match = self.NON_PRINTABLE.search(data) if match: @@ -187,4 +192,3 @@ class Reader(object): # psyco.bind(Reader) #except 
ImportError: # pass - diff --git a/lib3/yaml/emitter.py b/lib3/yaml/emitter.py index 34cb145..3479883 100644 --- a/lib3/yaml/emitter.py +++ b/lib3/yaml/emitter.py @@ -671,7 +671,7 @@ class Emitter: # Check for indicators. if index == 0: # Leading indicators are special characters. - if ch in '#,[]{}&*!|>\'\"%@`': + if ch in '#,[]{}&*!|>\'\"%@`': flow_indicators = True block_indicators = True if ch in '?:': @@ -698,7 +698,8 @@ class Emitter: line_breaks = True if not (ch == '\n' or '\x20' <= ch <= '\x7E'): if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' - or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF': + or '\uE000' <= ch <= '\uFFFD' + or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF': unicode_characters = True if not self.allow_unicode: special_characters = True @@ -1134,4 +1135,3 @@ class Emitter: spaces = (ch == ' ') breaks = (ch in '\n\x85\u2028\u2029') end += 1 - diff --git a/lib3/yaml/reader.py b/lib3/yaml/reader.py index f70e920..376b9a3 100644 --- a/lib3/yaml/reader.py +++ b/lib3/yaml/reader.py @@ -134,7 +134,7 @@ class Reader(object): self.encoding = 'utf-8' self.update(1) - NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]') def check_printable(self, data): match = self.NON_PRINTABLE.search(data) if match: @@ -189,4 +189,3 @@ class Reader(object): # psyco.bind(Reader) #except ImportError: # pass - |