summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Murphy <peterkmurphy@gmail.com> 2017-05-08 16:39:26 +1000
committer Peter Murphy <peterkmurphy@gmail.com> 2017-05-08 16:39:26 +1000
commit cf1c86cb86db74206085e6f83e4586ddc7db9ac2 (patch)
tree 438cc3c30c8afafee3a68b421830a10583439f64
parent a06c1f644b0ba3298efb7b6ec928aaa9221b52b8 (diff)
download pyyaml-git-cf1c86cb86db74206085e6f83e4586ddc7db9ac2.tar.gz
First attack at "pyyaml does not support literals in unicode over codepoint 0xffff" (#25)
-rw-r--r-- lib/yaml/emitter.py 10
-rw-r--r-- lib/yaml/reader.py 10
-rw-r--r-- lib3/yaml/emitter.py 6
-rw-r--r-- lib3/yaml/reader.py 3
4 files changed, 18 insertions, 11 deletions
diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py
index e5bcdcc..5fb4179 100644
--- a/lib/yaml/emitter.py
+++ b/lib/yaml/emitter.py
@@ -8,9 +8,13 @@
__all__ = ['Emitter', 'EmitterError']
+import sys
+
from error import YAMLError
from events import *
+has_ucs4 = sys.maxunicode > 0xffff
+
class EmitterError(YAMLError):
pass
@@ -674,7 +678,7 @@ class Emitter(object):
# Check for indicators.
if index == 0:
# Leading indicators are special characters.
- if ch in u'#,[]{}&*!|>\'\"%@`':
+ if ch in u'#,[]{}&*!|>\'\"%@`':
flow_indicators = True
block_indicators = True
if ch in u'?:':
@@ -701,7 +705,8 @@ class Emitter(object):
line_breaks = True
if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
- or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
+ or u'\uE000' <= ch <= u'\uFFFD'
+ or ((not has_ucs4) or (u'\U00010000' <= ch < u'\U0010ffff'))) and ch != u'\uFEFF':
unicode_characters = True
if not self.allow_unicode:
special_characters = True
@@ -1137,4 +1142,3 @@ class Emitter(object):
spaces = (ch == u' ')
breaks = (ch in u'\n\x85\u2028\u2029')
end += 1
-
diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py
index 3249e6b..56a12f4 100644
--- a/lib/yaml/reader.py
+++ b/lib/yaml/reader.py
@@ -19,7 +19,9 @@ __all__ = ['Reader', 'ReaderError']
from error import YAMLError, Mark
-import codecs, re
+import codecs, re, sys
+
+has_ucs4 = sys.maxunicode > 0xffff
class ReaderError(YAMLError):
@@ -134,7 +136,10 @@ class Reader(object):
self.encoding = 'utf-8'
self.update(1)
- NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
+ if has_ucs4:
+ NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
+ else:
+ NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
@@ -187,4 +192,3 @@ class Reader(object):
# psyco.bind(Reader)
#except ImportError:
# pass
-
diff --git a/lib3/yaml/emitter.py b/lib3/yaml/emitter.py
index 34cb145..3479883 100644
--- a/lib3/yaml/emitter.py
+++ b/lib3/yaml/emitter.py
@@ -671,7 +671,7 @@ class Emitter:
# Check for indicators.
if index == 0:
# Leading indicators are special characters.
- if ch in '#,[]{}&*!|>\'\"%@`':
+ if ch in '#,[]{}&*!|>\'\"%@`':
flow_indicators = True
block_indicators = True
if ch in '?:':
@@ -698,7 +698,8 @@ class Emitter:
line_breaks = True
if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
- or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF':
+ or '\uE000' <= ch <= '\uFFFD'
+ or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF':
unicode_characters = True
if not self.allow_unicode:
special_characters = True
@@ -1134,4 +1135,3 @@ class Emitter:
spaces = (ch == ' ')
breaks = (ch in '\n\x85\u2028\u2029')
end += 1
-
diff --git a/lib3/yaml/reader.py b/lib3/yaml/reader.py
index f70e920..376b9a3 100644
--- a/lib3/yaml/reader.py
+++ b/lib3/yaml/reader.py
@@ -134,7 +134,7 @@ class Reader(object):
self.encoding = 'utf-8'
self.update(1)
- NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
+ NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
@@ -189,4 +189,3 @@ class Reader(object):
# psyco.bind(Reader)
#except ImportError:
# pass
-