summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Anish Athalye <me@anishathalye.com> 2020-01-22 16:07:06 -0500
committer: Ingy döt Net <ingy@ingy.net> 2021-01-13 17:51:32 -0500
commit: a60f7a19c0b418fe95fcf2ec0957005ae39e1090 (patch)
tree: f39bebcd19cd20a4975a000d409f033a1b6d2abe
parent: ee98abd7d7bd2ca9c7b98aa19164fd0306a3f3d2 (diff)
download: pyyaml-git-a60f7a19c0b418fe95fcf2ec0957005ae39e1090.tar.gz
Fix compatibility with Jython
This patch was taken from https://github.com/yaml/pyyaml/issues/369#issuecomment-571596545, authored by Pekka Klärck <peke@iki.fi>. In short, Jython doesn't support lone surrogates, so importing yaml (and in particular, loading `reader.py`) raised a `UnicodeDecodeError`. This patch works around the problem by wrapping the string literal that contains the lone surrogates in `eval`, which defers its evaluation so that it happens only on non-Jython platforms. The change is made only in `lib/yaml/reader.py` and not in `lib3/yaml/reader.py`, because Jython does not support Python 3. With this patch, Jython's behavior with respect to Unicode code points above 0xFFFF goes back to what it was before 0716ae21a1e7ab6b4ef73428c0c8fff49685d057. Jython still does not pass all the unit tests (1275 pass, 3 fail, 1 errors); all of the failing tests are related to Unicode. Still, this is better than simply crashing upon `import yaml`. With this patch, all tests continue to pass on Python 2 and Python 3.
-rw-r--r-- lib/yaml/reader.py 9
1 files changed, 7 insertions, 2 deletions
diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py
index 4b377d6..4c42150 100644
--- a/lib/yaml/reader.py
+++ b/lib/yaml/reader.py
@@ -137,9 +137,14 @@ class Reader(object):
self.update(1)
if has_ucs4:
- NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
+ NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]'
+ elif sys.platform.startswith('java'):
+ # Jython doesn't support lone surrogates https://bugs.jython.org/issue2048
+ NON_PRINTABLE = u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]'
else:
- NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uFFFD]|(?:^|[^\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)')
+ # Need to use eval here due to the above Jython issue
+ NON_PRINTABLE = eval(r"u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uFFFD]|(?:^|[^\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?:[^\uDC00-\uDFFF]|$)'")
+ NON_PRINTABLE = re.compile(NON_PRINTABLE)
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match: