summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2015-11-25 19:53:19 +0000
committer: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2015-11-25 19:53:19 +0000
commit: ed32e9167dc06efa20210a41b45340d86d80ba7f (patch)
tree: b989cf797f6faca30639ba3acc2204c706cdc1b3
parent: f41bfe97655123599dfe46fc4238cb5d5be78f5d (diff)
download: pyparsing-ed32e9167dc06efa20210a41b45340d86d80ba7f.tar.gz
Cleaned up additional issues from enhancing the error messages for Or and MatchFirst, handling Unicode values in expressions. Fixes Unicode encoding issues in Python 2.
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@303 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r-- src/CHANGES 8
-rw-r--r-- src/pyparsing.py 19
-rw-r--r-- src/unitTests.py 13
3 files changed, 26 insertions, 14 deletions
diff --git a/src/CHANGES b/src/CHANGES
index 6fd73dd..ce95f13 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -5,7 +5,13 @@ Change Log
Version 2.0.7 -
---------------------------
- Simplified string representation of Forward class, to avoid memory
- and performance errors while building ParseException messages.
+ and performance errors while building ParseException messages. Thanks,
+ Will McGugan, Andrea Censi, and Martijn Vermaat for the bug reports and
+ test code.
+
+- Cleaned up additional issues from enhancing the error messages for
+ Or and MatchFirst, handling Unicode values in expressions. Fixes Unicode
+ encoding issues in Python 2, thanks to Evan Hubinger for the bug report.
Version 2.0.6 -
diff --git a/src/pyparsing.py b/src/pyparsing.py
index 186bc45..f30feb9 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -123,18 +123,11 @@ else:
return str(obj)
except UnicodeEncodeError:
- # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
- # state that "The return value must be a string object". However, does a
- # unicode object (being a subclass of basestring) count as a "string
- # object"?
- # If so, then return a unicode object:
- return unicode(obj)
- # Else encode it... but how? There are many choices... :)
- # Replace unprintables with escape codes?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
- # Replace unprintables with question marks?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
- # ...
+ # Else encode it
+ ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
+ xmlcharref = Regex('&#\d+;')
+ xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
+ return xmlcharref.transformString(ret)
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
singleArgBuiltins = []
@@ -2351,7 +2344,7 @@ class ParseExpression(ParserElement):
self.mayReturnEmpty |= other.mayReturnEmpty
self.mayIndexError |= other.mayIndexError
- self.errmsg = "Expected " + str(self)
+ self.errmsg = "Expected " + _ustr(self)
return self
diff --git a/src/unitTests.py b/src/unitTests.py
index f496be1..22ef6d6 100644
--- a/src/unitTests.py
+++ b/src/unitTests.py
@@ -2297,6 +2297,18 @@ class EachWithOptionalWithResultsNameTest(ParseTestCase):
print_(result.dump())
assert sorted(result.keys()) == ['one','two']
+class UnicodeExpressionTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import Literal, ParseException
+
+ z = 'a' | Literal(u'\u1111')
+ z.streamline()
+ try:
+ z.parseString('b')
+ except ParseException as pe:
+ if not PY_3:
+ assert pe.msg == r'''Expected {"a" | "\u1111"}''', "Invalid error message raised, got %r" % pe.msg
+
class MiscellaneousParserTests(ParseTestCase):
def runTest(self):
import pyparsing
@@ -2508,6 +2520,7 @@ def makeTestSuite():
suite.addTest( AddConditionTest() )
suite.addTest( PatientOrTest() )
suite.addTest( EachWithOptionalWithResultsNameTest() )
+ suite.addTest( UnicodeExpressionTest() )
suite.addTest( MiscellaneousParserTests() )
if TEST_USING_PACKRAT:
# retest using packrat parsing (disable those tests that aren't compatible)