diff options
author | ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> | 2015-11-25 19:53:19 +0000 |
---|---|---|
committer | ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> | 2015-11-25 19:53:19 +0000 |
commit | ed32e9167dc06efa20210a41b45340d86d80ba7f (patch) | |
tree | b989cf797f6faca30639ba3acc2204c706cdc1b3 | |
parent | f41bfe97655123599dfe46fc4238cb5d5be78f5d (diff) | |
download | pyparsing-ed32e9167dc06efa20210a41b45340d86d80ba7f.tar.gz |
Cleaned up additional issues from enhancing the error messages for Or and MatchFirst, handling Unicode values in expressions. Fixes Unicode encoding issues in Python 2.
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@303 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r-- | src/CHANGES | 8 | ||||
-rw-r--r-- | src/pyparsing.py | 19 | ||||
-rw-r--r-- | src/unitTests.py | 13 |
3 files changed, 26 insertions, 14 deletions
diff --git a/src/CHANGES b/src/CHANGES
index 6fd73dd..ce95f13 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -5,7 +5,13 @@ Change Log
Version 2.0.7 -
---------------------------
- Simplified string representation of Forward class, to avoid memory
- and performance errors while building ParseException messages.
+ and performance errors while building ParseException messages. Thanks,
+ Will McGugan, Andrea Censi, and Martijn Vermaat for the bug reports and
+ test code.
+
+- Cleaned up additional issues from enhancing the error messages for
+ Or and MatchFirst, handling Unicode values in expressions. Fixes Unicode
+ encoding issues in Python 2, thanks to Evan Hubinger for the bug report.
Version 2.0.6 -
diff --git a/src/pyparsing.py b/src/pyparsing.py
index 186bc45..f30feb9 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -123,18 +123,11 @@ else:
return str(obj)
except UnicodeEncodeError:
- # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
- # state that "The return value must be a string object". However, does a
- # unicode object (being a subclass of basestring) count as a "string
- # object"?
- # If so, then return a unicode object:
- return unicode(obj)
- # Else encode it... but how? There are many choices... :)
- # Replace unprintables with escape codes?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
- # Replace unprintables with question marks?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
- # ...
+ # Else encode it
+ ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
+ xmlcharref = Regex('&#\d+;')
+ xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
+ return xmlcharref.transformString(ret)
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
singleArgBuiltins = []
@@ -2351,7 +2344,7 @@ class ParseExpression(ParserElement):
self.mayReturnEmpty |= other.mayReturnEmpty
self.mayIndexError |= other.mayIndexError
- self.errmsg = "Expected " + str(self)
+ self.errmsg = "Expected " + _ustr(self)
return self
diff --git a/src/unitTests.py b/src/unitTests.py
index f496be1..22ef6d6 100644
--- a/src/unitTests.py
+++ b/src/unitTests.py
@@ -2297,6 +2297,18 @@ class EachWithOptionalWithResultsNameTest(ParseTestCase):
print_(result.dump())
assert sorted(result.keys()) == ['one','two']
+class UnicodeExpressionTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import Literal, ParseException
+
+ z = 'a' | Literal(u'\u1111')
+ z.streamline()
+ try:
+ z.parseString('b')
+ except ParseException as pe:
+ if not PY_3:
+ assert pe.msg == r'''Expected {"a" | "\u1111"}''', "Invalid error message raised, got %r" % pe.msg
+
class MiscellaneousParserTests(ParseTestCase):
def runTest(self):
import pyparsing
@@ -2508,6 +2520,7 @@ def makeTestSuite():
suite.addTest( AddConditionTest() )
suite.addTest( PatientOrTest() )
suite.addTest( EachWithOptionalWithResultsNameTest() )
+ suite.addTest( UnicodeExpressionTest() )
suite.addTest( MiscellaneousParserTests() )
if TEST_USING_PACKRAT:
# retest using packrat parsing (disable those tests that aren't compatible)
|