From ed32e9167dc06efa20210a41b45340d86d80ba7f Mon Sep 17 00:00:00 2001 From: ptmcg Date: Wed, 25 Nov 2015 19:53:19 +0000 Subject: Cleaned up additional issues from enhancing the error messages for Or and MatchFirst, handling Unicode values in expressions. Fixes Unicode encoding issues in Python 2. git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@303 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b --- src/CHANGES | 8 +++++++- src/pyparsing.py | 19 ++++++------------- src/unitTests.py | 13 +++++++++++++ 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/CHANGES b/src/CHANGES index 6fd73dd..ce95f13 100644 --- a/src/CHANGES +++ b/src/CHANGES @@ -5,7 +5,13 @@ Change Log Version 2.0.7 - --------------------------- - Simplified string representation of Forward class, to avoid memory - and performance errors while building ParseException messages. + and performance errors while building ParseException messages. Thanks, + Will McGugan, Andrea Censi, and Martijn Vermaat for the bug reports and + test code. + +- Cleaned up additional issues from enhancing the error messages for + Or and MatchFirst, handling Unicode values in expressions. Fixes Unicode + encoding issues in Python 2, thanks to Evan Hubinger for the bug report. Version 2.0.6 - diff --git a/src/pyparsing.py b/src/pyparsing.py index 186bc45..f30feb9 100644 --- a/src/pyparsing.py +++ b/src/pyparsing.py @@ -123,18 +123,11 @@ else: return str(obj) except UnicodeEncodeError: - # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) - # state that "The return value must be a string object". However, does a - # unicode object (being a subclass of basestring) count as a "string - # object"? - # If so, then return a unicode object: - return unicode(obj) - # Else encode it... but how? There are many choices... :) - # Replace unprintables with escape codes? - #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') - # Replace unprintables with question marks? - #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') - # ... + # Else encode it + ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') + xmlcharref = Regex('&#\d+;') + xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) + return xmlcharref.transformString(ret) # build list of single arg builtins, tolerant of Python version, that can be used as parse actions singleArgBuiltins = [] @@ -2351,7 +2344,7 @@ class ParseExpression(ParserElement): self.mayReturnEmpty |= other.mayReturnEmpty self.mayIndexError |= other.mayIndexError - self.errmsg = "Expected " + str(self) + self.errmsg = "Expected " + _ustr(self) return self diff --git a/src/unitTests.py b/src/unitTests.py index f496be1..22ef6d6 100644 --- a/src/unitTests.py +++ b/src/unitTests.py @@ -2297,6 +2297,18 @@ class EachWithOptionalWithResultsNameTest(ParseTestCase): print_(result.dump()) assert sorted(result.keys()) == ['one','two'] +class UnicodeExpressionTest(ParseTestCase): + def runTest(self): + from pyparsing import Literal, ParseException + + z = 'a' | Literal(u'\u1111') + z.streamline() + try: + z.parseString('b') + except ParseException as pe: + if not PY_3: + assert pe.msg == r'''Expected {"a" | "\u1111"}''', "Invalid error message raised, got %r" % pe.msg + class MiscellaneousParserTests(ParseTestCase): def runTest(self): import pyparsing @@ -2508,6 +2520,7 @@ def makeTestSuite(): suite.addTest( AddConditionTest() ) suite.addTest( PatientOrTest() ) suite.addTest( EachWithOptionalWithResultsNameTest() ) + suite.addTest( UnicodeExpressionTest() ) suite.addTest( MiscellaneousParserTests() ) if TEST_USING_PACKRAT: # retest using packrat parsing (disable those tests that aren't compatible) -- cgit v1.2.1