summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2008-07-28 04:17:33 +0000
committer ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2008-07-28 04:17:33 +0000
commit 3cc36e8c8ffc628342c71e882a61d6d2c1536107 (patch)
tree 160dc33f115b2c44588444fd22753cf0650826e8
parent 94d112a1cc169e357e0dffeba8473e4491cc5ad5 (diff)
download pyparsing-3cc36e8c8ffc628342c71e882a61d6d2c1536107.tar.gz
Added new helper method 'originalTextFor' to eventually supersede keepOriginalText parse action.
Fixed '-' error stop bug when expression is contained within a Combine. Added __ne__ method to ParserElement to prevent Python's default behavior of id(self) != id(other).
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/src@161 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r-- CHANGES 17
-rw-r--r-- pyparsing.py 50
2 files changed, 56 insertions, 11 deletions
diff --git a/CHANGES b/CHANGES
index a45df2e..2ae2109 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,20 @@ Change Log
Version 1.5.1 - ???, 2008
--------------------------
+- Added new helper method originalTextFor, to replace the use of
+ the current keepOriginalText parse action. Now instead of
+ using the parse action, as in:
+ fullName = Word(alphas) + Word(alphas)
+ fullName.setParseAction(keepOriginalText)
+ (in this example, we used keepOriginalText to restore any white
+ space that may have been skipped between the first and last
+ names)
+ You can now write:
+ fullName = originalTextFor(Word(alphas) + Word(alphas))
+ The implementation of originalTextFor is simpler and faster than
+ keepOriginalText, and does not depend on using the inspect or
+ imp modules.
+
- Added optional parseAll argument to parseFile, to be consistent
with parseAll argument to parseString. Posted by pboucher on the
pyparsing wiki, thanks!
@@ -32,6 +46,9 @@ Version 1.5.1 - ???, 2008
- Fixed bug in ParseResults.asXML(), in which the first named
item within a ParseResults gets reported with an <ITEM> tag instead
of with the correct results name.
+
+- Fixed bug in '-' error stop, when '-' operator used inside a
+ Combine expression.
Version 1.5.0 - June, 2008
diff --git a/pyparsing.py b/pyparsing.py
index 40959e1..1b7b89c 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -59,7 +59,7 @@ The pyparsing module handles some of the problems that are typically vexing when
"""
__version__ = "1.5.1"
-__versionTime__ = "1 July 2008 21:54"
+__versionTime__ = "27 July 2008 02:11"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -84,10 +84,10 @@ __all__ = [
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
-'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
-'indentedBlock',
+'indentedBlock', 'originalTextFor',
]
@@ -730,7 +730,7 @@ class ParserElement(object):
def breaker(instring, loc, doActions=True, callPreParse=True):
import pdb
pdb.set_trace()
- _parseMethod( instring, loc, doActions, callPreParse )
+ return _parseMethod( instring, loc, doActions, callPreParse )
breaker._originalParseMethod = _parseMethod
self._parse = breaker
else:
@@ -1408,12 +1408,18 @@ class ParserElement(object):
else:
return super(ParserElement,self)==other
+ def __ne__(self,other):
+ return not (self == other)
+
def __hash__(self):
return hash(id(self))
def __req__(self,other):
return self == other
+ def __rne__(self,other):
+ return not (self == other)
+
class Token(ParserElement):
"""Abstract ParserElement subclass, for defining atomic matching patterns."""
@@ -2285,10 +2291,9 @@ class And(ParseExpression):
"""
class _ErrorStop(Empty):
- def __new__(cls,*args,**kwargs):
- return And._ErrorStop.instance
- _ErrorStop.instance = Empty()
- _ErrorStop.instance.leaveWhitespace()
+ def __init__(self, *args, **kwargs):
+ super(Empty,self).__init__(*args, **kwargs)
+ self.leaveWhitespace()
def __init__( self, exprs, savelist = True ):
super(And,self).__init__(exprs, savelist)
@@ -2307,12 +2312,14 @@ class And(ParseExpression):
loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
errorStop = False
for e in self.exprs[1:]:
- if e is And._ErrorStop.instance:
+ if isinstance(e, And._ErrorStop):
errorStop = True
continue
if errorStop:
try:
loc, exprtokens = e._parse( instring, loc, doActions )
+ except ParseSyntaxException:
+ raise
except ParseBaseException, pe:
raise ParseSyntaxException(pe)
except IndexError, ie:
@@ -3147,7 +3154,7 @@ def matchPreviousExpr(expr):
def _escapeRegexRangeChars(s):
#~ escape these chars: ^-]
for c in r"\^-]":
- s = s.replace(c,"\\"+c)
+ s = s.replace(c,_bslash+c)
s = s.replace("\n",r"\n")
s = s.replace("\t",r"\t")
return _ustr(s)
@@ -3221,6 +3228,27 @@ def dictOf( key, value ):
"""
return Dict( ZeroOrMore( Group ( key + value ) ) )
+def originalTextFor(expr, asString=True):
+ """Helper to return the original, untokenized text for a given expression. Useful to
+ restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+ revert separate tokens with intervening whitespace back to the original matching
+ input text. Simpler to use than the parse action keepOriginalText, and does not
+ require the inspect module to chase up the call stack. By default, returns a
+ string containing the original parsed text. If the optional asString argument
+ is passed as False, then the return value is a ParseResults containing any
+ results names that were originally matched, and a single token containing the
+ original matched text from the input string."""
+ locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+ matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
+ if asString:
+ extractText = lambda s,l,t: s[t._original_start:t._original_end]
+ else:
+ def extractText(s,l,t):
+ del t[:]
+ t[0] = s[t._original_start:t._original_end]
+ matchExpr.setParseAction(extractText)
+ return matchExpr
+
# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
@@ -3554,7 +3582,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
else:
smExpr = Group( Optional(NL) +
(OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
- blockStatementExpr.ignore("\\" + LineEnd())
+ blockStatementExpr.ignore(_bslash + LineEnd())
return smExpr
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")