diff options
author | ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> | 2008-10-18 04:02:49 +0000 |
---|---|---|
committer | ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> | 2008-10-18 04:02:49 +0000 |
commit | 9707144cb337693f9fcb1db33c8ce879669762e2 (patch) | |
tree | c21b172902a108bc5bc7a10973b9a4d350df408f /pyparsing_py3.py | |
parent | ad08a46aa28a0b3768c7fe4935c5e3f7e4fa68ed (diff) | |
download | pyparsing-9707144cb337693f9fcb1db33c8ce879669762e2.tar.gz |
Updated for version 1.5.1 release
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/src@167 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
Diffstat (limited to 'pyparsing_py3.py')
-rw-r--r-- | pyparsing_py3.py | 193 |
1 files changed, 131 insertions, 62 deletions
diff --git a/pyparsing_py3.py b/pyparsing_py3.py index 4ae3e22..45d2668 100644 --- a/pyparsing_py3.py +++ b/pyparsing_py3.py @@ -58,17 +58,17 @@ The pyparsing module handles some of the problems that are typically vexing when - embedded comments
"""
-__version__ = "1.5.0.Py3"
-__versionTime__ = "28 May 2008 10:05"
+__version__ = "1.5.1.Py3"
+__versionTime__ = "17 Oct 2008 20:05"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
from weakref import ref as wkref
-import copy,sys
+import copy
+import sys
import warnings
import re
import sre_constants
-import xml.sax.saxutils
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
__all__ = [
@@ -85,10 +85,10 @@ __all__ = [ 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
-'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
-'indentedBlock',
+'indentedBlock', 'originalTextFor',
]
@@ -130,11 +130,22 @@ if not _PY3K: # ...
else:
_ustr = str
+ unichr = chr
def _str2dict(strg):
return dict( [(c,0) for c in strg] )
#~ return set( [c for c in strg] )
+def _xml_escape(data):
+ """Escape &, <, >, ", ', etc. in a string of data."""
+
+ # ampersand must be replaced first
+ from_symbols = '&><"\''
+ to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
+ for from_,to_ in zip(from_symbols, to_symbols):
+ data = data.replace(from_, to_)
+ return data
+
class _Constants(object):
pass
@@ -145,7 +156,7 @@ else: nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
-_bslash = "\\"
+_bslash = chr(92)
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
class ParseBaseException(Exception):
@@ -193,6 +204,9 @@ class ParseBaseException(Exception): line_str = "".join( [line_str[:line_column],
markerString, line_str[line_column:]])
return line_str.strip()
+ def __dir__(self):
+ return "loc msg pstr parserElement lineno col line " \
+ "markInputLine __str__ __repr__".split()
class ParseException(ParseBaseException):
"""exception thrown when parse expressions don't match class;
@@ -244,6 +258,8 @@ class _ParseResultsWithOffset(object): return self.tup[i]
def __repr__(self):
return repr(self.tup)
+ def setOffset(self,i):
+ self.tup = (self.tup[0],i)
class ParseResults(object):
"""Structured parse results, to provide multiple means of access to the parsed data:
@@ -273,9 +289,6 @@ class ParseResults(object): self.__toklist = [toklist]
self.__tokdict = dict()
- # this line is related to debugging the asXML bug
- #~ asList = False
-
if name:
if not modal:
self.__accumNames[name] = 0
@@ -287,9 +300,9 @@ class ParseResults(object): toklist = [ toklist ]
if asList:
if isinstance(toklist,ParseResults):
- self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
+ self[name] = _ParseResultsWithOffset(toklist.copy(),0)
else:
- self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
+ self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
self[name].__name = name
else:
try:
@@ -375,7 +388,7 @@ class ParseResults(object): for name in self.__tokdict:
occurrences = self.__tokdict[name]
for k, (value, position) in enumerate(occurrences):
- occurrences[k] = _ParseResultsWithOffset(value, position + (position > j))
+ occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def items( self ):
"""Returns all named result keys and values as a list of tuples."""
@@ -412,6 +425,7 @@ class ParseResults(object): self[k] = v
if isinstance(v[0],ParseResults):
v[0].__parent = wkref(self)
+
self.__toklist += other.__toklist
self.__accumNames.update( other.__accumNames )
del other
@@ -518,7 +532,7 @@ class ParseResults(object): continue
else:
resTag = "ITEM"
- xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
+ xmlBodyText = _xml_escape(_ustr(res))
out += [ nl, nextLevelIndent, "<", resTag, ">",
xmlBodyText,
"</", resTag, ">" ]
@@ -595,6 +609,8 @@ class ParseResults(object): else:
self.__parent = None
+ def __dir__(self):
+ return dir(super(ParseResults,self)) + self.keys()
def col (loc,strg):
"""Returns current column within a string, counting newlines as line separators.
@@ -716,7 +732,7 @@ class ParserElement(object): def breaker(instring, loc, doActions=True, callPreParse=True):
import pdb
pdb.set_trace()
- _parseMethod( instring, loc, doActions, callPreParse )
+ return _parseMethod( instring, loc, doActions, callPreParse )
breaker._originalParseMethod = _parseMethod
self._parse = breaker
else:
@@ -1048,6 +1064,7 @@ class ParserElement(object): instring = instring.expandtabs()
loc, tokens = self._parse( instring, 0 )
if parseAll:
+ loc = self.preParse( instring, loc )
StringEnd()._parse( instring, loc )
return tokens
@@ -1159,27 +1176,21 @@ class ParserElement(object): if isinstance(other,int):
minElements, optElements = other,0
elif isinstance(other,tuple):
- if len(other)==0:
- other = (None,None)
- elif len(other)==1:
- other = (other[0],None)
- if len(other)==2:
- if other[0] is None:
- other = (0, other[1])
- if isinstance(other[0],int) and other[1] is None:
- if other[0] == 0:
- return ZeroOrMore(self)
- if other[0] == 1:
- return OneOrMore(self)
- else:
- return self*other[0] + ZeroOrMore(self)
- elif isinstance(other[0],int) and isinstance(other[1],int):
- minElements, optElements = other
- optElements -= minElements
+ other = (other + (None, None))[:2]
+ if other[0] is None:
+ other = (0, other[1])
+ if isinstance(other[0],int) and other[1] is None:
+ if other[0] == 0:
+ return ZeroOrMore(self)
+ if other[0] == 1:
+ return OneOrMore(self)
else:
- raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
+ return self*other[0] + ZeroOrMore(self)
+ elif isinstance(other[0],int) and isinstance(other[1],int):
+ minElements, optElements = other
+ optElements -= minElements
else:
- raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
+ raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
else:
raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
@@ -1361,7 +1372,7 @@ class ParserElement(object): """Check defined expressions for valid structure, check for infinite recursive definitions."""
self.checkRecursion( [] )
- def parseFile( self, file_or_filename ):
+ def parseFile( self, file_or_filename, parseAll=False ):
"""Execute the parse expression on the given file or filename.
If a filename is specified (instead of a file object),
the entire file is opened, read, and closed before parsing.
@@ -1372,7 +1383,7 @@ class ParserElement(object): f = open(file_or_filename, "rb")
file_contents = f.read()
f.close()
- return self.parseString(file_contents)
+ return self.parseString(file_contents, parseAll)
def getException(self):
return ParseException("",0,self.errmsg,self)
@@ -1394,12 +1405,18 @@ class ParserElement(object): else:
return super(ParserElement,self)==other
+ def __ne__(self,other):
+ return not (self == other)
+
def __hash__(self):
return hash(id(self))
def __req__(self,other):
return self == other
+ def __rne__(self,other):
+ return not (self == other)
+
class Token(ParserElement):
"""Abstract ParserElement subclass, for defining atomic matching patterns."""
@@ -1534,7 +1551,6 @@ class Keyword(Token): Keyword.DEFAULT_KEYWORD_CHARS = chars
setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
-
class CaselessLiteral(Literal):
"""Token to match a specified string, ignoring case of letters.
Note: the matched results will always be in the case of the given
@@ -2035,7 +2051,7 @@ class LineStart(_PositionToken): """Matches if current position is at the beginning of a line within the parse string"""
def __init__( self ):
super(LineStart,self).__init__()
- self.setWhitespaceChars( " \t" )
+ self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected start of line"
#self.myException.msg = self.errmsg
@@ -2060,7 +2076,7 @@ class LineEnd(_PositionToken): """Matches if current position is at the end of a line within the parse string"""
def __init__( self ):
super(LineEnd,self).__init__()
- self.setWhitespaceChars( " \t" )
+ self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected end of line"
#self.myException.msg = self.errmsg
@@ -2272,10 +2288,9 @@ class And(ParseExpression): """
class _ErrorStop(Empty):
- def __new__(cls,*args,**kwargs):
- return And._ErrorStop.instance
- _ErrorStop.instance = Empty()
- _ErrorStop.instance.leaveWhitespace()
+ def __init__(self, *args, **kwargs):
+ super(Empty,self).__init__(*args, **kwargs)
+ self.leaveWhitespace()
def __init__( self, exprs, savelist = True ):
super(And,self).__init__(exprs, savelist)
@@ -2294,12 +2309,14 @@ class And(ParseExpression): loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
errorStop = False
for e in self.exprs[1:]:
- if e is And._ErrorStop.instance:
+ if isinstance(e, And._ErrorStop):
errorStop = True
continue
if errorStop:
try:
loc, exprtokens = e._parse( instring, loc, doActions )
+ except ParseSyntaxException:
+ raise
except ParseBaseException as pe:
raise ParseSyntaxException(pe)
except IndexError as ie:
@@ -2782,7 +2799,7 @@ class SkipTo(ParseElementEnhance): argument is used to define grammars (typically quoted strings and comments) that
might contain false matches.
"""
- def __init__( self, other, include=False, ignore=None ):
+ def __init__( self, other, include=False, ignore=None, failOn=None ):
super( SkipTo, self ).__init__( other )
if ignore is not None:
self.expr = self.expr.copy()
@@ -2791,6 +2808,10 @@ class SkipTo(ParseElementEnhance): self.mayIndexError = False
self.includeMatch = include
self.asList = False
+ if failOn is not None and isinstance(failOn, basestring):
+ self.failOn = Literal(failOn)
+ else:
+ self.failOn = failOn
self.errmsg = "No match found for "+_ustr(self.expr)
#self.myException = ParseException("",0,self.errmsg,self)
@@ -2798,12 +2819,17 @@ class SkipTo(ParseElementEnhance): startLoc = loc
instrlen = len(instring)
expr = self.expr
+ failParse = False
while loc <= instrlen:
try:
+ if self.failOn:
+ failParse = True
+ self.failOn.tryParse(instring, loc)
+ failParse = False
loc = expr._skipIgnorables( instring, loc )
expr._parse( instring, loc, doActions=False, callPreParse=False )
+ skipText = instring[startLoc:loc]
if self.includeMatch:
- skipText = instring[startLoc:loc]
loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
if mat:
skipRes = ParseResults( skipText )
@@ -2812,9 +2838,12 @@ class SkipTo(ParseElementEnhance): else:
return loc, [ skipText ]
else:
- return loc, [ instring[startLoc:loc] ]
+ return loc, [ skipText ]
except (ParseException,IndexError):
- loc += 1
+ if failParse:
+ raise
+ else:
+ loc += 1
exc = self.myException
exc.loc = loc
exc.pstr = instring
@@ -2873,6 +2902,7 @@ class Forward(ParseElementEnhance): if hasattr(self,"name"):
return self.name
+ self._revertClass = self.__class__
self.__class__ = _ForwardNoRecurse
try:
if self.expr is not None:
@@ -2880,8 +2910,8 @@ class Forward(ParseElementEnhance): else:
retString = "None"
finally:
- self.__class__ = Forward
- return "Forward: "+retString
+ self.__class__ = self._revertClass
+ return self.__class__.__name__ + ": " + retString
def copy(self):
if self.expr is not None:
@@ -3122,7 +3152,7 @@ def matchPreviousExpr(expr): def _escapeRegexRangeChars(s):
#~ escape these chars: ^-]
for c in r"\^-]":
- s = s.replace(c,"\\"+c)
+ s = s.replace(c,_bslash+c)
s = s.replace("\n",r"\n")
s = s.replace("\t",r"\t")
return _ustr(s)
@@ -3196,6 +3226,33 @@ def dictOf( key, value ): """
return Dict( ZeroOrMore( Group ( key + value ) ) )
+def originalTextFor(expr, asString=True):
+ """Helper to return the original, untokenized text for a given expression. Useful to
+ restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+ revert separate tokens with intervening whitespace back to the original matching
+ input text. Simpler to use than the parse action keepOriginalText, and does not
+ require the inspect module to chase up the call stack. By default, returns a
+ string containing the original parsed text.
+
+ If the optional asString argument is passed as False, then the return value is a
+ ParseResults containing any results names that were originally matched, and a
+ single token containing the original matched text from the input string. So if
+ the expression passed to originalTextFor contains expressions with defined
+ results names, you must set asString to False if you want to preserve those
+ results name values."""
+ locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+ matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
+ if asString:
+ extractText = lambda s,l,t: s[t._original_start:t._original_end]
+ else:
+ def extractText(s,l,t):
+ del t[:]
+ t.insert(0, s[t._original_start:t._original_end])
+ del t["_original_start"]
+ del t["_original_end"]
+ matchExpr.setParseAction(extractText)
+ return matchExpr
+
# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
@@ -3465,12 +3522,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): raise ValueError("opening and closing strings cannot be the same")
if content is None:
if isinstance(opener,basestring) and isinstance(closer,basestring):
- if ignoreExpr is not None:
- content = (Combine(OneOrMore(~ignoreExpr +
- CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
- ).setParseAction(lambda t:t[0].strip()))
+ if len(opener) == 1 and len(closer)==1:
+ if ignoreExpr is not None:
+ content = (Combine(OneOrMore(~ignoreExpr +
+ CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+ ).setParseAction(lambda t:t[0].strip()))
+ else:
+ content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
+ ).setParseAction(lambda t:t[0].strip()))
else:
- content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip()))
+ if ignoreExpr is not None:
+ content = (Combine(OneOrMore(~ignoreExpr +
+ ~Literal(opener) + ~Literal(closer) +
+ CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+ ).setParseAction(lambda t:t[0].strip()))
+ else:
+ content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
+ CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+ ).setParseAction(lambda t:t[0].strip()))
else:
raise ValueError("opening and closing arguments must be strings if no content expression is given")
ret = Forward()
@@ -3481,16 +3550,16 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True):
- """Helper method for defining space-delimited indentation blocks, such as
+ """Helper method for defining space-delimited indentation blocks, such as
those used to define block statements in Python source code.
-
+
Parameters:
- - blockStatementExpr - expression defining syntax of statement that
+ - blockStatementExpr - expression defining syntax of statement that
is repeated within the indented block
- indentStack - list created by caller to manage indentation stack
(multiple statementWithIndentedBlock expressions within a single grammar
should share a common indentStack)
- - indent - boolean indicating whether block must be indented beyond the
+ - indent - boolean indicating whether block must be indented beyond the
the current level; set to False for block of left-most statements
(default=True)
@@ -3529,7 +3598,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True): else:
smExpr = Group( Optional(NL) +
(OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
- blockStatementExpr.ignore("\\" + LineEnd())
+ blockStatementExpr.ignore(_bslash + LineEnd())
return smExpr
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
@@ -3537,7 +3606,7 @@ punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
-_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
+_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
|