Updated for version 1.5.1 release

git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/src@167 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
author: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2008-10-18 04:02:49 +0000
committer: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2008-10-18 04:02:49 +0000
commit: 9707144cb337693f9fcb1db33c8ce879669762e2 (patch)
tree: c21b172902a108bc5bc7a10973b9a4d350df408f /pyparsing_py3.py
parent: ad08a46aa28a0b3768c7fe4935c5e3f7e4fa68ed (diff)
download: pyparsing-9707144cb337693f9fcb1db33c8ce879669762e2.tar.gz
1 files changed, 131 insertions, 62 deletions
diff --git a/pyparsing_py3.py b/pyparsing_py3.py
index 4ae3e22..45d2668 100644
--- a/pyparsing_py3.py
+++ b/pyparsing_py3.py
@@ -58,17 +58,17 @@ The pyparsing module handles some of the problems that are typically vexing when
  - embedded comments
 """
 
-__version__ = "1.5.0.Py3"
-__versionTime__ = "28 May 2008 10:05"
+__version__ = "1.5.1.Py3"
+__versionTime__ = "17 Oct 2008 20:05"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
 import string
 from weakref import ref as wkref
-import copy,sys
+import copy
+import sys
 import warnings
 import re
 import sre_constants
-import xml.sax.saxutils
 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
 
 __all__ = [
@@ -85,10 +85,10 @@ __all__ = [
 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
-'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 
 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
-'indentedBlock',
+'indentedBlock', 'originalTextFor',
 ]
 
 
@@ -130,11 +130,22 @@ if not _PY3K:
             # ...
 else:
     _ustr = str
+    unichr = chr
 
 def _str2dict(strg):
     return dict( [(c,0) for c in strg] )
     #~ return set( [c for c in strg] )
 
+def _xml_escape(data):
+    """Escape &, <, >, ", ', etc. in a string of data."""
+
+    # ampersand must be replaced first
+    from_symbols = '&><"\''
+    to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
+    for from_,to_ in zip(from_symbols, to_symbols):
+        data = data.replace(from_, to_)
+    return data
+
 class _Constants(object):
     pass
 
@@ -145,7 +156,7 @@ else:
 nums       = string.digits
 hexnums    = nums + "ABCDEFabcdef"
 alphanums  = alphas + nums
-_bslash = "\\"
+_bslash = chr(92)
 printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
 
 class ParseBaseException(Exception):
@@ -193,6 +204,9 @@ class ParseBaseException(Exception):
             line_str = "".join( [line_str[:line_column],
                                 markerString, line_str[line_column:]])
         return line_str.strip()
+    def __dir__(self):
+        return "loc msg pstr parserElement lineno col line " \
+               "markInputLine __str__ __repr__".split()
 
 class ParseException(ParseBaseException):
     """exception thrown when parse expressions don't match class;
@@ -244,6 +258,8 @@ class _ParseResultsWithOffset(object):
         return self.tup[i]
     def __repr__(self):
         return repr(self.tup)
+    def setOffset(self,i):
+        self.tup = (self.tup[0],i)
 
 class ParseResults(object):
     """Structured parse results, to provide multiple means of access to the parsed data:
@@ -273,9 +289,6 @@ class ParseResults(object):
                 self.__toklist = [toklist]
             self.__tokdict = dict()
 
-        # this line is related to debugging the asXML bug
-        #~ asList = False
-
         if name:
             if not modal:
                 self.__accumNames[name] = 0
@@ -287,9 +300,9 @@ class ParseResults(object):
                     toklist = [ toklist ]
                 if asList:
                     if isinstance(toklist,ParseResults):
-                        self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
+                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                     else:
-                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
+                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                     self[name].__name = name
                 else:
                     try:
@@ -375,7 +388,7 @@ class ParseResults(object):
         for name in self.__tokdict:
             occurrences = self.__tokdict[name]
             for k, (value, position) in enumerate(occurrences):
-                occurrences[k] = _ParseResultsWithOffset(value, position + (position > j))
+                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
 
     def items( self ):
         """Returns all named result keys and values as a list of tuples."""
@@ -412,6 +425,7 @@ class ParseResults(object):
                 self[k] = v
                 if isinstance(v[0],ParseResults):
                     v[0].__parent = wkref(self)
+            
         self.__toklist += other.__toklist
         self.__accumNames.update( other.__accumNames )
         del other
@@ -518,7 +532,7 @@ class ParseResults(object):
                         continue
                     else:
                         resTag = "ITEM"
-                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
+                xmlBodyText = _xml_escape(_ustr(res))
                 out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                 xmlBodyText,
                                                 "</", resTag, ">" ]
@@ -595,6 +609,8 @@ class ParseResults(object):
         else:
             self.__parent = None
 
+    def __dir__(self):
+        return dir(super(ParseResults,self)) + self.keys()
 
 def col (loc,strg):
     """Returns current column within a string, counting newlines as line separators.
@@ -716,7 +732,7 @@ class ParserElement(object):
             def breaker(instring, loc, doActions=True, callPreParse=True):
                 import pdb
                 pdb.set_trace()
-                _parseMethod( instring, loc, doActions, callPreParse )
+                return _parseMethod( instring, loc, doActions, callPreParse )
             breaker._originalParseMethod = _parseMethod
             self._parse = breaker
         else:
@@ -1048,6 +1064,7 @@ class ParserElement(object):
             instring = instring.expandtabs()
         loc, tokens = self._parse( instring, 0 )
         if parseAll:
+            loc = self.preParse( instring, loc )
             StringEnd()._parse( instring, loc )
         return tokens
 
@@ -1159,27 +1176,21 @@ class ParserElement(object):
         if isinstance(other,int):
             minElements, optElements = other,0
         elif isinstance(other,tuple):
-            if len(other)==0:
-                other = (None,None)
-            elif len(other)==1:
-                other = (other[0],None)
-            if len(other)==2:
-                if other[0] is None:
-                    other = (0, other[1])
-                if isinstance(other[0],int) and other[1] is None:
-                    if other[0] == 0:
-                        return ZeroOrMore(self)
-                    if other[0] == 1:
-                        return OneOrMore(self)
-                    else:
-                        return self*other[0] + ZeroOrMore(self)
-                elif isinstance(other[0],int) and isinstance(other[1],int):
-                    minElements, optElements = other
-                    optElements -= minElements
+            other = (other + (None, None))[:2]
+            if other[0] is None:
+                other = (0, other[1])
+            if isinstance(other[0],int) and other[1] is None:
+                if other[0] == 0:
+                    return ZeroOrMore(self)
+                if other[0] == 1:
+                    return OneOrMore(self)
                 else:
-                    raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
+                    return self*other[0] + ZeroOrMore(self)
+            elif isinstance(other[0],int) and isinstance(other[1],int):
+                minElements, optElements = other
+                optElements -= minElements
             else:
-                raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
+                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
         else:
             raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
 
@@ -1361,7 +1372,7 @@ class ParserElement(object):
         """Check defined expressions for valid structure, check for infinite recursive definitions."""
         self.checkRecursion( [] )
 
-    def parseFile( self, file_or_filename ):
+    def parseFile( self, file_or_filename, parseAll=False ):
         """Execute the parse expression on the given file or filename.
            If a filename is specified (instead of a file object),
            the entire file is opened, read, and closed before parsing.
@@ -1372,7 +1383,7 @@ class ParserElement(object):
             f = open(file_or_filename, "rb")
             file_contents = f.read()
             f.close()
-        return self.parseString(file_contents)
+        return self.parseString(file_contents, parseAll)
 
     def getException(self):
         return ParseException("",0,self.errmsg,self)
@@ -1394,12 +1405,18 @@ class ParserElement(object):
         else:
             return super(ParserElement,self)==other
 
+    def __ne__(self,other):
+        return not (self == other)
+
     def __hash__(self):
         return hash(id(self))
 
     def __req__(self,other):
         return self == other
 
+    def __rne__(self,other):
+        return not (self == other)
+
 
 class Token(ParserElement):
     """Abstract ParserElement subclass, for defining atomic matching patterns."""
@@ -1534,7 +1551,6 @@ class Keyword(Token):
         Keyword.DEFAULT_KEYWORD_CHARS = chars
     setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
 
-
 class CaselessLiteral(Literal):
     """Token to match a specified string, ignoring case of letters.
        Note: the matched results will always be in the case of the given
@@ -2035,7 +2051,7 @@ class LineStart(_PositionToken):
     """Matches if current position is at the beginning of a line within the parse string"""
     def __init__( self ):
         super(LineStart,self).__init__()
-        self.setWhitespaceChars( " \t" )
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
         self.errmsg = "Expected start of line"
         #self.myException.msg = self.errmsg
 
@@ -2060,7 +2076,7 @@ class LineEnd(_PositionToken):
     """Matches if current position is at the end of a line within the parse string"""
     def __init__( self ):
         super(LineEnd,self).__init__()
-        self.setWhitespaceChars( " \t" )
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
         self.errmsg = "Expected end of line"
         #self.myException.msg = self.errmsg
 
@@ -2272,10 +2288,9 @@ class And(ParseExpression):
     """
 
     class _ErrorStop(Empty):
-        def __new__(cls,*args,**kwargs):
-            return And._ErrorStop.instance
-    _ErrorStop.instance = Empty()
-    _ErrorStop.instance.leaveWhitespace()
+        def __init__(self, *args, **kwargs):
+            super(Empty,self).__init__(*args, **kwargs)
+            self.leaveWhitespace()
 
     def __init__( self, exprs, savelist = True ):
         super(And,self).__init__(exprs, savelist)
@@ -2294,12 +2309,14 @@ class And(ParseExpression):
         loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
         errorStop = False
         for e in self.exprs[1:]:
-            if e is And._ErrorStop.instance:
+            if isinstance(e, And._ErrorStop):
                 errorStop = True
                 continue
             if errorStop:
                 try:
                     loc, exprtokens = e._parse( instring, loc, doActions )
+                except ParseSyntaxException:
+                    raise
                 except ParseBaseException as pe:
                     raise ParseSyntaxException(pe)
                 except IndexError as ie:
@@ -2782,7 +2799,7 @@ class SkipTo(ParseElementEnhance):
        argument is used to define grammars (typically quoted strings and comments) that
        might contain false matches.
     """
-    def __init__( self, other, include=False, ignore=None ):
+    def __init__( self, other, include=False, ignore=None, failOn=None ):
         super( SkipTo, self ).__init__( other )
         if ignore is not None:
             self.expr = self.expr.copy()
@@ -2791,6 +2808,10 @@ class SkipTo(ParseElementEnhance):
         self.mayIndexError = False
         self.includeMatch = include
         self.asList = False
+        if failOn is not None and isinstance(failOn, basestring):
+            self.failOn = Literal(failOn)
+        else:
+            self.failOn = failOn
         self.errmsg = "No match found for "+_ustr(self.expr)
         #self.myException = ParseException("",0,self.errmsg,self)
 
@@ -2798,12 +2819,17 @@ class SkipTo(ParseElementEnhance):
         startLoc = loc
         instrlen = len(instring)
         expr = self.expr
+        failParse = False
         while loc <= instrlen:
             try:
+                if self.failOn:
+                    failParse = True
+                    self.failOn.tryParse(instring, loc)
+                    failParse = False
                 loc = expr._skipIgnorables( instring, loc )
                 expr._parse( instring, loc, doActions=False, callPreParse=False )
+                skipText = instring[startLoc:loc]
                 if self.includeMatch:
-                    skipText = instring[startLoc:loc]
                     loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                     if mat:
                         skipRes = ParseResults( skipText )
@@ -2812,9 +2838,12 @@ class SkipTo(ParseElementEnhance):
                     else:
                         return loc, [ skipText ]
                 else:
-                    return loc, [ instring[startLoc:loc] ]
+                    return loc, [ skipText ]
             except (ParseException,IndexError):
-                loc += 1
+                if failParse:
+                    raise
+                else:
+                    loc += 1
         exc = self.myException
         exc.loc = loc
         exc.pstr = instring
@@ -2873,6 +2902,7 @@ class Forward(ParseElementEnhance):
         if hasattr(self,"name"):
             return self.name
 
+        self._revertClass = self.__class__
         self.__class__ = _ForwardNoRecurse
         try:
             if self.expr is not None:
@@ -2880,8 +2910,8 @@ class Forward(ParseElementEnhance):
             else:
                 retString = "None"
         finally:
-            self.__class__ = Forward
-        return "Forward: "+retString
+            self.__class__ = self._revertClass
+        return self.__class__.__name__ + ": " + retString
 
     def copy(self):
         if self.expr is not None:
@@ -3122,7 +3152,7 @@ def matchPreviousExpr(expr):
 def _escapeRegexRangeChars(s):
     #~  escape these chars: ^-]
     for c in r"\^-]":
-        s = s.replace(c,"\\"+c)
+        s = s.replace(c,_bslash+c)
     s = s.replace("\n",r"\n")
     s = s.replace("\t",r"\t")
     return _ustr(s)
@@ -3196,6 +3226,33 @@ def dictOf( key, value ):
     """
     return Dict( ZeroOrMore( Group ( key + value ) ) )
 
+def originalTextFor(expr, asString=True):
+    """Helper to return the original, untokenized text for a given expression.  Useful to
+       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+       revert separate tokens with intervening whitespace back to the original matching
+       input text. Simpler to use than the parse action keepOriginalText, and does not
+       require the inspect module to chase up the call stack.  By default, returns a 
+       string containing the original parsed text.  
+       
+       If the optional asString argument is passed as False, then the return value is a 
+       ParseResults containing any results names that were originally matched, and a 
+       single token containing the original matched text from the input string.  So if 
+       the expression passed to originalTextFor contains expressions with defined
+       results names, you must set asString to False if you want to preserve those
+       results name values."""
+    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
+    if asString:
+        extractText = lambda s,l,t: s[t._original_start:t._original_end]
+    else:
+        def extractText(s,l,t):
+            del t[:]
+            t.insert(0, s[t._original_start:t._original_end])
+            del t["_original_start"]
+            del t["_original_end"]
+    matchExpr.setParseAction(extractText)
+    return matchExpr
+    
 # convenience constants for positional expressions
 empty       = Empty().setName("empty")
 lineStart   = LineStart().setName("lineStart")
@@ -3465,12 +3522,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
         raise ValueError("opening and closing strings cannot be the same")
     if content is None:
         if isinstance(opener,basestring) and isinstance(closer,basestring):
-            if ignoreExpr is not None:
-                content = (Combine(OneOrMore(~ignoreExpr +
-                                CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
-                            ).setParseAction(lambda t:t[0].strip()))
+            if len(opener) == 1 and len(closer)==1:
+                if ignoreExpr is not None:
+                    content = (Combine(OneOrMore(~ignoreExpr +
+                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+                else:
+                    content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
+                                ).setParseAction(lambda t:t[0].strip()))
             else:
-                content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip()))
+                if ignoreExpr is not None:
+                    content = (Combine(OneOrMore(~ignoreExpr + 
+                                    ~Literal(opener) + ~Literal(closer) +
+                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+                else:
+                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
+                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
         else:
             raise ValueError("opening and closing arguments must be strings if no content expression is given")
     ret = Forward()
@@ -3481,16 +3550,16 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
     return ret
 
 def indentedBlock(blockStatementExpr, indentStack, indent=True):
-    """Helper method for defining space-delimited indentation blocks, such as 
+    """Helper method for defining space-delimited indentation blocks, such as
        those used to define block statements in Python source code.
-       
+
        Parameters:
-        - blockStatementExpr - expression defining syntax of statement that 
+        - blockStatementExpr - expression defining syntax of statement that
             is repeated within the indented block
         - indentStack - list created by caller to manage indentation stack
             (multiple statementWithIndentedBlock expressions within a single grammar
             should share a common indentStack)
-        - indent - boolean indicating whether block must be indented beyond the 
+        - indent - boolean indicating whether block must be indented beyond the
             the current level; set to False for block of left-most statements
             (default=True)
 
@@ -3529,7 +3598,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
     else:
         smExpr = Group( Optional(NL) +
             (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
-    blockStatementExpr.ignore("\\" + LineEnd())
+    blockStatementExpr.ignore(_bslash + LineEnd())
     return smExpr
 
 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
@@ -3537,7 +3606,7 @@ punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
 
 anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
 commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
-_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
+_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
 replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
 
 # it's easy to get these comment structures wrong - they're very common, so may as well make them available
author	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2008-10-18 04:02:49 +0000
committer	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2008-10-18 04:02:49 +0000
commit	9707144cb337693f9fcb1db33c8ce879669762e2 (patch)
tree	c21b172902a108bc5bc7a10973b9a4d350df408f /pyparsing_py3.py
parent	ad08a46aa28a0b3768c7fe4935c5e3f7e4fa68ed (diff)
download	pyparsing-9707144cb337693f9fcb1db33c8ce879669762e2.tar.gz