Added new helper method 'originalTextFor' to eventually supersede keepOriginalText parse action.

Fixed '-' error stop bug when expression is contained within a Combine. Added __ne__ method to ParserElement to prevent Python's default behavior of id(self) != id(other).
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/src@161 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
author: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2008-07-28 04:17:33 +0000
committer: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2008-07-28 04:17:33 +0000
commit: 3cc36e8c8ffc628342c71e882a61d6d2c1536107 (patch)
tree: 160dc33f115b2c44588444fd22753cf0650826e8
parent: 94d112a1cc169e357e0dffeba8473e4491cc5ad5 (diff)
download: pyparsing-3cc36e8c8ffc628342c71e882a61d6d2c1536107.tar.gz
2 files changed, 56 insertions, 11 deletions
diff --git a/CHANGES b/CHANGES
index a45df2e..2ae2109 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,20 @@ Change Log
 
 Version 1.5.1 - ???, 2008
 --------------------------
+- Added new helper method originalTextFor, to replace the use of
+  the current keepOriginalText parse action.  Now instead of 
+  using the parse action, as in:
+      fullName = Word(alphas) + Word(alphas)
+      fullName.setParseAction(keepOriginalText)
+  (in this example, we used keepOriginalText to restore any white
+  space that may have been skipped between the first and last
+  names)
+  You can now write:
+      fullName = originalTextFor(Word(alphas) + Word(alphas))
+  The implementation of originalTextFor is simpler and faster than
+  keepOriginalText, and does not depend on using the inspect or
+  imp modules.
+  
 - Added optional parseAll argument to parseFile, to be consistent
   with parseAll argument to parseString.  Posted by pboucher on the
   pyparsing wiki, thanks!
@@ -32,6 +46,9 @@ Version 1.5.1 - ???, 2008
 - Fixed bug in ParseResults.asXML(), in which the first named
   item within a ParseResults gets reported with an <ITEM> tag instead
   of with the correct results name.
+  
+- Fixed bug in '-' error stop, when '-' operator used inside a 
+  Combine expression.
 
 
 Version 1.5.0 - June, 2008
diff --git a/pyparsing.py b/pyparsing.py
index 40959e1..1b7b89c 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -59,7 +59,7 @@ The pyparsing module handles some of the problems that are typically vexing when
 """
 
 __version__ = "1.5.1"
-__versionTime__ = "1 July 2008 21:54"
+__versionTime__ = "27 July 2008 02:11"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
 import string
@@ -84,10 +84,10 @@ __all__ = [
 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
-'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 
 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
-'indentedBlock',
+'indentedBlock', 'originalTextFor',
 ]
 
 
@@ -730,7 +730,7 @@ class ParserElement(object):
             def breaker(instring, loc, doActions=True, callPreParse=True):
                 import pdb
                 pdb.set_trace()
-                _parseMethod( instring, loc, doActions, callPreParse )
+                return _parseMethod( instring, loc, doActions, callPreParse )
             breaker._originalParseMethod = _parseMethod
             self._parse = breaker
         else:
@@ -1408,12 +1408,18 @@ class ParserElement(object):
         else:
             return super(ParserElement,self)==other
 
+    def __ne__(self,other):
+        return not (self == other)
+
     def __hash__(self):
         return hash(id(self))
 
     def __req__(self,other):
         return self == other
 
+    def __rne__(self,other):
+        return not (self == other)
+
 
 class Token(ParserElement):
     """Abstract ParserElement subclass, for defining atomic matching patterns."""
@@ -2285,10 +2291,9 @@ class And(ParseExpression):
     """
 
     class _ErrorStop(Empty):
-        def __new__(cls,*args,**kwargs):
-            return And._ErrorStop.instance
-    _ErrorStop.instance = Empty()
-    _ErrorStop.instance.leaveWhitespace()
+        def __init__(self, *args, **kwargs):
+            super(Empty,self).__init__(*args, **kwargs)
+            self.leaveWhitespace()
 
     def __init__( self, exprs, savelist = True ):
         super(And,self).__init__(exprs, savelist)
@@ -2307,12 +2312,14 @@ class And(ParseExpression):
         loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
         errorStop = False
         for e in self.exprs[1:]:
-            if e is And._ErrorStop.instance:
+            if isinstance(e, And._ErrorStop):
                 errorStop = True
                 continue
             if errorStop:
                 try:
                     loc, exprtokens = e._parse( instring, loc, doActions )
+                except ParseSyntaxException:
+                    raise
                 except ParseBaseException, pe:
                     raise ParseSyntaxException(pe)
                 except IndexError, ie:
@@ -3147,7 +3154,7 @@ def matchPreviousExpr(expr):
 def _escapeRegexRangeChars(s):
     #~  escape these chars: ^-]
     for c in r"\^-]":
-        s = s.replace(c,"\\"+c)
+        s = s.replace(c,_bslash+c)
     s = s.replace("\n",r"\n")
     s = s.replace("\t",r"\t")
     return _ustr(s)
@@ -3221,6 +3228,27 @@ def dictOf( key, value ):
     """
     return Dict( ZeroOrMore( Group ( key + value ) ) )
 
+def originalTextFor(expr, asString=True):
+    """Helper to return the original, untokenized text for a given expression.  Useful to
+       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+       revert separate tokens with intervening whitespace back to the original matching
+       input text. Simpler to use than the parse action keepOriginalText, and does not
+       require the inspect module to chase up the call stack.  By default, returns a 
+       string containing the original parsed text.  If the optional asString argument
+       is passed as False, then the return value is a ParseResults containing any 
+       results names that were originally matched, and a single token containing the
+       original matched text from the input string."""
+    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
+    if asString:
+        extractText = lambda s,l,t: s[t._original_start:t._original_end]
+    else:
+        def extractText(s,l,t):
+            del t[:]
+            t[0] = s[t._original_start:t._original_end]
+    matchExpr.setParseAction(extractText)
+    return matchExpr
+    
 # convenience constants for positional expressions
 empty       = Empty().setName("empty")
 lineStart   = LineStart().setName("lineStart")
@@ -3554,7 +3582,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
     else:
         smExpr = Group( Optional(NL) +
             (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
-    blockStatementExpr.ignore("\\" + LineEnd())
+    blockStatementExpr.ignore(_bslash + LineEnd())
     return smExpr
 
 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
author	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2008-07-28 04:17:33 +0000
committer	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2008-07-28 04:17:33 +0000
commit	3cc36e8c8ffc628342c71e882a61d6d2c1536107 (patch)
tree	160dc33f115b2c44588444fd22753cf0650826e8
parent	94d112a1cc169e357e0dffeba8473e4491cc5ad5 (diff)
download	pyparsing-3cc36e8c8ffc628342c71e882a61d6d2c1536107.tar.gz