summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>2012-10-01 23:18:06 +0000
committerptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>2012-10-01 23:18:06 +0000
commitd7ef07239bf57e2c815f3e93f1a8bb9c9782a604 (patch)
treeb49c4834d1309d5c98a27eb66b3d68780c39b7c5
parentbb028cfc6dbf203ae223d9be126b964576bb6dd0 (diff)
downloadpyparsing-d7ef07239bf57e2c815f3e93f1a8bb9c9782a604.tar.gz
Create standard SVN structure
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@226 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r--src/CHANGES1668
-rw-r--r--src/HowToUsePyparsing.html1288
-rw-r--r--src/HowToUsePyparsing.txt993
-rw-r--r--src/LICENSE18
-rw-r--r--src/MANIFEST.in7
-rw-r--r--src/MANIFEST.in_bdist7
-rw-r--r--src/MANIFEST.in_src7
-rw-r--r--src/README72
-rw-r--r--src/genEpydoc.bat1
-rw-r--r--src/makeRelease.bat25
-rw-r--r--src/pyparsing.py3740
-rw-r--r--src/pyparsingClassDiagram.JPGbin0 -> 236402 bytes
-rw-r--r--src/pyparsingClassDiagram.PNGbin0 -> 141354 bytes
-rw-r--r--src/pyparsing_py2.py3740
-rw-r--r--src/pyparsing_py3.py3595
-rw-r--r--src/setup.py56
-rw-r--r--src/unitTests.py2375
17 files changed, 17592 insertions, 0 deletions
diff --git a/src/CHANGES b/src/CHANGES
new file mode 100644
index 0000000..0005203
--- /dev/null
+++ b/src/CHANGES
@@ -0,0 +1,1668 @@
+==========
+Change Log
+==========
+
+Version 1.5.7 -
+-----------------
+- An awesome new example is included in this release, submitted
+ by Luca DellOlio, for parsing ANTLR grammar definitions, nice
+ work Luca!
+
+- Fixed implementation of ParseResults.__str__ to use Pythonic
+ ''.join() instead of repeated string concatenation. This
+ purportedly has been a performance issue under PyPy.
+
+- Fixed bug in ParseResults.__dir__ under Python 3, reported by
+ Thomas Kluyver, thank you Thomas!
+
+- Added ParserElement.inlineLiteralsUsing static method, to
+ override pyparsing's default behavior of converting string
+ literals to Literal instances, to use other classes (such
+ as Suppress or CaselessLiteral).
+
+- Added new operator '<<=', which will eventually replace '<<' for
+ storing the contents of a Forward(). '<<=' does not have the same
+ operator precedence problems that '<<' does.
+
+- Added support for using single argument builtin functions as parse
+ actions. Now you can write 'expr.setParseAction(len)' and get back
+ the length of the list of matched tokens. Supported builtins are:
+ sum, len, sorted, reversed, list, tuple, set, any, all, min, and max.
+
+- Improved linking in generated docs, proposed on the pyparsing wiki
+ by techtonik, thanks!
+
+- Fixed a bug in the definition of 'alphas', which was based on the
+ string.uppercase and string.lowercase "constants", which in fact
+ *aren't* constant, but vary with locale settings. This could make
+ parsers locale-sensitive in a subtle way. Thanks to Kef Schecter for
+ his diligence in following through on reporting and monitoring
+ this bugfix!
+
+- Fixed a bug in the Py3 version of pyparsing, during exception
+ handling with packrat parsing enabled, reported by Catherine
+ Devlin - thanks Catherine!
+
+- Fixed typo in ParseBaseException.__dir__, reported anonymously on
+ the SourceForge bug tracker, thank you Pyparsing User With No Name.
+
+- Fixed bug in srange when using '\x###' hex character codes.
+
+- Added optional 'intExpr' argument to countedArray, so that you
+ can define your own expression that will evaluate to an integer,
+ to be used as the count for the following elements. Allows you
+ to define a countedArray with the count given in hex, for example,
+ by defining intExpr as "Word(hexnums).setParseAction(lambda t: int(t[0],16))".
+
+
+Version 1.5.6 - June, 2011
+----------------------------
+- Cleanup of parse action normalizing code, to be more version-tolerant,
+ and robust in the face of future Python versions - much thanks to
+ Raymond Hettinger for this rewrite!
+
+- Removal of exception caching, addressing a memory leak condition
+ in Python 3. Thanks to Michael Droettboom and the Cape Town PUG for
+ their analysis and work on this problem!
+
+- Fixed bug when using packrat parsing, where a previously parsed
+ expression would duplicate subsequent tokens - reported by Frankie
+ Ribery on stackoverflow, thanks!
+
+- Added 'ungroup' helper method, to address token grouping done
+ implicitly by And expressions, even if only one expression in the
+ And actually returns any text - also inspired by stackoverflow
+ discussion with Frankie Ribery!
+
+- Fixed bug in srange, which accepted escaped hex characters of the
+ form '\0x##', but should be '\x##'. Both forms will be supported
+ for backwards compatibility.
+
+- Enhancement to countedArray, accepting an optional expression to be
+ used for matching the leading integer count - proposed by Mathias on
+ the pyparsing mailing list, good idea!
+
+- Added the Verilog parser to the provided set of examples, under the
+ MIT license. While this frees up this parser for any use, if you find
+ yourself using it for a commercial purpose, please consider making a
+ charitable donation as described in the parser's header.
+
+- Added the excludeChars argument to the Word class, to simplify defining
+ a word composed of all characters in a large range except for one or
+ two. Suggested by JesterEE on the pyparsing wiki.
+
+- Added optional overlap parameter to scanString, to return overlapping
+ matches found in the source text.
+
+- Updated oneOf internal regular expression generation, with improved
+ parse time performance.
+
+- Slight performance improvement in transformString, removing empty
+ strings from the list of string fragments built while scanning the
+ source text, before calling ''.join. Especially useful when using
+ transformString to strip out selected text.
+
+- Enhanced form of using the "expr('name')" style of results naming,
+ in lieu of calling setResultsName. If name ends with an '*', then
+ this is equivalent to expr.setResultsName('name',listAllMatches=True).
+
+- Fixed up internal list flattener to use iteration instead of recursion,
+ to avoid stack overflow when transforming large files.
+
+- Added other new examples:
+ . protobuf parser - parses Google's protobuf language
+ . btpyparse - a BibTex parser contributed by Matthew Brett,
+ with test suite test_bibparse.py (thanks, Matthew!)
+ . groupUsingListAllMatches.py - demo using trailing '*' for results
+ names
+
+
+Version 1.5.5 - August, 2010
+----------------------------
+
+- Typo in Python3 version of pyparsing, "builtin" should be "builtins".
+ (sigh)
+
+
+Version 1.5.4 - August, 2010
+----------------------------
+
+- Fixed __builtins__ and file references in Python 3 code, thanks to
+ Greg Watson, saulspatz, sminos, and Mark Summerfield for reporting
+ their Python 3 experiences.
+
+- Added new example, apicheck.py, as a sample of scanning a Tcl-like
+ language for functions with incorrect number of arguments (difficult
+ to track down in Tcl languages). This example uses some interesting
+ methods for capturing exceptions while scanning through source
+ code.
+
+- Added new example deltaTime.py, that takes everyday time references
+ like "an hour from now", "2 days ago", "next Sunday at 2pm".
+
+
+Version 1.5.3 - June, 2010
+--------------------------
+
+- ======= NOTE: API CHANGE!!!!!!! ===============
+ With this release, and henceforward, the pyparsing module is
+ imported as "pyparsing" on both Python 2.x and Python 3.x versions.
+
+- Fixed up setup.py to auto-detect Python version and install the
+ correct version of pyparsing - suggested by Alex Martelli,
+ thanks, Alex! (and my apologies to all those who struggled with
+ those spurious installation errors caused by my earlier
+ fumblings!)
+
+- Fixed bug on Python3 when using parseFile, getting bytes instead of
+ a str from the input file.
+
+- Fixed subtle bug in originalTextFor, if followed by
+ significant whitespace (like a newline) - discovered by
+ Francis Vidal, thanks!
+
+- Fixed very sneaky bug in Each, in which Optional elements were
+ not completely recognized as optional - found by Tal Weiss, thanks
+ for your patience.
+
+- Fixed off-by-1 bug in line() method when the first line of the
+ input text was an empty line. Thanks to John Krukoff for submitting
+ a patch!
+
+- Fixed bug in transformString if grammar contains Group expressions,
+ thanks to patch submitted by barnabas79, nice work!
+
+- Fixed bug in originalTextFor in which trailing comments or otherwise
+ ignored text got slurped in with the matched expression. Thanks to
+ michael_ramirez44 on the pyparsing wiki for reporting this just in
+ time to get into this release!
+
+- Added better support for summing ParseResults, see the new example,
+ parseResultsSumExample.py.
+
+- Added support for composing a Regex using a compiled RE object;
+ thanks to my new colleague, Mike Thornton!
+
+- In version 1.5.2, I changed the way exceptions are raised in order
+ to simplify the stacktraces reported during parsing. An anonymous
+ user posted a bug report on SF that this behavior makes it difficult
+ to debug some complex parsers, or parsers nested within parsers. In
+ this release I've added a class attribute ParserElement.verbose_stacktrace,
+ with a default value of False. If you set this to True, pyparsing will
+ report stacktraces using the pre-1.5.2 behavior.
+
+- New examples:
+
+ . pymicko.py, a MicroC compiler submitted by Zarko Zivanov.
+ (Note: this example is separately licensed under the GPLv3,
+ and requires Python 2.6 or higher.) Thank you, Zarko!
+
+ . oc.py, a subset C parser, using the BNF from the 1996 Obfuscated C
+ Contest.
+
+ . stateMachine2.py, a modified version of stateMachine.py submitted
+ by Matt Anderson, that is compatible with Python versions 2.7 and
+ above - thanks so much, Matt!
+
+ . select_parser.py, a parser for reading SQLite SELECT statements,
+ as specified at http://www.sqlite.org/lang_select.html; this goes
+ into much more detail than the simple SQL parser included in pyparsing's
+ source code
+
+ . excelExpr.py, a *simplistic* first-cut at a parser for Excel
+ expressions, which I originally posted on comp.lang.python in January,
+ 2010; beware, this parser omits many common Excel cases (addition of
+ numbers represented as strings, references to named ranges)
+
+ . cpp_enum_parser.py, a nice little parser posted by Mark Tolonen on
+ comp.lang.python in August, 2009 (redistributed here with Mark's
+ permission). Thanks a bunch, Mark!
+
+ . partial_gene_match.py, a sample I posted to Stackoverflow.com,
+ implementing a special variation on Literal that does "close" matching,
+ up to a given number of allowed mismatches. The application was to
+ find matching gene sequences, with allowance for one or two mismatches.
+
+ . tagCapture.py, a sample showing how to use a Forward placeholder to
+ enforce matching of text parsed in a previous expression.
+
+ . matchPreviousDemo.py, simple demo showing how the matchPreviousLiteral
+ helper method is used to match a previously parsed token.
+
+
+Version 1.5.2 - April, 2009
+------------------------------
+- Added pyparsing_py3.py module, so that Python 3 users can use
+ pyparsing by changing their pyparsing import statement to:
+
+ import pyparsing_py3
+
+ Thanks for help from Patrick Laban and his friend Geremy
+ Condra on the pyparsing wiki.
+
+- Removed __slots__ declaration on ParseBaseException, for
+ compatibility with IronPython 2.0.1. Raised by David
+ Lawler on the pyparsing wiki, thanks David!
+
+- Fixed bug in SkipTo/failOn handling - caught by eagle eye
+ cpennington on the pyparsing wiki!
+
+- Fixed second bug in SkipTo when using the ignore constructor
+ argument, reported by Catherine Devlin, thanks!
+
+- Fixed obscure bug reported by Eike Welk when using a class
+ as a ParseAction with an errant __getitem__ method.
+
+- Simplified exception stack traces when reporting parse
+ exceptions back to caller of parseString or parseFile - thanks
+ to a tip from Peter Otten on comp.lang.python.
+
+- Changed behavior of scanString to avoid infinitely looping on
+ expressions that match zero-length strings. Prompted by a
+ question posted by ellisonbg on the wiki.
+
+- Enhanced classes that take a list of expressions (And, Or,
+ MatchFirst, and Each) to accept generator expressions also.
+ This can be useful when generating lists of alternative
+ expressions, as in this case, where the user wanted to match
+ any repetitions of '+', '*', '#', or '.', but not mixtures
+ of them (that is, match '+++', but not '+*+'):
+
+ codes = "+*#."
+ format = MatchFirst(Word(c) for c in codes)
+
+ Based on a problem posed by Denis Spir on the Python tutor
+ list.
+
+- Added new example eval_arith.py, which extends the example
+ simpleArith.py to actually evaluate the parsed expressions.
+
+
+Version 1.5.1 - October, 2008
+-------------------------------
+- Added new helper method originalTextFor, to replace the use of
+ the current keepOriginalText parse action. Now instead of
+ using the parse action, as in:
+
+ fullName = Word(alphas) + Word(alphas)
+ fullName.setParseAction(keepOriginalText)
+
+ (in this example, we used keepOriginalText to restore any white
+ space that may have been skipped between the first and last
+ names)
+ You can now write:
+
+ fullName = originalTextFor(Word(alphas) + Word(alphas))
+
+ The implementation of originalTextFor is simpler and faster than
+ keepOriginalText, and does not depend on using the inspect or
+ imp modules.
+
+- Added optional parseAll argument to parseFile, to be consistent
+ with parseAll argument to parseString. Posted by pboucher on the
+ pyparsing wiki, thanks!
+
+- Added failOn argument to SkipTo, so that grammars can define
+ literal strings or pyparsing expressions which, if found in the
+ skipped text, will cause SkipTo to fail. Useful to prevent
+ SkipTo from reading past terminating expression. Instigated by
+ question posed by Aki Niimura on the pyparsing wiki.
+
+- Fixed bug in nestedExpr if multi-character expressions are given
+ for nesting delimiters. Patch provided by new pyparsing user,
+ Hans-Martin Gaudecker - thanks, H-M!
+
+- Removed dependency on xml.sax.saxutils.escape, and included
+ internal implementation instead - proposed by Mike Droettboom on
+ the pyparsing mailing list, thanks Mike! Also fixed erroneous
+ mapping in replaceHTMLEntity of &quot; to ', now correctly maps
+ to ". (Also added support for mapping &apos; to '.)
+
+- Fixed typo in ParseResults.insert, found by Alejandro Dubrovsky,
+ good catch!
+
+- Added __dir__() methods to ParseBaseException and ParseResults,
+ to support new dir() behavior in Py2.6 and Py3.0. If dir() is
+ called on a ParseResults object, the returned list will include
+ the base set of attribute names, plus any results names that are
+ defined.
+
+- Fixed bug in ParseResults.asXML(), in which the first named
+ item within a ParseResults gets reported with an <ITEM> tag
+ instead of with the correct results name.
+
+- Fixed bug in '-' error stop, when '-' operator is used inside a
+ Combine expression.
+
+- Reverted generator expression to use list comprehension, for
+ better compatibility with old versions of Python. Reported by
+ jester/artixdesign on the SourceForge pyparsing discussion list.
+
+- Fixed bug in parseString(parseAll=True), when the input string
+ ends with a comment or whitespace.
+
+- Fixed bug in LineStart and LineEnd that did not recognize any
+ special whitespace chars defined using ParserElement.setDefault-
+ WhitespaceChars, found while debugging an issue for Marek Kubica,
+ thanks for the new test case, Marek!
+
+- Made Forward class more tolerant of subclassing.
+
+
+Version 1.5.0 - June, 2008
+--------------------------
+This version of pyparsing includes work on two long-standing
+FAQ's: support for forcing parsing of the complete input string
+(without having to explicitly append StringEnd() to the grammar),
+and a method to improve the mechanism of detecting where syntax
+errors occur in an input string with various optional and
+alternative paths. This release also includes a helper method
+to simplify definition of indentation-based grammars. With
+these changes (and the past few minor updates), I thought it was
+finally time to bump the minor rev number on pyparsing - so
+1.5.0 is now available! Read on...
+
+- AT LAST!!! You can now call parseString and have it raise
+ an exception if the expression does not parse the entire
+ input string. This has been an FAQ for a LONG time.
+
+ The parseString method now includes an optional parseAll
+ argument (default=False). If parseAll is set to True, then
+ the given parse expression must parse the entire input
+ string. (This is equivalent to adding StringEnd() to the
+ end of the expression.) The default value is False to
+ retain backward compatibility.
+
+ Inspired by MANY requests over the years, most recently by
+ ecir-hana on the pyparsing wiki!
+
+- Added new operator '-' for composing grammar sequences. '-'
+ behaves just like '+' in creating And expressions, but '-'
+ is used to mark grammar structures that should stop parsing
+ immediately and report a syntax error, rather than just
+ backtracking to the last successful parse and trying another
+ alternative. For instance, running the following code:
+
+ port_definition = Keyword("port") + '=' + Word(nums)
+ entity_definition = Keyword("entity") + "{" +
+ Optional(port_definition) + "}"
+
+ entity_definition.parseString("entity { port 100 }")
+
+ pyparsing fails to detect the missing '=' in the port definition.
+ But, since this expression is optional, pyparsing then proceeds
+ to try to match the closing '}' of the entity_definition. Not
+ finding it, pyparsing reports that there was no '}' after the '{'
+ character. Instead, we would like pyparsing to parse the 'port'
+ keyword, and if not followed by an equals sign and an integer,
+ to signal this as a syntax error.
+
+ This can now be done simply by changing the port_definition to:
+
+ port_definition = Keyword("port") - '=' + Word(nums)
+
+ Now after successfully parsing 'port', pyparsing must also find
+ an equals sign and an integer, or it will raise a fatal syntax
+ exception.
+
+ By judicious insertion of '-' operators, a pyparsing developer
+ can have their grammar report much more informative syntax error
+ messages.
+
+ Patches and suggestions proposed by several contributors on
+ the pyparsing mailing list and wiki - special thanks to
+ Eike Welk and Thomas/Poldy on the pyparsing wiki!
+
+- Added indentedBlock helper method, to encapsulate the parse
+ actions and indentation stack management needed to keep track of
+ indentation levels. Use indentedBlock to define grammars for
+ indentation-based grouping grammars, like Python's.
+
+ indentedBlock takes up to 3 parameters:
+ - blockStatementExpr - expression defining syntax of statement
+ that is repeated within the indented block
+ - indentStack - list created by caller to manage indentation
+ stack (multiple indentedBlock expressions
+ within a single grammar should share a common indentStack)
+ - indent - boolean indicating whether block must be indented
+ beyond the current level; set to False for block of
+ left-most statements (default=True)
+
+ A valid block must contain at least one indented statement.
+
+- Fixed bug in nestedExpr in which ignored expressions needed
+ to be set off with whitespace. Reported by Stefaan Himpe,
+ nice catch!
+
+- Expanded multiplication of an expression by a tuple, to
+ accept tuple values of None:
+ . expr*(n,None) or expr*(n,) is equivalent
+ to expr*n + ZeroOrMore(expr)
+ (read as "at least n instances of expr")
+ . expr*(None,n) is equivalent to expr*(0,n)
+ (read as "0 to n instances of expr")
+ . expr*(None,None) is equivalent to ZeroOrMore(expr)
+ . expr*(1,None) is equivalent to OneOrMore(expr)
+
+ Note that expr*(None,n) does not raise an exception if
+ more than n exprs exist in the input stream; that is,
+ expr*(None,n) does not enforce a maximum number of expr
+ occurrences. If this behavior is desired, then write
+ expr*(None,n) + ~expr
+
+- Added None as a possible operator for operatorPrecedence.
+ None signifies "no operator", as in multiplying m times x
+ in "y=mx+b".
+
+- Fixed bug in Each, reported by Michael Ramirez, in which the
+ order of terms in the Each affected the parsing of the results.
+ Problem was due to premature grouping of the expressions in
+ the overall Each during grammar construction, before the
+ complete Each was defined. Thanks, Michael!
+
+- Also fixed bug in Each in which Optional's with default values
+ were not getting the defaults added to the results of the
+ overall Each expression.
+
+- Fixed a bug in Optional in which results names were not
+ assigned if a default value was supplied.
+
+- Cleaned up Py3K compatibility statements, including exception
+ construction statements, and better equivalence between _ustr
+ and basestring, and __nonzero__ and __bool__.
+
+
+Version 1.4.11 - February, 2008
+-------------------------------
+- With help from Robert A. Clark, this version of pyparsing
+ is compatible with Python 3.0a3. Thanks for the help,
+ Robert!
+
+- Added WordStart and WordEnd positional classes, to support
+ expressions that must occur at the start or end of a word.
+ Proposed by piranha on the pyparsing wiki, good idea!
+
+- Added matchOnlyAtCol helper parser action, to simplify
+ parsing log or data files that have optional fields that are
+ column dependent. Inspired by a discussion thread with
+ hubritic on comp.lang.python.
+
+- Added withAttribute.ANY_VALUE as a match-all value when using
+ withAttribute. Used to ensure that an attribute is present,
+ without having to match on the actual attribute value.
+
+- Added get() method to ParseResults, similar to dict.get().
+ Suggested by new pyparsing user, Alejandro Dubrovsky, thanks!
+
+- Added '==' short-cut to see if a given string matches a
+ pyparsing expression. For instance, you can now write:
+
+ integer = Word(nums)
+ if "123" == integer:
+ # do something
+
+ print [ x for x in "123 234 asld".split() if x==integer ]
+ # prints ['123', '234']
+
+- Simplified the use of nestedExpr when using an expression for
+ the opening or closing delimiters. Now the content expression
+ will not have to explicitly negate closing delimiters. Found
+ while working with dfinnie on GHOP Task #277, thanks!
+
+- Fixed bug when defining ignorable expressions that are
+ later enclosed in a wrapper expression (such as ZeroOrMore,
+ OneOrMore, etc.) - found while working with Prabhu
+ Gurumurthy, thanks Prabhu!
+
+- Fixed bug in withAttribute in which keys were automatically
+ converted to lowercase, making it impossible to match XML
+ attributes with uppercase characters in them. Using with-
+ Attribute requires that you reference attributes in all
+ lowercase if parsing HTML, and in correct case when parsing
+ XML.
+
+- Changed '<<' operator on Forward to return None, since this
+ is really used as a pseudo-assignment operator, not as a
+ left-shift operator. By returning None, it is easier to
+ catch faulty statements such as a << b | c, where precedence
+ of operations causes the '|' operation to be performed
+ *after* inserting b into a, so no alternation is actually
+ implemented. The correct form is a << (b | c). With this
+ change, an error will be reported instead of silently
+ clipping the alternative term. (Note: this may break some
+ existing code, but if it does, the code had a silent bug in
+ it anyway.) Proposed by wcbarksdale on the pyparsing wiki,
+ thanks!
+
+- Several unit tests were added to pyparsing's regression
+ suite, courtesy of the Google Highly-Open Participation
+ Contest. Thanks to all who administered and took part in
+ this event!
+
+
+Version 1.4.10 - December 9, 2007
+---------------------------------
+- Fixed bug introduced in v1.4.8, parse actions were called for
+ intermediate operator levels, not just the deepest matching
+ operation level. Again, big thanks to Torsten Marek for
+ helping isolate this problem!
+
+
+Version 1.4.9 - December 8, 2007
+--------------------------------
+- Added '*' multiplication operator support when creating
+ grammars, accepting either an integer, or a two-integer
+ tuple multiplier, as in:
+ ipAddress = Word(nums) + ('.'+Word(nums))*3
+ usPhoneNumber = Word(nums) + ('-'+Word(nums))*(1,2)
+ If multiplying by a tuple, the two integer values represent
+ min and max multiples. Suggested by Vincent of eToy.com,
+ great idea, Vincent!
+
+- Fixed bug in nestedExpr, original version was overly greedy!
+ Thanks to Michael Ramirez for raising this issue.
+
+- Fixed internal bug in ParseResults - when an item was deleted,
+ the key indices were not updated. Thanks to Tim Mitchell for
+ posting a bugfix patch to the SF bug tracking system!
+
+- Fixed internal bug in operatorPrecedence - when the results of
+ a right-associative term were sent to a parse action, the wrong
+ tokens were sent. Reported by Torsten Marek, nice job!
+
+- Added pop() method to ParseResults. If pop is called with an
+ integer or with no arguments, it will use list semantics and
+ update the ParseResults' list of tokens. If pop is called with
+ a non-integer (a string, for instance), then it will use dict
+ semantics and update the ParseResults' internal dict.
+ Suggested by Donn Ingle, thanks Donn!
+
+- Fixed quoted string built-ins to accept '\xHH' hex characters
+ within the string.
+
+
+Version 1.4.8 - October, 2007
+-----------------------------
+- Added new helper method nestedExpr to easily create expressions
+ that parse lists of data in nested parentheses, braces, brackets,
+ etc.
+
+- Added withAttribute parse action helper, to simplify creating
+ filtering parse actions to attach to expressions returned by
+ makeHTMLTags and makeXMLTags. Use withAttribute to qualify a
+ starting tag with one or more required attribute values, to avoid
+ false matches on common tags such as <TD> or <DIV>.
+
+- Added new examples nested.py and withAttribute.py to demonstrate
+ the new features.
+
+- Added performance speedup to grammars using operatorPrecedence,
+ instigated by Stefan Reichör - thanks for the feedback, Stefan!
+
+- Fixed bug/typo when deleting an element from a ParseResults by
+ using the element's results name.
+
+- Fixed whitespace-skipping bug in wrapper classes (such as Group,
+ Suppress, Combine, etc.) and when using setDebug(), reported by
+ new pyparsing user dazzawazza on SourceForge, nice job!
+
+- Added restriction to prevent defining Word or CharsNotIn expressions
+ with minimum length of 0 (should use Optional if this is desired),
+ and enhanced docstrings to reflect this limitation. Issue was
+ raised by Joey Tallieu, who submitted a patch with a slightly
+ different solution. Thanks for taking the initiative, Joey, and
+ please keep submitting your ideas!
+
+- Fixed bug in makeHTMLTags that did not detect HTML tag attributes
+ with no '= value' portion (such as "<td nowrap>"), reported by
+ hamidh on the pyparsing wiki - thanks!
+
+- Fixed minor bug in makeHTMLTags and makeXMLTags, which did not
+ accept whitespace in closing tags.
+
+
+Version 1.4.7 - July, 2007
+--------------------------
+- NEW NOTATION SHORTCUT: ParserElement now accepts results names using
+ a notational shortcut, following the expression with the results name
+ in parentheses. So this:
+
+ stats = "AVE:" + realNum.setResultsName("average") + \
+ "MIN:" + realNum.setResultsName("min") + \
+ "MAX:" + realNum.setResultsName("max")
+
+ can now be written as this:
+
+ stats = "AVE:" + realNum("average") + \
+ "MIN:" + realNum("min") + \
+ "MAX:" + realNum("max")
+
+ The intent behind this change is to make it simpler to define results
+ names for significant fields within the expression, while keeping
+ the grammar syntax clean and uncluttered.
+
+- Fixed bug when packrat parsing is enabled, with cached ParseResults
+ being updated by subsequent parsing. Reported on the pyparsing
+ wiki by Kambiz, thanks!
+
+- Fixed bug in operatorPrecedence for unary operators with left
+ associativity, if multiple operators were given for the same term.
+
+- Fixed bug in example simpleBool.py, corrected precedence of "and" vs.
+ "or" operations.
+
+- Fixed bug in Dict class, in which keys were converted to strings
+ whether they needed to be or not. Have narrowed this logic to
+ convert keys to strings only if the keys are ints (which would
+ confuse __getitem__ behavior for list indexing vs. key lookup).
+
+- Added ParserElement method setBreak(), which will invoke the pdb
+ module's set_trace() function when this expression is about to be
+ parsed.
+
+- Fixed bug in StringEnd in which reading off the end of the input
+ string raises an exception - should match. Resolved while
+ answering a question for Shawn on the pyparsing wiki.
+
+
+Version 1.4.6 - April, 2007
+---------------------------
+- Simplified constructor for ParseFatalException, to support common
+ exception construction idiom:
+ raise ParseFatalException, "unexpected text: 'Spanish Inquisition'"
+
+- Added method getTokensEndLoc(), to be called from within a parse action,
+ for those parse actions that need both the starting *and* ending
+ location of the parsed tokens within the input text.
+
+- Enhanced behavior of keepOriginalText so that named parse fields are
+ preserved, even though tokens are replaced with the original input
+ text matched by the current expression. Also, cleaned up the stack
+ traversal to be more robust. Suggested by Tim Arnold - thanks, Tim!
+
+- Fixed subtle bug in which countedArray (and similar dynamic
+ expressions configured in parse actions) failed to match within Or,
+ Each, FollowedBy, or NotAny. Reported by Ralf Vosseler, thanks for
+ your patience, Ralf!
+
+- Fixed Unicode bug in upcaseTokens and downcaseTokens parse actions,
+ scanString, and default debugging actions; reported (and patch submitted)
+ by Nikolai Zamkovoi, spasibo!
+
+- Fixed bug when saving a tuple as a named result. The returned
+ token list gave the proper tuple value, but accessing the result by
+ name only gave the first element of the tuple. Reported by
+ Poromenos, nice catch!
+
+- Fixed bug in makeHTMLTags/makeXMLTags, which failed to match tag
+ attributes with namespaces.
+
+- Fixed bug in SkipTo when setting include=True, to have the skipped-to
+ tokens correctly included in the returned data. Reported by gunars on
+ the pyparsing wiki, thanks!
+
+- Fixed typobug in OnceOnly.reset method, omitted self argument.
+ Submitted by eike welk, thanks for the lint-picking!
+
+- Added performance enhancement to Forward class, suggested by
+ akkartik on the pyparsing Wiki discussion, nice work!
+
+- Added optional asKeyword to Word constructor, to indicate that the
+ given word pattern should be matched only as a keyword, that is, it
+ should only match if it is within word boundaries.
+
+- Added S-expression parser to examples directory.
+
+- Added macro substitution example to examples directory.
+
+- Added holaMundo.py example, excerpted from Marco Alfonso's blog -
+ muchas gracias, Marco!
+
+- Modified internal cyclic references in ParseResults to use weakrefs;
+ this should help reduce the memory footprint of large parsing
+ programs, at some cost to performance (3-5%). Suggested by bca48150 on
+ the pyparsing wiki, thanks!
+
+- Enhanced the documentation describing the vagaries and idiosyncrasies
+ of parsing strings with embedded tabs, and the impact on:
+ . parse actions
+ . scanString
+ . col and line helper functions
+ (Suggested by eike welk in response to some unexplained inconsistencies
+ between parsed location and offsets in the input string.)
+
+- Cleaned up internal decorators to preserve function names,
+ docstrings, etc.
+
+
+Version 1.4.5 - December, 2006
+------------------------------
+- Removed debugging print statement from QuotedString class. Sorry
+ for not stripping this out before the 1.4.4 release!
+
+- A significant performance improvement, the first one in a while!
+ For my Verilog parser, this version of pyparsing is about double the
+ speed - YMMV.
+
+- Added support for pickling of ParseResults objects. (Reported by
+ Jeff Poole, thanks Jeff!)
+
+- Fixed minor bug in makeHTMLTags that did not recognize tag attributes
+ with embedded '-' or '_' characters. Also, added support for
+ passing expressions to makeHTMLTags and makeXMLTags, and used this
+ feature to define the globals anyOpenTag and anyCloseTag.
+
+- Fixed error in alphas8bit, I had omitted the y-with-umlaut character.
+
+- Added punc8bit string to complement alphas8bit - it contains all the
+ non-alphabetic, non-blank 8-bit characters.
+
+- Added commonHTMLEntity expression, to match common HTML "ampersand"
+ codes, such as "&lt;", "&gt;", "&amp;", "&nbsp;", and "&quot;". This
+ expression also defines a results name 'entity', which can be used
+ to extract the entity field (that is, "lt", "gt", etc.). Also added
+ built-in parse action replaceHTMLEntity, which can be attached to
+ commonHTMLEntity to translate "&lt;", "&gt;", "&amp;", "&nbsp;", and
+ "&quot;" to "<", ">", "&", " ", and '"'.
+
+- Added example, htmlStripper.py, that strips HTML tags and scripts
+ from HTML pages. It also translates common HTML entities to their
+ respective characters.
+
+
+Version 1.4.4 - October, 2006
+-------------------------------
+- Fixed traceParseAction decorator to also trap and record exception
+ returns from parse actions, and to handle parse actions with 0,
+ 1, 2, or 3 arguments.
+
+- Enhanced parse action normalization to support using classes as
+ parse actions; that is, the class constructor is called at parse
+ time and the __init__ function is called with 0, 1, 2, or 3
+ arguments. If passing a class as a parse action, the __init__
+ method must use one of the valid parse action parameter list
+ formats. (This technique is useful when using pyparsing to compile
+ parsed text into a series of application objects - see the new
+ example simpleBool.py.)
+
+- Fixed bug in ParseResults when setting an item using an integer
+ index. (Reported by Christopher Lambacher, thanks!)
+
+- Fixed whitespace-skipping bug, patch submitted by Paolo Losi -
+ grazie, Paolo!
+
+- Fixed bug when a Combine contained an embedded Forward expression,
+ reported by cie on the pyparsing wiki - good catch!
+
+- Fixed listAllMatches bug, when a listAllMatches result was
+ nested within another result. (Reported by don pasquale on
+ comp.lang.python, well done!)
+
+- Fixed bug in ParseResults items() method, when returning an item
+ marked as listAllMatches=True
+
+- Fixed bug in definition of cppStyleComment (and javaStyleComment)
+ in which '//' line comments were not continued to the next line
+ if the line ends with a '\'. (Reported by eagle-eyed Ralph
+ Corderoy!)
+
+- Optimized re's for cppStyleComment and quotedString for better
+ re performance - also provided by Ralph Corderoy, thanks!
+
+- Added new example, indentedGrammarExample.py, showing how to
+ define a grammar using indentation to show grouping (as Python
+ does for defining statement nesting). Instigated by an e-mail
+ discussion with Andrew Dalke, thanks Andrew!
+
+- Added new helper operatorPrecedence (based on e-mail list discussion
+ with Ralph Corderoy and Paolo Losi), to facilitate definition of
+ grammars for expressions with unary and binary operators. For
+ instance, this grammar defines a 6-function arithmetic expression
+ grammar, with unary plus and minus, proper operator precedence, and
+ right- and left-associativity:
+
+ expr = operatorPrecedence( operand,
+ [("!", 1, opAssoc.LEFT),
+ ("^", 2, opAssoc.RIGHT),
+ (oneOf("+ -"), 1, opAssoc.RIGHT),
+ (oneOf("* /"), 2, opAssoc.LEFT),
+ (oneOf("+ -"), 2, opAssoc.LEFT),]
+ )
+
+ Also added example simpleArith.py and simpleBool.py to provide
+ more detailed code samples using this new helper method.
+
+- Added new helpers matchPreviousLiteral and matchPreviousExpr, for
+ creating adaptive parsing expressions that match the same content
+ as was parsed in a previous parse expression. For instance:
+
+ first = Word(nums)
+ matchExpr = first + ":" + matchPreviousLiteral(first)
+
+ will match "1:1", but not "1:2". Since this matches at the literal
+ level, this will also match the leading "1:1" in "1:10".
+
+ In contrast:
+
+ first = Word(nums)
+ matchExpr = first + ":" + matchPreviousExpr(first)
+
+ will *not* match the leading "1:1" in "1:10"; the expressions are
+ evaluated first, and then compared, so "1" is compared with "10".
+
+- Added keepOriginalText parse action. Sometimes pyparsing's
+ whitespace-skipping leaves out too much whitespace. Adding this
+ parse action will restore any internal whitespace for a parse
+ expression. This is especially useful when defining expressions
+ for scanString or transformString applications.
+
+- Added __add__ method for ParseResults class, to better support
+ using Python sum built-in for summing ParseResults objects returned
+ from scanString.
+
+- Added reset method for the new OnlyOnce class wrapper for parse
+ actions (to allow a grammar to be used multiple times).
+
+- Added optional maxMatches argument to scanString and searchString,
+ to short-circuit scanning after 'n' expression matches are found.
+
+
+Version 1.4.3 - July, 2006
+------------------------------
+- Fixed implementation of multiple parse actions for an expression
+ (added in 1.4.2).
+ . setParseAction() reverts to its previous behavior, setting
+ one (or more) actions for an expression, overwriting any
+ action or actions previously defined
+ . new method addParseAction() appends one or more parse actions
+ to the list of parse actions attached to an expression
+ Now it is harder to accidentally append parse actions to an
+ expression, when what you wanted to do was overwrite whatever had
+ been defined before. (Thanks, Jean-Paul Calderone!)
+
+- Simplified interface to parse actions that do not require all 3
+ parse action arguments. Very rarely do parse actions require more
+ than just the parsed tokens, yet parse actions still require all
+ 3 arguments including the string being parsed and the location
+ within the string where the parse expression was matched. With this
+ release, parse actions may now be defined to be called as:
+ . fn(string,locn,tokens) (the current form)
+ . fn(locn,tokens)
+ . fn(tokens)
+ . fn()
+ The setParseAction and addParseAction methods will internally decorate
+ the provided parse actions with compatible wrappers to conform to
+ the full (string,locn,tokens) argument sequence.
+
+- REMOVED SUPPORT FOR RETURNING PARSE LOCATION FROM A PARSE ACTION.
+ I announced this in March, 2004, and gave a final warning in the last
+ release. Now you can return a tuple from a parse action, and it will
+ be treated like any other return value (i.e., the tuple will be
+ substituted for the incoming tokens passed to the parse action,
+ which is useful when trying to parse strings into tuples).
+
+- Added setFailAction method, taking a callable function fn that
+ takes the arguments fn(s,loc,expr,err) where:
+ . s - string being parsed
+ . loc - location where expression match was attempted and failed
+ . expr - the parse expression that failed
+ . err - the exception thrown
+ The function returns no values. It may throw ParseFatalException
+ if it is desired to stop parsing immediately.
+ (Suggested by peter21081944 on wikispaces.com)
+
+- Added class OnlyOnce as helper wrapper for parse actions. OnlyOnce
+ only permits a parse action to be called one time, after which
+ all subsequent calls throw a ParseException.
+
+- Added traceParseAction decorator to help debug parse actions.
+ Simply insert "@traceParseAction" ahead of the definition of your
+ parse action, and each invocation will be displayed, along with
+ incoming arguments, and returned value.
+
+- Fixed bug when copying ParserElements using copy() or
+ setResultsName(). (Reported by Dan Thill, great catch!)
+
+- Fixed bug in asXML() where token text contains <, >, and &
+ characters - generated XML now escapes these as &lt;, &gt; and
+ &amp;. (Reported by Jacek Sieka, thanks!)
+
+- Fixed bug in SkipTo() when searching for a StringEnd(). (Reported
+ by Pete McEvoy, thanks Pete!)
+
+- Fixed "except Exception" statements, the most critical added as part
+ of the packrat parsing enhancement. (Thanks, Erick Tryzelaar!)
+
+- Fixed end-of-string infinite looping on LineEnd and StringEnd
+ expressions. (Thanks again to Erick Tryzelaar.)
+
+- Modified setWhitespaceChars to return self, to be consistent with
+ other ParserElement modifiers. (Suggested by Erick Tryzelaar.)
+
+- Fixed bug/typo in new ParseResults.dump() method.
+
+- Fixed bug in searchString() method, in which only the first token of
+ an expression was returned. searchString() now returns a
+ ParseResults collection of all search matches.
+
+- Added example program removeLineBreaks.py, a string transformer that
+ converts text files with hard line-breaks into one with line breaks
+ only between paragraphs.
+
+- Added example program listAllMatches.py, to illustrate using the
+ listAllMatches option when specifying results names (also shows new
+ support for passing lists to oneOf).
+
+- Added example program linenoExample.py, to illustrate using the
+ helper methods lineno, line, and col, and returning objects from a
+ parse action.
+
+- Added example program parseListString.py, which can parse the
+ string representation of a Python list back into a true list. Taken
+ mostly from my PyCon presentation examples, but now with support
+ for tuple elements, too!
+
+
+
+Version 1.4.2 - April 1, 2006 (No foolin'!)
+-------------------------------------------
+- Significant speedup from memoizing nested expressions (a technique
+ known as "packrat parsing"), thanks to Chris Lesniewski-Laas! Your
+ mileage may vary, but my Verilog parser almost doubled in speed to
+ over 600 lines/sec!
+
+ This speedup may break existing programs that use parse actions that
+ have side-effects. For this reason, packrat parsing is disabled when
+ you first import pyparsing. To activate the packrat feature, your
+ program must call the class method ParserElement.enablePackrat(). If
+ your program uses psyco to "compile as you go", you must call
+ enablePackrat before calling psyco.full(). If you do not do this,
+ Python will crash. For best results, call enablePackrat() immediately
+ after importing pyparsing.
+
+- Added new helper method countedArray(expr), for defining patterns that
+ start with a leading integer to indicate the number of array elements,
+ followed by that many elements, matching the given expr parse
+ expression. For instance, this two-liner:
+ wordArray = countedArray(Word(alphas))
+ print wordArray.parseString("3 Practicality beats purity")[0]
+ returns the parsed array of words:
+ ['Practicality', 'beats', 'purity']
+ The leading token '3' is suppressed, although it is easily obtained
+ from the length of the returned array.
+ (Inspired by e-mail discussion with Ralf Vosseler.)
+
+- Added support for attaching multiple parse actions to a single
+ ParserElement. (Suggested by Dan "Dang" Griffith - nice idea, Dan!)
+
+- Added support for asymmetric quoting characters in the recently-added
+ QuotedString class. Now you can define your own quoted string syntax
+ like "<<This is a string in double angle brackets.>>". To define
+ this custom form of QuotedString, your code would define:
+ dblAngleQuotedString = QuotedString('<<',endQuoteChar='>>')
+ QuotedString also supports escaped quotes, escape character other
+ than '\', and multiline.
+
+- Changed the default value returned internally by Optional, so that
+ None can be used as a default value. (Suggested by Steven Bethard -
+ I finally saw the light!)
+
+- Added dump() method to ParseResults, to make it easier to list out
+ and diagnose values returned from calling parseString.
+
+- A new example, a search query string parser, submitted by Steven
+ Mooij and Rudolph Froger - a very interesting application, thanks!
+
+- Added an example that parses the BNF in Python's Grammar file, in
+ support of generating Python grammar documentation. (Suggested by
+ J H Stovall.)
+
+- A new example, submitted by Tim Cera, of a flexible parser module,
+ using a simple config variable to adjust parsing for input formats
+ that have slight variations - thanks, Tim!
+
+- Added an example for parsing Roman numerals, showing the capability
+ of parse actions to "compile" Roman numerals into their integer
+ values during parsing.
+
+- Added a new docs directory, for additional documentation or help.
+ Currently, this includes the text and examples from my recent
+ presentation at PyCon.
+
+- Fixed another typo in CaselessKeyword, thanks Stefan Behnel.
+
+- Expanded oneOf to also accept tuples, not just lists. This really
+ should be sufficient...
+
+- Added deprecation warnings when tuple is returned from a parse action.
+ Looking back, I see that I originally deprecated this feature in March,
+ 2004, so I'm guessing people really shouldn't have been using this
+ feature - I'll drop it altogether in the next release, which will
+ allow users to return a tuple from a parse action (which is really
+ handy when trying to reconstruct tuples from a tuple string
+ representation!).
+
+
+Version 1.4.1 - February, 2006
+------------------------------
+- Converted generator expression in QuotedString class to list
+ comprehension, to retain compatibility with Python 2.3. (Thanks, Titus
+ Brown for the heads-up!)
+
+- Added searchString() method to ParserElement, as an alternative to
+ using "scanString(instring).next()[0][0]" to search through a string
+ looking for a substring matching a given parse expression. (Inspired by
+ e-mail conversation with Dave Feustel.)
+
+- Modified oneOf to accept lists of strings as well as a single string
+ of space-delimited literals. (Suggested by Jacek Sieka - thanks!)
+
+- Removed deprecated use of Upcase in pyparsing test code. (Also caught by
+ Titus Brown.)
+
+- Removed lstrip() call from Literal - too aggressive in stripping
+ whitespace which may be valid for some grammars. (Point raised by Jacek
+ Sieka). Also, made Literal more robust in the event of passing an empty
+ string.
+
+- Fixed bug in replaceWith when returning None.
+
+- Added cautionary documentation for Forward class when assigning a
+ MatchFirst expression, as in:
+ fwdExpr << a | b | c
+ Precedence of operators causes this to be evaluated as:
+ (fwdExpr << a) | b | c
+ thereby leaving b and c out as parseable alternatives. Users must
+ explicitly group the values inserted into the Forward:
+ fwdExpr << (a | b | c)
+ (Suggested by Scot Wilcoxon - thanks, Scot!)
+
+
+Version 1.4 - January 18, 2006
+------------------------------
+- Added Regex class, to permit definition of complex embedded expressions
+ using regular expressions. (Enhancement provided by John Beisley, great
+ job!)
+
+- Converted implementations of Word, oneOf, quoted string, and comment
+ helpers to utilize regular expression matching. Performance improvements
+ in the 20-40% range.
+
+- Added QuotedString class, to support definition of non-standard quoted
+ strings (Suggested by Guillaume Proulx, thanks!)
+
+- Added CaselessKeyword class, to streamline grammars with, well, caseless
+ keywords (Proposed by Stefan Behnel, thanks!)
+
+- Fixed bug in SkipTo, when using an ignorable expression. (Patch provided
+ by Anonymous, thanks, whoever-you-are!)
+
+- Fixed typo in NoMatch class. (Good catch, Stefan Behnel!)
+
+- Fixed minor bug in _makeTags(), using string.printables instead of
+ pyparsing.printables.
+
+- Cleaned up some of the expressions created by makeXXXTags helpers, to
+ suppress extraneous <> characters.
+
+- Added some grammar definition-time checking to verify that a grammar is
+ being built using proper ParserElements.
+
+- Added examples:
+ . LAparser.py - linear algebra C preprocessor (submitted by Mike Ellis,
+ thanks Mike!)
+ . wordsToNum.py - converts word description of a number back to
+ the original number (such as 'one hundred and twenty three' -> 123)
+ . updated fourFn.py to support unary minus, added BNF comments
+
+
+Version 1.3.3 - September 12, 2005
+----------------------------------
+- Improved support for Unicode strings that would be returned using
+ srange. Added greetingInKorean.py example, for a Korean version of
+ "Hello, World!" using Unicode. (Thanks, June Kim!)
+
+- Added 'hexnums' string constant (nums+"ABCDEFabcdef") for defining
+ hexadecimal value expressions.
+
+- NOTE: ===THIS CHANGE MAY BREAK EXISTING CODE===
+ Modified tag and results definitions returned by makeHTMLTags(),
+ to better support the looseness of HTML parsing. Tags to be
+ parsed are now caseless, and keys generated for tag attributes are
+ now converted to lower case.
+
+ Formerly, makeHTMLTags("XYZ") would return a tag with results
+ name of "startXYZ", this has been changed to "startXyz". If this
+ tag is matched against '<XYZ Abc="1" DEF="2" ghi="3">', the
+ matched keys formerly would be "Abc", "DEF", and "ghi"; keys are
+ now converted to lower case, giving keys of "abc", "def", and
+ "ghi". These changes were made to try to address the lax
+ case sensitivity agreement between start and end tags in many
+ HTML pages.
+
+ No changes were made to makeXMLTags(), which assumes more rigorous
+ parsing rules.
+
+ Also, cleaned up case-sensitivity bugs in closing tags, and
+ switched to using Keyword instead of Literal class for tags.
+ (Thanks, Steve Young, for getting me to look at these in more
+ detail!)
+
+- Added two helper parse actions, upcaseTokens and downcaseTokens,
+ which will convert matched text to all uppercase or lowercase,
+ respectively.
+
+- Deprecated Upcase class, to be replaced by upcaseTokens parse
+ action.
+
+- Converted messages sent to stderr to use warnings module, such as
+ when constructing a Literal with an empty string, one should use
+ the Empty() class or the empty helper instead.
+
+- Added ' ' (space) as an escapable character within a quoted
+ string.
+
+- Added helper expressions for common comment types, in addition
+ to the existing cStyleComment (/*...*/) and htmlStyleComment
+ (<!-- ... -->)
+ . dblSlashComment = // ... (to end of line)
+ . cppStyleComment = cStyleComment or dblSlashComment
+ . javaStyleComment = cppStyleComment
+ . pythonStyleComment = # ... (to end of line)
+
+
+
+Version 1.3.2 - July 24, 2005
+-----------------------------
+- Added Each class as an enhanced version of And. 'Each' requires
+ that all given expressions be present, but may occur in any order.
+ Special handling is provided to group ZeroOrMore and OneOrMore
+ elements that occur out-of-order in the input string. You can also
+ construct 'Each' objects by joining expressions with the '&'
+ operator. When using the Each class, results names are strongly
+ recommended for accessing the matched tokens. (Suggested by Pradam
+ Amini - thanks, Pradam!)
+
+- Stricter interpretation of 'max' qualifier on Word elements. If the
+ 'max' attribute is specified, matching will fail if an input field
+ contains more than 'max' consecutive body characters. For example,
+ previously, Word(nums,max=3) would match the first three characters
+ of '0123456', returning '012' and continuing parsing at '3'. Now,
+ when constructed using the max attribute, Word will raise an
+ exception with this string.
+
+- Cleaner handling of nested dictionaries returned by Dict. No
+ longer necessary to dereference sub-dictionaries as element [0] of
+ their parents.
+ === NOTE: THIS CHANGE MAY BREAK SOME EXISTING CODE, BUT ONLY IF
+ PARSING NESTED DICTIONARIES USING THE LITTLE-USED DICT CLASS ===
+ (Prompted by discussion thread on the Python Tutor list, with
+ contributions from Danny Yoo, Kent Johnson, and original post by
+ Liam Clarke - thanks all!)
+
+
+
+Version 1.3.1 - June, 2005
+----------------------------------
+- Added markInputline() method to ParseException, to display the input
+ text line location of the parsing exception. (Thanks, Stefan Behnel!)
+
+- Added setDefaultKeywordChars(), so that Keyword definitions using a
+ custom keyword character set do not all need to add the keywordChars
+ constructor argument (similar to setDefaultWhitespaceChars()).
+ (suggested by rzhanka on the SourceForge pyparsing forum.)
+
+- Simplified passing debug actions to setDebugActions(). You can now
+ pass 'None' for a debug action if you want to take the default
+ debug behavior. To suppress a particular debug action, you can pass
+ the pyparsing method nullDebugAction.
+
+- Refactored parse exception classes, moved all behavior to
+ ParseBaseException, and the former ParseException is now a subclass of
+ ParseBaseException. Added a second subclass, ParseFatalException, as
+ a subclass of ParseBaseException. User-defined parse actions can raise
+ ParseFatalException if a data inconsistency is detected (such as a
+ begin-tag/end-tag mismatch), and this will stop all parsing immediately.
+ (Inspired by e-mail thread with Michele Petrazzo - thanks, Michele!)
+
+- Added helper methods makeXMLTags and makeHTMLTags, that simplify the
+ definition of XML or HTML tag parse expressions for a given tagname.
+ Both functions return a pair of parse expressions, one for the opening
+ tag (that is, '<tagname>') and one for the closing tag ('</tagname>').
+ The opening tag also recognizes any attribute definitions that have
+ been included in the opening tag, as well as an empty tag (one with a
+ trailing '/', as in '<BODY/>' which is equivalent to '<BODY></BODY>').
+ makeXMLTags uses stricter XML syntax for attributes, requiring that they
+ be enclosed in double quote characters - makeHTMLTags is more lenient,
+ and accepts single-quoted strings or any contiguous string of characters
+ up to the next whitespace character or '>' character. Attributes can
+ be retrieved as dictionary or attribute values of the returned results
+ from the opening tag.
+
+- Added example minimath2.py, a refinement on fourFn.py that adds
+ an interactive session and support for variables. (Thanks, Steven Siew!)
+
+- Added performance improvement, up to 20% reduction! (Found while working
+ with Wolfgang Borgert on performance tuning of his TTCN3 parser.)
+
+- And another performance improvement, up to 25%, when using scanString!
+ (Found while working with Henrik Westlund on his C header file scanner.)
+
+- Updated UML diagrams to reflect latest class/method changes.
+
+
+Version 1.3 - March, 2005
+----------------------------------
+- Added new Keyword class, as a special form of Literal. Keywords
+ must be followed by whitespace or other non-keyword characters, to
+ distinguish them from variables or other identifiers that just
+ happen to start with the same characters as a keyword. For instance,
+ the input string containing "ifOnlyIfOnly" will match a Literal("if")
+ at the beginning and in the middle, but will fail to match a
+ Keyword("if"). Keyword("if") will match only strings such as "if only"
+ or "if(only)". (Proposed by Wolfgang Borgert, and Berteun Damman
+ separately requested this on comp.lang.python - great idea!)
+
+- Added setWhitespaceChars() method to override the characters to be
+ skipped as whitespace before matching a particular ParserElement. Also
+ added the class-level method setDefaultWhitespaceChars(), to allow
+ users to override the default set of whitespace characters (space,
+ tab, newline, and return) for all subsequently defined ParserElements.
+ (Inspired by Klaas Hofstra's inquiry on the Sourceforge pyparsing
+ forum.)
+
+- Added helper parse actions to support some very common parse
+ action use cases:
+ . replaceWith(replStr) - replaces the matching tokens with the
+ provided replStr replacement string; especially useful with
+ transformString()
+ . removeQuotes - removes first and last character from string enclosed
+ in quotes (note - NOT the same as the string strip() method, as only
+ a single character is removed at each end)
+
+- Added copy() method to ParserElement, to make it easier to define
+ different parse actions for the same basic parse expression. (Note, copy
+ is implicitly called when using setResultsName().)
+
+
+ (The following changes were posted to CVS as Version 1.2.3 -
+ October-December, 2004)
+
+- Added support for Unicode strings in creating grammar definitions.
+ (Big thanks to Gavin Panella!)
+
+- Added constant alphas8bit to include the following 8-bit characters:
+ ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ
+
+- Added srange() function to simplify definition of Word elements, using
+ regexp-like '[A-Za-z0-9]' syntax. This also simplifies referencing
+ common 8-bit characters.
+
+- Fixed bug in Dict when a single element Dict was embedded within another
+ Dict. (Thanks Andy Yates for catching this one!)
+
+- Added 'formatted' argument to ParseResults.asXML(). If set to False,
+ suppresses insertion of whitespace for pretty-print formatting. Default
+ equals True for backward compatibility.
+
+- Added setDebugActions() function to ParserElement, to allow user-defined
+ debugging actions.
+
+- Added support for escaped quotes (either in \', \", or doubled quote
+ form) to the predefined expressions for quoted strings. (Thanks, Ero
+ Carrera!)
+
+- Minor performance improvement (~5%) converting "char in string" tests
+ to "char in dict". (Suggested by Gavin Panella, cool idea!)
+
+
+Version 1.2.2 - September 27, 2004
+----------------------------------
+- Modified delimitedList to accept an expression as the delimiter, instead
+ of only accepting strings.
+
+- Modified ParseResults, to convert integer field keys to strings (to
+ avoid confusion with list access).
+
+- Modified Combine, to convert all embedded tokens to strings before
+ combining.
+
+- Fixed bug in MatchFirst in which parse actions would be called for
+ expressions that only partially match. (Thanks, John Hunter!)
+
+- Fixed bug in fourFn.py example that fixes right-associativity of ^
+ operator. (Thanks, Andrea Griffini!)
+
+- Added class FollowedBy(expression), to look ahead in the input string
+ without consuming tokens.
+
+- Added class NoMatch that never matches any input. Can be useful in
+ debugging, and in very specialized grammars.
+
+- Added example pgn.py, for parsing chess game files stored in Portable
+ Game Notation. (Thanks, Alberto Santini!)
+
+
+Version 1.2.1 - August 19, 2004
+-------------------------------
+- Added SkipTo(expression) token type, simplifying grammars that only
+ want to specify delimiting expressions, and want to match any characters
+ between them.
+
+- Added helper method dictOf(key,value), making it easier to work with
+ the Dict class. (Inspired by Pavel Volkovitskiy, thanks!).
+
+- Added optional argument listAllMatches (default=False) to
+ setResultsName(). Setting listAllMatches to True overrides the default
+ modal setting of tokens to results names; instead, the results name
+ acts as an accumulator for all matching tokens within the local
+ repetition group. (Suggested by Amaury Le Leyzour - thanks!)
+
+- Fixed bug in ParseResults, throwing exception when trying to extract
+ slice, or make a copy using [:]. (Thanks, Wilson Fowlie!)
+
+- Fixed bug in transformString() when the input string contains <TAB>'s
+ (Thanks, Rick Walia!).
+
+- Fixed bug in returning tokens from un-Grouped And's, Or's and
+ MatchFirst's, where too many tokens would be included in the results,
+ confounding parse actions and returned results.
+
+- Fixed bug in naming ParseResults returned by And's, Or's, and
+ MatchFirst's.
+
+- Fixed bug in LineEnd() - matching this token now correctly consumes
+ and returns the end of line "\n".
+
+- Added a beautiful example for parsing Mozilla calendar files (Thanks,
+ Petri Savolainen!).
+
+- Added support for dynamically modifying Forward expressions during
+ parsing.
+
+
+Version 1.2 - 20 June 2004
+--------------------------
+- Added definition for htmlComment to help support HTML scanning and
+ parsing.
+
+- Fixed bug in generating XML for Dict classes, in which trailing item was
+ duplicated in the output XML.
+
+- Fixed release bug in which scanExamples.py was omitted from release
+ files.
+
+- Fixed bug in transformString() when parse actions are not defined on the
+ outermost parser element.
+
+- Added example urlExtractor.py, as another example of using scanString
+ and parse actions.
+
+
+Version 1.2beta3 - 4 June 2004
+------------------------------
+- Added White() token type, analogous to Word, to match on whitespace
+ characters. Use White in parsers with significant whitespace (such as
+ configuration file parsers that use indentation to indicate grouping).
+ Construct White with a string containing the whitespace characters to be
+ matched. Similar to Word, White also takes optional min, max, and exact
+ parameters.
+
+- As part of supporting whitespace-significant parsing, added parseWithTabs()
+ method to ParserElement, to override the default behavior in parseString
+ of automatically expanding tabs to spaces. To retain tabs during
+ parsing, call parseWithTabs() before calling parseString(), parseFile() or
+ scanString(). (Thanks, Jean-Guillaume Paradis for catching this, and for
+ your suggestions on whitespace-significant parsing.)
+
+- Added transformString() method to ParserElement, as a complement to
+ scanString(). To use transformString, define a grammar and attach a parse
+ action to the overall grammar that modifies the returned token list.
+ Invoking transformString() on a target string will then scan for matches,
+ and replace the matched text patterns according to the logic in the parse
+ action. transformString() returns the resulting transformed string.
+ (Note: transformString() does *not* automatically expand tabs to spaces.)
+ Also added scanExamples.py to the examples directory to show sample uses of
+ scanString() and transformString().
+
+- Removed group() method that was introduced in beta2. This turns out NOT to
+ be equivalent to nesting within a Group() object, and I'd prefer not to sow
+ more seeds of confusion.
+
+- Fixed behavior of asXML() where tags for groups were incorrectly duplicated.
+ (Thanks, Brad Clements!)
+
+- Changed beta version message to display to stderr instead of stdout, to
+ make asXML() easier to use. (Thanks again, Brad.)
+
+
+Version 1.2beta2 - 19 May 2004
+------------------------------
+- *** SIMPLIFIED API *** - Parse actions that do not modify the list of tokens
+ no longer need to return a value. This simplifies those parse actions that
+ use the list of tokens to update a counter or record or display some of the
+ token content; these parse actions can simply end without having to specify
+ 'return toks'.
+
+- *** POSSIBLE API INCOMPATIBILITY *** - Fixed CaselessLiteral bug, where the
+ returned token text was not the original string (as stated in the docs),
+ but the original string converted to upper case. (Thanks, Dang Griffith!)
+ **NOTE: this may break some code that relied on this erroneous behavior.
+ Users should scan their code for uses of CaselessLiteral.**
+
+- *** POSSIBLE CODE INCOMPATIBILITY *** - I have renamed the internal
+ attributes on ParseResults from 'dict' and 'list' to '__tokdict' and
+ '__toklist', to avoid collisions with user-defined data fields named 'dict'
+ and 'list'. Any client code that accesses these attributes directly will
+ need to be modified. Hopefully the implementation of methods such as keys(),
+ items(), len(), etc. on ParseResults will make such direct attribute
+ accesses unnecessary.
+
+- Added asXML() method to ParseResults. This greatly simplifies the process
+ of parsing an input data file and generating XML-structured data.
+
+- Added getName() method to ParseResults. This method is helpful when
+ a grammar specifies ZeroOrMore or OneOrMore of a MatchFirst or Or
+ expression, and the parsing code needs to know which expression matched.
+ (Thanks, Eric van der Vlist, for this idea!)
+
+- Added items() and values() methods to ParseResults, to better support using
+ ParseResults as a Dictionary.
+
+- Added parseFile() as a convenience function to parse the contents of an
+ entire text file. Accepts either a file name or a file object. (Thanks
+ again, Dang!)
+
+- Added group() method to And, Or, and MatchFirst, as a short-cut alternative
+ to enclosing a construct inside a Group object.
+
+- Extended fourFn.py to support exponentiation, and simple built-in functions.
+
+- Added EBNF parser to examples, including a demo where it parses its own
+ EBNF! (Thanks to Seo Sanghyeon!)
+
+- Added Delphi Form parser to examples, dfmparse.py, plus a couple of
+ sample Delphi forms as tests. (Well done, Dang!)
+
+- Another performance speedup, 5-10%, inspired by Dang! Plus about a 20%
+ speedup, by pre-constructing and caching exception objects instead of
+ constructing them on the fly.
+
+- Fixed minor bug when specifying oneOf() with 'caseless=True'.
+
+- Cleaned up and added a few more docstrings, to improve the generated docs.
+
+
+Version 1.1.2 - 21 Mar 2004
+---------------------------
+- Fixed minor bug in scanString(), so that start location is at the start of
+ the matched tokens, not at the start of the whitespace before the matched
+ tokens.
+
+- Inclusion of HTML documentation, generated using Epydoc. Reformatted some
+ doc strings to better generate readable docs. (Beautiful work, Ed Loper,
+ thanks for Epydoc!)
+
+- Minor performance speedup, 5-15%
+
+- And on a process note, I've used the unittest module to define a series of
+ unit tests, to help avoid the embarrassment of the version 1.1 snafu.
+
+
+Version 1.1.1 - 6 Mar 2004
+--------------------------
+- Fixed critical bug introduced in 1.1, which broke MatchFirst(!) token
+ matching.
+ **THANK YOU, SEO SANGHYEON!!!**
+
+- Added "from __future__ import generators" to permit running under
+ pre-Python 2.3.
+
+- Added example getNTPservers.py, showing how to use pyparsing to extract
+ a text pattern from the HTML of a web page.
+
+
+Version 1.1 - 3 Mar 2004
+-------------------------
+- ***Changed API*** - While testing out parse actions, I found that the value
+ of loc passed in was not the starting location of the matched tokens, but
+ the location of the next token in the list. With this version, the location
+ passed to the parse action is now the starting location of the tokens that
+ matched.
+
+ A second part of this change is that the return value of parse actions no
+ longer needs to return a tuple containing both the location and the parsed
+ tokens (which may optionally be modified); parse actions only need to return
+ the list of tokens. Parse actions that return a tuple are deprecated; they
+ will still work properly for conversion/compatibility, but this behavior will
+ be removed in a future version.
+
+- Added validate() method, to help diagnose infinite recursion in a grammar tree.
+ validate() is not 100% fool-proof, but it can help track down nasty infinite
+ looping due to recursively referencing the same grammar construct without some
+ intervening characters.
+
+- Cleaned up default listing of some parse element types, to more closely match
+ ordinary BNF. Instead of the form <classname>:[contents-list], some changes
+ are:
+ . And(token1,token2,token3) is "{ token1 token2 token3 }"
+ . Or(token1,token2,token3) is "{ token1 ^ token2 ^ token3 }"
+ . MatchFirst(token1,token2,token3) is "{ token1 | token2 | token3 }"
+ . Optional(token) is "[ token ]"
+ . OneOrMore(token) is "{ token }..."
+ . ZeroOrMore(token) is "[ token ]..."
+
+- Fixed an infinite loop in oneOf if the input string contains a duplicated
+ option. (Thanks Brad Clements)
+
+- Fixed a bug when specifying a results name on an Optional token. (Thanks
+ again, Brad Clements)
+
+- Fixed a bug introduced in 1.0.6 when I converted quotedString to use
+ CharsNotIn; I accidentally permitted quoted strings to span newlines. I have
+ fixed this in this version to go back to the original behavior, in which
+ quoted strings do *not* span newlines.
+
+- Fixed minor bug in HTTP server log parser. (Thanks Jim Richardson)
+
+
+Version 1.0.6 - 13 Feb 2004
+----------------------------
+- Added CharsNotIn class (Thanks, Lee SangYeong). This is the opposite of
+ Word, in that it is constructed with a set of characters *not* to be matched.
+ (This enhancement also allowed me to clean up and simplify some of the
+ definitions for quoted strings, cStyleComment, and restOfLine.)
+
+- **MINOR API CHANGE** - Added joinString argument to the __init__ method of
+ Combine (Thanks, Thomas Kalka). joinString defaults to "", but some
+ applications might choose some other string to use instead, such as a blank
+ or newline. joinString was inserted as the second argument to __init__,
+ so if you have code that specifies an adjacent value, without using
+ 'adjacent=', this code will break.
+
+- Modified LineStart to recognize the start of an empty line.
+
+- Added optional caseless flag to oneOf(), to create a list of CaselessLiteral
+ tokens instead of Literal tokens.
+
+- Added some enhancements to the SQL example:
+ . Oracle-style comments (Thanks to Harald Armin Massa)
+ . simple WHERE clause
+
+- Minor performance speedup - 5-15%
+
+
+Version 1.0.5 - 19 Jan 2004
+----------------------------
+- Added scanString() generator method to ParserElement, to support regex-like
+ pattern-searching
+
+- Added items() list to ParseResults, to return named results as a
+ list of (key,value) pairs
+
+- Fixed memory overflow in asList() for deeply nested ParseResults (Thanks,
+ Sverrir Valgeirsson)
+
+- Minor performance speedup - 10-15%
+
+
+Version 1.0.4 - 8 Jan 2004
+---------------------------
+- Added positional tokens StringStart, StringEnd, LineStart, and LineEnd
+
+- Added commaSeparatedList to pre-defined global token definitions; also added
+ commasep.py to the examples directory, to demonstrate the differences between
+ parsing comma-separated data and simple line-splitting at commas
+
+- Minor API change: delimitedList does not automatically enclose the
+ list elements in a Group, but makes this the responsibility of the caller;
+ also, if invoked using 'combine=True', the list delimiters are also included
+ in the returned text (good for scoped variables, such as a.b.c or a::b::c, or
+ for directory paths such as a/b/c)
+
+- Performance speed-up again, 30-40%
+
+- Added httpServerLogParser.py to examples directory, as this is
+ a common parsing task
+
+
+Version 1.0.3 - 23 Dec 2003
+---------------------------
+- Performance speed-up again, 20-40%
+
+- Added Python distutils installation setup.py, etc. (thanks, Dave Kuhlman)
+
+
+Version 1.0.2 - 18 Dec 2003
+---------------------------
+- **NOTE: Changed API again!!!** (for the last time, I hope)
+
+ + Renamed module from parsing to pyparsing, to better reflect Python
+ linkage.
+
+- Also added dictExample.py to examples directory, to illustrate
+ usage of the Dict class.
+
+
+Version 1.0.1 - 17 Dec 2003
+---------------------------
+- **NOTE: Changed API!**
+
+ + Renamed 'len' argument on Word.__init__() to 'exact'
+
+- Performance speed-up, 10-30%
+
+
+Version 1.0.0 - 15 Dec 2003
+---------------------------
+- Initial public release
+
+Version 0.1.1 thru 0.1.17 - October-November, 2003
+--------------------------------------------------
+- initial development iterations:
+ - added Dict, Group
+ - added helper methods oneOf, delimitedList
+ - added helpers quotedString (and double and single), restOfLine, cStyleComment
+ - added MatchFirst as an alternative to the slower Or
+ - added UML class diagram
+ - fixed various logic bugs
diff --git a/src/HowToUsePyparsing.html b/src/HowToUsePyparsing.html
new file mode 100644
index 0000000..b071fcb
--- /dev/null
+++ b/src/HowToUsePyparsing.html
@@ -0,0 +1,1288 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="Docutils 0.8: http://docutils.sourceforge.net/" />
+<title>Using the pyparsing module</title>
+<meta name="author" content="Paul McGuire" />
+<meta name="date" content="June, 2011" />
+<meta name="copyright" content="Copyright © 2003-2011 Paul McGuire." />
+<style type="text/css">
+
+/*
+:Author: David Goodger (goodger@python.org)
+:Id: $Id: html4css1.css 6387 2010-08-13 12:23:41Z milde $
+:Copyright: This stylesheet has been placed in the public domain.
+
+Default cascading style sheet for the HTML output of Docutils.
+
+See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
+customize this style sheet.
+*/
+
+/* used to remove borders from tables and images */
+.borderless, table.borderless td, table.borderless th {
+ border: 0 }
+
+table.borderless td, table.borderless th {
+ /* Override padding for "table.docutils td" with "! important".
+ The right padding separates the table cells. */
+ padding: 0 0.5em 0 0 ! important }
+
+.first {
+ /* Override more specific margin styles with "! important". */
+ margin-top: 0 ! important }
+
+.last, .with-subtitle {
+ margin-bottom: 0 ! important }
+
+.hidden {
+ display: none }
+
+a.toc-backref {
+ text-decoration: none ;
+ color: black }
+
+blockquote.epigraph {
+ margin: 2em 5em ; }
+
+dl.docutils dd {
+ margin-bottom: 0.5em }
+
+object[type="image/svg+xml"], object[type="application/x-shockwave-flash"] {
+ overflow: hidden;
+}
+
+/* Uncomment (and remove this text!) to get bold-faced definition list terms
+dl.docutils dt {
+ font-weight: bold }
+*/
+
+div.abstract {
+ margin: 2em 5em }
+
+div.abstract p.topic-title {
+ font-weight: bold ;
+ text-align: center }
+
+div.admonition, div.attention, div.caution, div.danger, div.error,
+div.hint, div.important, div.note, div.tip, div.warning {
+ margin: 2em ;
+ border: medium outset ;
+ padding: 1em }
+
+div.admonition p.admonition-title, div.hint p.admonition-title,
+div.important p.admonition-title, div.note p.admonition-title,
+div.tip p.admonition-title {
+ font-weight: bold ;
+ font-family: sans-serif }
+
+div.attention p.admonition-title, div.caution p.admonition-title,
+div.danger p.admonition-title, div.error p.admonition-title,
+div.warning p.admonition-title {
+ color: red ;
+ font-weight: bold ;
+ font-family: sans-serif }
+
+/* Uncomment (and remove this text!) to get reduced vertical space in
+ compound paragraphs.
+div.compound .compound-first, div.compound .compound-middle {
+ margin-bottom: 0.5em }
+
+div.compound .compound-last, div.compound .compound-middle {
+ margin-top: 0.5em }
+*/
+
+div.dedication {
+ margin: 2em 5em ;
+ text-align: center ;
+ font-style: italic }
+
+div.dedication p.topic-title {
+ font-weight: bold ;
+ font-style: normal }
+
+div.figure {
+ margin-left: 2em ;
+ margin-right: 2em }
+
+div.footer, div.header {
+ clear: both;
+ font-size: smaller }
+
+div.line-block {
+ display: block ;
+ margin-top: 1em ;
+ margin-bottom: 1em }
+
+div.line-block div.line-block {
+ margin-top: 0 ;
+ margin-bottom: 0 ;
+ margin-left: 1.5em }
+
+div.sidebar {
+ margin: 0 0 0.5em 1em ;
+ border: medium outset ;
+ padding: 1em ;
+ background-color: #ffffee ;
+ width: 40% ;
+ float: right ;
+ clear: right }
+
+div.sidebar p.rubric {
+ font-family: sans-serif ;
+ font-size: medium }
+
+div.system-messages {
+ margin: 5em }
+
+div.system-messages h1 {
+ color: red }
+
+div.system-message {
+ border: medium outset ;
+ padding: 1em }
+
+div.system-message p.system-message-title {
+ color: red ;
+ font-weight: bold }
+
+div.topic {
+ margin: 2em }
+
+h1.section-subtitle, h2.section-subtitle, h3.section-subtitle,
+h4.section-subtitle, h5.section-subtitle, h6.section-subtitle {
+ margin-top: 0.4em }
+
+h1.title {
+ text-align: center }
+
+h2.subtitle {
+ text-align: center }
+
+hr.docutils {
+ width: 75% }
+
+img.align-left, .figure.align-left, object.align-left {
+ clear: left ;
+ float: left ;
+ margin-right: 1em }
+
+img.align-right, .figure.align-right, object.align-right {
+ clear: right ;
+ float: right ;
+ margin-left: 1em }
+
+img.align-center, .figure.align-center, object.align-center {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+.align-left {
+ text-align: left }
+
+.align-center {
+ clear: both ;
+ text-align: center }
+
+.align-right {
+ text-align: right }
+
+/* reset inner alignment in figures */
+div.align-right {
+ text-align: left }
+
+/* div.align-center * { */
+/* text-align: left } */
+
+ol.simple, ul.simple {
+ margin-bottom: 1em }
+
+ol.arabic {
+ list-style: decimal }
+
+ol.loweralpha {
+ list-style: lower-alpha }
+
+ol.upperalpha {
+ list-style: upper-alpha }
+
+ol.lowerroman {
+ list-style: lower-roman }
+
+ol.upperroman {
+ list-style: upper-roman }
+
+p.attribution {
+ text-align: right ;
+ margin-left: 50% }
+
+p.caption {
+ font-style: italic }
+
+p.credits {
+ font-style: italic ;
+ font-size: smaller }
+
+p.label {
+ white-space: nowrap }
+
+p.rubric {
+ font-weight: bold ;
+ font-size: larger ;
+ color: maroon ;
+ text-align: center }
+
+p.sidebar-title {
+ font-family: sans-serif ;
+ font-weight: bold ;
+ font-size: larger }
+
+p.sidebar-subtitle {
+ font-family: sans-serif ;
+ font-weight: bold }
+
+p.topic-title {
+ font-weight: bold }
+
+pre.address {
+ margin-bottom: 0 ;
+ margin-top: 0 ;
+ font: inherit }
+
+pre.literal-block, pre.doctest-block {
+ margin-left: 2em ;
+ margin-right: 2em }
+
+span.classifier {
+ font-family: sans-serif ;
+ font-style: oblique }
+
+span.classifier-delimiter {
+ font-family: sans-serif ;
+ font-weight: bold }
+
+span.interpreted {
+ font-family: sans-serif }
+
+span.option {
+ white-space: nowrap }
+
+span.pre {
+ white-space: pre }
+
+span.problematic {
+ color: red }
+
+span.section-subtitle {
+ /* font-size relative to parent (h1..h6 element) */
+ font-size: 80% }
+
+table.citation {
+ border-left: solid 1px gray;
+ margin-left: 1px }
+
+table.docinfo {
+ margin: 2em 4em }
+
+table.docutils {
+ margin-top: 0.5em ;
+ margin-bottom: 0.5em }
+
+table.footnote {
+ border-left: solid 1px black;
+ margin-left: 1px }
+
+table.docutils td, table.docutils th,
+table.docinfo td, table.docinfo th {
+ padding-left: 0.5em ;
+ padding-right: 0.5em ;
+ vertical-align: top }
+
+table.docutils th.field-name, table.docinfo th.docinfo-name {
+ font-weight: bold ;
+ text-align: left ;
+ white-space: nowrap ;
+ padding-left: 0 }
+
+h1 tt.docutils, h2 tt.docutils, h3 tt.docutils,
+h4 tt.docutils, h5 tt.docutils, h6 tt.docutils {
+ font-size: 100% }
+
+ul.auto-toc {
+ list-style-type: none }
+
+</style>
+</head>
+<body>
+<div class="document" id="using-the-pyparsing-module">
+<h1 class="title">Using the pyparsing module</h1>
+<table class="docinfo" frame="void" rules="none">
+<col class="docinfo-name" />
+<col class="docinfo-content" />
+<tbody valign="top">
+<tr><th class="docinfo-name">Author:</th>
+<td>Paul McGuire</td></tr>
+<tr><th class="docinfo-name">Address:</th>
+<td><pre class="address">
+<a class="first last reference external" href="mailto:ptmcg&#64;users.sourceforge.net">ptmcg&#64;users.sourceforge.net</a>
+</pre>
+</td></tr>
+<tr><th class="docinfo-name">Revision:</th>
+<td>1.5.6</td></tr>
+<tr><th class="docinfo-name">Date:</th>
+<td>June, 2011</td></tr>
+<tr><th class="docinfo-name">Copyright:</th>
+<td>Copyright © 2003-2011 Paul McGuire.</td></tr>
+</tbody>
+</table>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">abstract:</th><td class="field-body">This document provides how-to instructions for the
+pyparsing library, an easy-to-use Python module for constructing
+and executing basic text parsers. The pyparsing module is useful
+for evaluating user-definable
+expressions, processing custom application language commands, or
+extracting data from formatted reports.</td>
+</tr>
+</tbody>
+</table>
+<div class="contents topic" id="contents">
+<p class="topic-title first">Contents</p>
+<ul class="auto-toc simple">
+<li><a class="reference internal" href="#steps-to-follow" id="id1">1&nbsp;&nbsp;&nbsp;Steps to follow</a><ul class="auto-toc">
+<li><a class="reference internal" href="#hello-world" id="id2">1.1&nbsp;&nbsp;&nbsp;Hello, World!</a></li>
+<li><a class="reference internal" href="#usage-notes" id="id3">1.2&nbsp;&nbsp;&nbsp;Usage notes</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#classes" id="id4">2&nbsp;&nbsp;&nbsp;Classes</a><ul class="auto-toc">
+<li><a class="reference internal" href="#classes-in-the-pyparsing-module" id="id5">2.1&nbsp;&nbsp;&nbsp;Classes in the pyparsing module</a></li>
+<li><a class="reference internal" href="#basic-parserelement-subclasses" id="id6">2.2&nbsp;&nbsp;&nbsp;Basic ParserElement subclasses</a></li>
+<li><a class="reference internal" href="#expression-subclasses" id="id7">2.3&nbsp;&nbsp;&nbsp;Expression subclasses</a></li>
+<li><a class="reference internal" href="#expression-operators" id="id8">2.4&nbsp;&nbsp;&nbsp;Expression operators</a></li>
+<li><a class="reference internal" href="#positional-subclasses" id="id9">2.5&nbsp;&nbsp;&nbsp;Positional subclasses</a></li>
+<li><a class="reference internal" href="#converter-subclasses" id="id10">2.6&nbsp;&nbsp;&nbsp;Converter subclasses</a></li>
+<li><a class="reference internal" href="#special-subclasses" id="id11">2.7&nbsp;&nbsp;&nbsp;Special subclasses</a></li>
+<li><a class="reference internal" href="#other-classes" id="id12">2.8&nbsp;&nbsp;&nbsp;Other classes</a></li>
+<li><a class="reference internal" href="#exception-classes-and-troubleshooting" id="id13">2.9&nbsp;&nbsp;&nbsp;Exception classes and Troubleshooting</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#miscellaneous-attributes-and-methods" id="id14">3&nbsp;&nbsp;&nbsp;Miscellaneous attributes and methods</a><ul class="auto-toc">
+<li><a class="reference internal" href="#helper-methods" id="id15">3.1&nbsp;&nbsp;&nbsp;Helper methods</a></li>
+<li><a class="reference internal" href="#helper-parse-actions" id="id16">3.2&nbsp;&nbsp;&nbsp;Helper parse actions</a></li>
+<li><a class="reference internal" href="#common-string-and-token-constants" id="id17">3.3&nbsp;&nbsp;&nbsp;Common string and token constants</a></li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="section" id="steps-to-follow">
+<h1><a class="toc-backref" href="#id1">1&nbsp;&nbsp;&nbsp;Steps to follow</a></h1>
+<p>To parse an incoming data string, the client code must follow these steps:</p>
+<ol class="arabic simple">
+<li>First define the tokens and patterns to be matched, and assign
+this to a program variable. Optional results names or parsing
+actions can also be defined at this time.</li>
+<li>Call <tt class="docutils literal">parseString()</tt> or <tt class="docutils literal">scanString()</tt> on this variable, passing in
+the string to
+be parsed. During the matching process, whitespace between
+tokens is skipped by default (although this can be changed).
+When token matches occur, any defined parse action methods are
+called.</li>
+<li>Process the parsed results, returned as a list of strings.
+Matching results may also be accessed as named attributes of
+the returned results, if names are defined in the definition of
+the token pattern, using <tt class="docutils literal">setResultsName()</tt>.</li>
+</ol>
+<div class="section" id="hello-world">
+<h2><a class="toc-backref" href="#id2">1.1&nbsp;&nbsp;&nbsp;Hello, World!</a></h2>
+<p>The following complete Python program will parse the greeting &quot;Hello, World!&quot;,
+or any other greeting of the form &quot;&lt;salutation&gt;, &lt;addressee&gt;!&quot;:</p>
+<pre class="literal-block">
+from pyparsing import Word, alphas
+
+greet = Word( alphas ) + &quot;,&quot; + Word( alphas ) + &quot;!&quot;
+greeting = greet.parseString( &quot;Hello, World!&quot; )
+print greeting
+</pre>
+<p>The parsed tokens are returned in the following form:</p>
+<pre class="literal-block">
+['Hello', ',', 'World', '!']
+</pre>
+</div>
+<div class="section" id="usage-notes">
+<h2><a class="toc-backref" href="#id3">1.2&nbsp;&nbsp;&nbsp;Usage notes</a></h2>
+<ul>
+<li><p class="first">The pyparsing module can be used to interpret simple command
+strings or algebraic expressions, or can be used to extract data
+from text reports with complicated format and structure (&quot;screen
+or report scraping&quot;). However, it is possible that your defined
+matching patterns may accept invalid inputs. Use pyparsing to
+extract data from strings assumed to be well-formatted.</p>
+</li>
+<li><p class="first">To keep up the readability of your code, use <a class="reference internal" href="#operators">operators</a> such as <tt class="docutils literal">+</tt>, <tt class="docutils literal">|</tt>,
+<tt class="docutils literal">^</tt>, and <tt class="docutils literal">~</tt> to combine expressions. You can also combine
+string literals with ParseExpressions - they will be
+automatically converted to Literal objects. For example:</p>
+<pre class="literal-block">
+integer = Word( nums ) # simple unsigned integer
+variable = Word( alphas, max=1 ) # single letter variable, such as x, z, m, etc.
+arithOp = Word( &quot;+-*/&quot;, max=1 ) # arithmetic operators
+equation = variable + &quot;=&quot; + integer + arithOp + integer # will match &quot;x=2+2&quot;, etc.
+</pre>
+<p>In the definition of <tt class="docutils literal">equation</tt>, the string <tt class="docutils literal">&quot;=&quot;</tt> will get added as
+a <tt class="docutils literal"><span class="pre">Literal(&quot;=&quot;)</span></tt>, but in a more readable way.</p>
+</li>
+<li><p class="first">The pyparsing module's default behavior is to ignore whitespace. This is the
+case for 99% of all parsers ever written. This allows you to write simple, clean
+grammars, such as the above <tt class="docutils literal">equation</tt>, without having to clutter it up with
+extraneous <tt class="docutils literal">ws</tt> markers. The <tt class="docutils literal">equation</tt> grammar will successfully parse all of the
+following statements:</p>
+<pre class="literal-block">
+x=2+2
+x = 2+2
+a = 10 * 4
+r= 1234/ 100000
+</pre>
+<p>Of course, it is quite simple to extend this example to support more elaborate expressions, with
+nesting with parentheses, floating point numbers, scientific notation, and named constants
+(such as <tt class="docutils literal">e</tt> or <tt class="docutils literal">pi</tt>). See <tt class="docutils literal">fourFn.py</tt>, included in the examples directory.</p>
+</li>
+<li><p class="first">To modify pyparsing's default whitespace skipping, you can use one or
+more of the following methods:</p>
+<ul>
+<li><p class="first">use the static method <tt class="docutils literal">ParserElement.setDefaultWhitespaceChars</tt>
+to override the normal set of whitespace chars (' \t\n'). For instance
+when defining a grammar in which newlines are significant, you should
+call <tt class="docutils literal">ParserElement.setDefaultWhitespaceChars(' \t')</tt> to remove
+newline from the set of skippable whitespace characters. Calling
+this method will affect all pyparsing expressions defined afterward.</p>
+</li>
+<li><p class="first">call <tt class="docutils literal">leaveWhitespace()</tt> on individual expressions, to suppress the
+skipping of whitespace before trying to match the expression</p>
+</li>
+<li><p class="first">use <tt class="docutils literal">Combine</tt> to require that successive expressions must be
+adjacent in the input string. For instance, this expression:</p>
+<pre class="literal-block">
+real = Word(nums) + '.' + Word(nums)
+</pre>
+<p>will match &quot;3.14159&quot;, but will also match &quot;3 . 12&quot;. It will also
+return the matched results as ['3', '.', '14159']. By changing this
+expression to:</p>
+<pre class="literal-block">
+real = Combine( Word(nums) + '.' + Word(nums) )
+</pre>
+<p>it will not match numbers with embedded spaces, and it will return a
+single concatenated string '3.14159' as the parsed token.</p>
+</li>
+</ul>
+</li>
+<li><p class="first">Repetition of expressions can be indicated using the '*' operator. An
+expression may be multiplied by an integer value (to indicate an exact
+repetition count), or by a tuple containing
+two integers, or None and an integer, representing min and max repetitions
+(with None representing no min or no max, depending on whether it is the first or
+second tuple element). See the following examples, where n is used to
+indicate an integer value:</p>
+<ul class="simple">
+<li><tt class="docutils literal">expr*3</tt> is equivalent to <tt class="docutils literal">expr + expr + expr</tt></li>
+<li><tt class="docutils literal"><span class="pre">expr*(2,3)</span></tt> is equivalent to <tt class="docutils literal">expr + expr + Optional(expr)</tt></li>
+<li><tt class="docutils literal"><span class="pre">expr*(n,None)</span></tt> or <tt class="docutils literal"><span class="pre">expr*(n,)</span></tt> is equivalent
+to <tt class="docutils literal">expr*n + ZeroOrMore(expr)</tt> (read as &quot;at least n instances of expr&quot;)</li>
+<li><tt class="docutils literal"><span class="pre">expr*(None,n)</span></tt> is equivalent to <tt class="docutils literal"><span class="pre">expr*(0,n)</span></tt>
+(read as &quot;0 to n instances of expr&quot;)</li>
+<li><tt class="docutils literal"><span class="pre">expr*(None,None)</span></tt> is equivalent to <tt class="docutils literal">ZeroOrMore(expr)</tt></li>
+<li><tt class="docutils literal"><span class="pre">expr*(1,None)</span></tt> is equivalent to <tt class="docutils literal">OneOrMore(expr)</tt></li>
+</ul>
+<p>Note that <tt class="docutils literal"><span class="pre">expr*(None,n)</span></tt> does not raise an exception if
+more than n exprs exist in the input stream; that is,
+<tt class="docutils literal"><span class="pre">expr*(None,n)</span></tt> does not enforce a maximum number of expr
+occurrences. If this behavior is desired, then write
+<tt class="docutils literal"><span class="pre">expr*(None,n)</span> + ~expr</tt>.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">MatchFirst</tt> expressions are matched left-to-right, and the first
+match found will skip all later expressions within, so be sure
+to define less-specific patterns after more-specific patterns.
+If you are not sure which expressions are most specific, use Or
+expressions (defined using the <tt class="docutils literal">^</tt> operator) - they will always
+match the longest expression, although they are more
+compute-intensive.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">Or</tt> expressions will evaluate all of the specified subexpressions
+to determine which is the &quot;best&quot; match, that is, which matches
+the longest string in the input data. In case of a tie, the
+left-most expression in the <tt class="docutils literal">Or</tt> list will win.</p>
+</li>
+<li><p class="first">If parsing the contents of an entire file, pass it to the
+<tt class="docutils literal">parseFile</tt> method using:</p>
+<pre class="literal-block">
+expr.parseFile( sourceFile )
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">ParseExceptions</tt> will report the location where an expected token
+or expression failed to match. For example, if we tried to use our
+&quot;Hello, World!&quot; parser to parse &quot;Hello World!&quot; (leaving out the separating
+comma), we would get an exception, with the message:</p>
+<pre class="literal-block">
+pyparsing.ParseException: Expected &quot;,&quot; (6), (1,7)
+</pre>
+<p>In the case of complex
+expressions, the reported location may not be exactly where you
+would expect. See more information under <a class="reference internal" href="#parseexception">ParseException</a> .</p>
+</li>
+<li><p class="first">Use the <tt class="docutils literal">Group</tt> class to enclose logical groups of tokens within a
+sublist. This will help organize your results into more
+hierarchical form (the default behavior is to return matching
+tokens as a flat list of matching input strings).</p>
+</li>
+<li><p class="first">Punctuation may be significant for matching, but is rarely of
+much interest in the parsed results. Use the <tt class="docutils literal">suppress()</tt> method
+to keep these tokens from cluttering up your returned lists of
+tokens. For example, <tt class="docutils literal">delimitedList()</tt> matches a succession of
+one or more expressions, separated by delimiters (commas by
+default), but only returns a list of the actual expressions -
+the delimiters are used for parsing, but are suppressed from the
+returned output.</p>
+</li>
+<li><p class="first">Parse actions can be used to convert values from strings to
+other data types (ints, floats, booleans, etc.).</p>
+</li>
+<li><p class="first">Results names are recommended for retrieving tokens from complex
+expressions. It is much easier to access a token using its field
+name than using a positional index, especially if the expression
+contains optional elements. You can also shortcut
+the <tt class="docutils literal">setResultsName</tt> call:</p>
+<pre class="literal-block">
+stats = &quot;AVE:&quot; + realNum.setResultsName(&quot;average&quot;) + \
+ &quot;MIN:&quot; + realNum.setResultsName(&quot;min&quot;) + \
+ &quot;MAX:&quot; + realNum.setResultsName(&quot;max&quot;)
+</pre>
+<p>can now be written as this:</p>
+<pre class="literal-block">
+stats = &quot;AVE:&quot; + realNum(&quot;average&quot;) + \
+ &quot;MIN:&quot; + realNum(&quot;min&quot;) + \
+ &quot;MAX:&quot; + realNum(&quot;max&quot;)
+</pre>
+</li>
+<li><p class="first">Be careful when defining parse actions that modify global variables or
+data structures (as in <tt class="docutils literal">fourFn.py</tt>), especially for low level tokens
+or expressions that may occur within an <tt class="docutils literal">And</tt> expression; an early element
+of an <tt class="docutils literal">And</tt> may match, but the overall expression may fail.</p>
+</li>
+<li><p class="first">Performance of pyparsing may be slow for complex grammars and/or large
+input strings. The <a class="reference external" href="http://psyco.sourceforge.net/">psyco</a> package can be used to improve the speed of the
+pyparsing module with no changes to grammar or program logic - observed
+improvements have been in the 20-50% range.</p>
+</li>
+</ul>
+</div>
+</div>
+<div class="section" id="classes">
+<h1><a class="toc-backref" href="#id4">2&nbsp;&nbsp;&nbsp;Classes</a></h1>
+<div class="section" id="classes-in-the-pyparsing-module">
+<h2><a class="toc-backref" href="#id5">2.1&nbsp;&nbsp;&nbsp;Classes in the pyparsing module</a></h2>
+<p><tt class="docutils literal">ParserElement</tt> - abstract base class for all pyparsing classes;
+methods for code to use are:</p>
+<ul>
+<li><p class="first"><tt class="docutils literal">parseString( sourceString, parseAll=False )</tt> - only called once, on the overall
+matching pattern; returns a <a class="reference internal" href="#parseresults">ParseResults</a> object that makes the
+matched tokens available as a list, and optionally as a dictionary,
+or as an object with named attributes; if parseAll is set to True, then
+parseString will raise a ParseException if the grammar does not process
+the complete input string.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">parseFile( sourceFile )</tt> - a convenience function, that accepts an
+input file object or filename. The file contents are passed as a
+string to <tt class="docutils literal">parseString()</tt>. <tt class="docutils literal">parseFile</tt> also supports the <tt class="docutils literal">parseAll</tt> argument.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">scanString( sourceString )</tt> - generator function, used to find and
+extract matching text in the given source string; for each matched text,
+returns a tuple of:</p>
+<ul class="simple">
+<li>matched tokens (packaged as a <a class="reference internal" href="#parseresults">ParseResults</a> object)</li>
+<li>start location of the matched text in the given source string</li>
+<li>end location in the given source string</li>
+</ul>
+<p><tt class="docutils literal">scanString</tt> allows you to scan through the input source string for
+random matches, instead of exhaustively defining the grammar for the entire
+source text (as would be required with <tt class="docutils literal">parseString</tt>).</p>
+</li>
+<li><p class="first"><tt class="docutils literal">transformString( sourceString )</tt> - convenience wrapper function for
+<tt class="docutils literal">scanString</tt>, to process the input source string, and replace matching
+text with the tokens returned from parse actions defined in the grammar
+(see <a class="reference internal" href="#setparseaction">setParseAction</a>).</p>
+</li>
+<li><p class="first"><tt class="docutils literal">searchString( sourceString )</tt> - another convenience wrapper function for
+<tt class="docutils literal">scanString</tt>, returns a list of the matching tokens returned from each
+call to <tt class="docutils literal">scanString</tt>.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setName( name )</tt> - associate a short descriptive name for this
+element, useful in displaying exceptions and trace information</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setResultsName( string, listAllMatches=False )</tt> - name to be given
+to tokens matching
+the element; if multiple tokens match within
+a repetition group (such as <tt class="docutils literal">ZeroOrMore</tt> or <tt class="docutils literal">delimitedList</tt>), the
+default is to return only the last matching token - if listAllMatches
+is set to True, then a list of all the matching tokens is returned.
+(New in 1.5.6 - a results name with a trailing '*' character will be
+interpreted as setting listAllMatches to True.)
+Note:
+<tt class="docutils literal">setResultsName</tt> returns a <em>copy</em> of the element so that a single
+basic element can be referenced multiple times and given
+different names within a complex grammar.</p>
+</li>
+</ul>
+<ul id="setparseaction">
+<li><p class="first"><tt class="docutils literal">setParseAction( *fn )</tt> - specify one or more functions to call after successful
+matching of the element; each function is defined as <tt class="docutils literal">fn( s,
+loc, toks )</tt>, where:</p>
+<ul class="simple">
+<li><tt class="docutils literal">s</tt> is the original parse string</li>
+<li><tt class="docutils literal">loc</tt> is the location in the string where matching started</li>
+<li><tt class="docutils literal">toks</tt> is the list of the matched tokens, packaged as a <a class="reference internal" href="#parseresults">ParseResults</a> object</li>
+</ul>
+<p>Multiple functions can be attached to a ParserElement by specifying multiple
+arguments to setParseAction, or by calling setParseAction multiple times.</p>
+<p>Each parse action function can return a modified <tt class="docutils literal">toks</tt> list, to perform conversion, or
+string modifications. For brevity, <tt class="docutils literal">fn</tt> may also be a
+lambda - here is an example of using a parse action to convert matched
+integer tokens from strings to integers:</p>
+<pre class="literal-block">
+intNumber = Word(nums).setParseAction( lambda s,l,t: [ int(t[0]) ] )
+</pre>
+<p>If <tt class="docutils literal">fn</tt> does not modify the <tt class="docutils literal">toks</tt> list, it does not need to return
+anything at all.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setBreak( breakFlag=True )</tt> - if breakFlag is True, calls pdb.set_trace()
+as this expression is about to be parsed</p>
+</li>
+<li><p class="first"><tt class="docutils literal">copy()</tt> - returns a copy of a ParserElement; can be used to use the same
+parse expression in different places in a grammar, with different parse actions
+attached to each</p>
+</li>
+<li><p class="first"><tt class="docutils literal">leaveWhitespace()</tt> - change default behavior of skipping
+whitespace before starting matching (mostly used internally to the
+pyparsing module, rarely used by client code)</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setWhitespaceChars( chars )</tt> - define the set of chars to be ignored
+as whitespace before trying to match a specific ParserElement, in place of the
+default set of whitespace (space, tab, newline, and return)</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setDefaultWhitespaceChars( chars )</tt> - class-level method to override
+the default set of whitespace chars for all subsequently created ParserElements
+(including copies); useful when defining grammars that treat one or more of the
+default whitespace characters as significant (such as a line-sensitive grammar, to
+omit newline from the list of ignorable whitespace)</p>
+</li>
+<li><p class="first"><tt class="docutils literal">suppress()</tt> - convenience function to suppress the output of the
+given element, instead of wrapping it with a Suppress object.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">ignore( expr )</tt> - function to specify parse expression to be
+ignored while matching defined patterns; can be called
+repeatedly to specify multiple expressions; useful to specify
+patterns of comment syntax, for example</p>
+</li>
+<li><p class="first"><tt class="docutils literal">setDebug( dbgFlag=True )</tt> - function to enable/disable tracing output
+when trying to match this element</p>
+</li>
+<li><p class="first"><tt class="docutils literal">validate()</tt> - function to verify that the defined grammar does not
+contain infinitely recursive constructs</p>
+</li>
+</ul>
+<ul class="simple" id="parsewithtabs">
+<li><tt class="docutils literal">parseWithTabs()</tt> - function to override default behavior of converting
+tabs to spaces before parsing the input string; rarely used, except when
+specifying whitespace-significant grammars using the <a class="reference internal" href="#white">White</a> class.</li>
+<li><tt class="docutils literal">enablePackrat()</tt> - a class-level static method to enable a memoizing
+performance enhancement, known as &quot;packrat parsing&quot;. Packrat parsing is
+disabled by default, since it may conflict with some user programs that use
+parse actions. To activate the packrat feature, your
+program must call the class method ParserElement.enablePackrat(). If
+your program uses psyco to &quot;compile as you go&quot;, you must call
+enablePackrat before calling psyco.full(). If you do not do this,
+Python will crash. For best results, call enablePackrat() immediately
+after importing pyparsing.</li>
+</ul>
+</div>
+<div class="section" id="basic-parserelement-subclasses">
+<h2><a class="toc-backref" href="#id6">2.2&nbsp;&nbsp;&nbsp;Basic ParserElement subclasses</a></h2>
+<ul class="simple">
+<li><tt class="docutils literal">Literal</tt> - construct with a string to be matched exactly</li>
+<li><tt class="docutils literal">CaselessLiteral</tt> - construct with a string to be matched, but
+without case checking; results are always returned as the
+defining literal, NOT as they are found in the input string</li>
+<li><tt class="docutils literal">Keyword</tt> - similar to Literal, but must be immediately followed by
+whitespace, punctuation, or other non-keyword characters; prevents
+accidental matching of a non-keyword that happens to begin with a
+defined keyword</li>
+<li><tt class="docutils literal">CaselessKeyword</tt> - similar to Keyword, but with caseless matching
+behavior</li>
+</ul>
+<ul id="word">
+<li><p class="first"><tt class="docutils literal">Word</tt> - one or more contiguous characters; construct with a
+string containing the set of allowed initial characters, and an
+optional second string of allowed body characters; for instance,
+a common Word construct is to match a code identifier - in C, a
+valid identifier must start with an alphabetic character or an
+underscore ('_'), followed by a body that can also include numeric
+digits. That is, <tt class="docutils literal">a</tt>, <tt class="docutils literal">i</tt>, <tt class="docutils literal">MAX_LENGTH</tt>, <tt class="docutils literal">_a1</tt>, <tt class="docutils literal">b_109_</tt>, and
+<tt class="docutils literal">plan9FromOuterSpace</tt>
+are all valid identifiers; <tt class="docutils literal">9b7z</tt>, <tt class="docutils literal">$a</tt>, <tt class="docutils literal">.section</tt>, and <tt class="docutils literal">0debug</tt>
+are not. To
+define an identifier using a Word, use either of the following:</p>
+<pre class="literal-block">
+- Word( alphas+&quot;_&quot;, alphanums+&quot;_&quot; )
+- Word( srange(&quot;[a-zA-Z_]&quot;), srange(&quot;[a-zA-Z0-9_]&quot;) )
+</pre>
+<p>If only one
+string is given, it specifies that the same character set defined
+for the initial character is used for the word body; for instance, to
+define an identifier that can only be composed of capital letters and
+underscores, use:</p>
+<pre class="literal-block">
+- Word( &quot;ABCDEFGHIJKLMNOPQRSTUVWXYZ_&quot; )
+- Word( srange(&quot;[A-Z_]&quot;) )
+</pre>
+<p>A Word may
+also be constructed with any of the following optional parameters:</p>
+<ul class="simple">
+<li>min - indicating a minimum length of matching characters</li>
+<li>max - indicating a maximum length of matching characters</li>
+<li>exact - indicating an exact length of matching characters</li>
+</ul>
+<p>If exact is specified, it will override any values for min or max.</p>
+<p>New in 1.5.6 - Sometimes you want to define a word using all
+characters in a range except for one or two of them; you can do this
+with the new excludeChars argument. This is helpful if you want to define
+a word with all printables except for a single delimiter character, such
+as '.'. Previously, you would have to create a custom string to pass to Word.
+With this change, you can just create <tt class="docutils literal">Word(printables, <span class="pre">excludeChars='.')</span></tt>.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">CharsNotIn</tt> - similar to <a class="reference internal" href="#word">Word</a>, but matches characters not
+in the given constructor string (accepts only one string for both
+initial and body characters); also supports min, max, and exact
+optional parameters.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">Regex</tt> - a powerful construct, that accepts a regular expression
+to be matched at the current parse position; accepts an optional
+flags parameter, corresponding to the flags parameter in the re.compile
+method; if the expression includes named sub-fields, they will be
+represented in the returned <a class="reference internal" href="#parseresults">ParseResults</a></p>
+</li>
+<li><p class="first"><tt class="docutils literal">QuotedString</tt> - supports the definition of custom quoted string
+formats, in addition to pyparsing's built-in dblQuotedString and
+sglQuotedString. QuotedString allows you to specify the following
+parameters:</p>
+<ul class="simple">
+<li>quoteChar - string of one or more characters defining the quote delimiting string</li>
+<li>escChar - character to escape quotes, typically backslash (default=None)</li>
+<li>escQuote - special quote sequence to escape an embedded quote string (such as SQL's &quot;&quot; to escape an embedded &quot;) (default=None)</li>
+<li>multiline - boolean indicating whether quotes can span multiple lines (default=False)</li>
+<li>unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)</li>
+<li>endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None =&gt; same as quoteChar)</li>
+</ul>
+</li>
+<li><p class="first"><tt class="docutils literal">SkipTo</tt> - skips ahead in the input string, accepting any
+characters up to the specified pattern; may be constructed with
+the following optional parameters:</p>
+<ul class="simple">
+<li>include - if set to true, also consumes the match expression
+(default is false)</li>
+<li>ignore - allows the user to specify patterns to not be matched,
+to prevent false matches</li>
+<li>failOn - if a literal string or expression is given for this argument, it defines an expression that
+should cause the <tt class="docutils literal">SkipTo</tt> expression to fail, and not skip over that expression</li>
+</ul>
+</li>
+</ul>
+<ul class="simple" id="white">
+<li><tt class="docutils literal">White</tt> - also similar to <a class="reference internal" href="#word">Word</a>, but matches whitespace
+characters. Not usually needed, as whitespace is implicitly
+ignored by pyparsing. However, some grammars are whitespace-sensitive,
+such as those that use leading tabs or spaces to indicate grouping
+or hierarchy. (If matching on tab characters, be sure to call
+<a class="reference internal" href="#parsewithtabs">parseWithTabs</a> on the top-level parse element.)</li>
+<li><tt class="docutils literal">Empty</tt> - a null expression, requiring no characters - will always
+match; useful for debugging and for specialized grammars</li>
+<li><tt class="docutils literal">NoMatch</tt> - opposite of Empty, will never match; useful for debugging
+and for specialized grammars</li>
+</ul>
+</div>
+<div class="section" id="expression-subclasses">
+<h2><a class="toc-backref" href="#id7">2.3&nbsp;&nbsp;&nbsp;Expression subclasses</a></h2>
+<ul>
+<li><p class="first"><tt class="docutils literal">And</tt> - construct with a list of ParserElements, all of which must
+match for And to match; can also be created using the '+'
+operator; multiple expressions can be Anded together using the '*'
+operator as in:</p>
+<pre class="literal-block">
+ipAddress = Word(nums) + ('.'+Word(nums))*3
+</pre>
+<p>A tuple can be used as the multiplier, indicating a min/max:</p>
+<pre class="literal-block">
+usPhoneNumber = Word(nums) + ('-'+Word(nums))*(1,2)
+</pre>
+<p>A special form of <tt class="docutils literal">And</tt> is created if the '-' operator is used
+instead of the '+' operator. In the ipAddress example above, if
+no trailing '.' and Word(nums) are found after matching the initial
+Word(nums), then pyparsing will back up in the grammar and try other
+alternatives to ipAddress. However, if ipAddress is defined as:</p>
+<pre class="literal-block">
+strictIpAddress = Word(nums) - ('.'+Word(nums))*3
+</pre>
+<p>then no backing up is done. If the first Word(nums) of strictIpAddress
+is matched, then any mismatch after that will raise a ParseSyntaxException,
+which will halt the parsing process immediately. By careful use of the
+'-' operator, grammars can provide meaningful error messages close to
+the location where the incoming text does not match the specified
+grammar.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">Or</tt> - construct with a list of ParserElements, any of which must
+match for Or to match; if more than one expression matches, the
+expression that makes the longest match will be used; can also
+be created using the '^' operator</p>
+</li>
+<li><p class="first"><tt class="docutils literal">MatchFirst</tt> - construct with a list of ParserElements, any of
+which must match for MatchFirst to match; matching is done
+left-to-right, taking the first expression that matches; can
+also be created using the '|' operator</p>
+</li>
+<li><p class="first"><tt class="docutils literal">Each</tt> - similar to And, in that all of the provided expressions
+must match; however, Each permits matching to be done in any order;
+can also be created using the '&amp;' operator</p>
+</li>
+<li><p class="first"><tt class="docutils literal">Optional</tt> - construct with a ParserElement, but this element is
+not required to match; can be constructed with an optional <tt class="docutils literal">default</tt> argument,
+containing a default string or object to be supplied if the given optional
+parse element is not found in the input string; parse action will only
+be called if a match is found, or if a default is specified</p>
+</li>
+<li><p class="first"><tt class="docutils literal">ZeroOrMore</tt> - similar to Optional, but can be repeated</p>
+</li>
+<li><p class="first"><tt class="docutils literal">OneOrMore</tt> - similar to ZeroOrMore, but at least one match must
+be present</p>
+</li>
+<li><p class="first"><tt class="docutils literal">FollowedBy</tt> - a lookahead expression, requires matching of the given
+expressions, but does not advance the parsing position within the input string</p>
+</li>
+<li><p class="first"><tt class="docutils literal">NotAny</tt> - a negative lookahead expression, prevents matching of named
+expressions, does not advance the parsing position within the input string;
+can also be created using the unary '~' operator</p>
+</li>
+</ul>
+</div>
+<div class="section" id="expression-operators">
+<span id="operators"></span><h2><a class="toc-backref" href="#id8">2.4&nbsp;&nbsp;&nbsp;Expression operators</a></h2>
+<ul class="simple">
+<li><tt class="docutils literal">~</tt> - creates NotAny using the expression after the operator</li>
+<li><tt class="docutils literal">+</tt> - creates And using the expressions before and after the operator</li>
+<li><tt class="docutils literal">|</tt> - creates MatchFirst (first left-to-right match) using the expressions before and after the operator</li>
+<li><tt class="docutils literal">^</tt> - creates Or (longest match) using the expressions before and after the operator</li>
+<li><tt class="docutils literal">&amp;</tt> - creates Each using the expressions before and after the operator</li>
+<li><tt class="docutils literal">*</tt> - creates And by multiplying the expression by the integer operand; if
+expression is multiplied by a 2-tuple, creates an And of (min,max)
+expressions (similar to &quot;{min,max}&quot; form in regular expressions); if
+min is None, interpret as (0,max); if max is None, interpret as
+expr*min + ZeroOrMore(expr)</li>
+<li><tt class="docutils literal">-</tt> - like <tt class="docutils literal">+</tt> but with no backup and retry of alternatives</li>
+<li><tt class="docutils literal">*</tt> - repetition of expression</li>
+<li><tt class="docutils literal">==</tt> - matching expression to string; returns True if the string matches the given expression</li>
+<li><tt class="docutils literal">&lt;&lt;</tt> - inserts the expression following the operator as the body of the
+Forward expression before the operator</li>
+</ul>
+</div>
+<div class="section" id="positional-subclasses">
+<h2><a class="toc-backref" href="#id9">2.5&nbsp;&nbsp;&nbsp;Positional subclasses</a></h2>
+<ul class="simple">
+<li><tt class="docutils literal">StringStart</tt> - matches beginning of the text</li>
+<li><tt class="docutils literal">StringEnd</tt> - matches the end of the text</li>
+<li><tt class="docutils literal">LineStart</tt> - matches beginning of a line (lines delimited by <tt class="docutils literal">\n</tt> characters)</li>
+<li><tt class="docutils literal">LineEnd</tt> - matches the end of a line</li>
+<li><tt class="docutils literal">WordStart</tt> - matches a leading word boundary</li>
+<li><tt class="docutils literal">WordEnd</tt> - matches a trailing word boundary</li>
+</ul>
+</div>
+<div class="section" id="converter-subclasses">
+<h2><a class="toc-backref" href="#id10">2.6&nbsp;&nbsp;&nbsp;Converter subclasses</a></h2>
+<ul class="simple">
+<li><tt class="docutils literal">Upcase</tt> - converts matched tokens to uppercase (deprecated -
+use <tt class="docutils literal">upcaseTokens</tt> parse action instead)</li>
+<li><tt class="docutils literal">Combine</tt> - joins all matched tokens into a single string, using
+specified joinString (default <tt class="docutils literal"><span class="pre">joinString=&quot;&quot;</span></tt>); expects
+all matching tokens to be adjacent, with no intervening
+whitespace (can be overridden by specifying <tt class="docutils literal">adjacent=False</tt> in constructor)</li>
+<li><tt class="docutils literal">Suppress</tt> - clears matched tokens; useful to keep returned
+results from being cluttered with required but uninteresting
+tokens (such as list delimiters)</li>
+</ul>
+</div>
+<div class="section" id="special-subclasses">
+<h2><a class="toc-backref" href="#id11">2.7&nbsp;&nbsp;&nbsp;Special subclasses</a></h2>
+<ul class="simple">
+<li><tt class="docutils literal">Group</tt> - causes the matched tokens to be enclosed in a list;
+useful in repeated elements like <tt class="docutils literal">ZeroOrMore</tt> and <tt class="docutils literal">OneOrMore</tt> to
+break up matched tokens into groups for each repeated pattern</li>
+<li><tt class="docutils literal">Dict</tt> - like <tt class="docutils literal">Group</tt>, but also constructs a dictionary, using the
+[0]'th elements of all enclosed token lists as the keys, and
+each token list as the value</li>
+<li><tt class="docutils literal">SkipTo</tt> - catch-all matching expression that accepts all characters
+up until the given pattern is found to match; useful for specifying
+incomplete grammars</li>
+<li><tt class="docutils literal">Forward</tt> - placeholder token used to define recursive token
+patterns; when defining the actual expression later in the
+program, insert it into the <tt class="docutils literal">Forward</tt> object using the <tt class="docutils literal">&lt;&lt;</tt>
+operator (see <tt class="docutils literal">fourFn.py</tt> for an example).</li>
+</ul>
+</div>
+<div class="section" id="other-classes">
+<h2><a class="toc-backref" href="#id12">2.8&nbsp;&nbsp;&nbsp;Other classes</a></h2>
+<ul id="parseresults">
+<li><p class="first"><tt class="docutils literal">ParseResults</tt> - class used to contain and manage the lists of tokens
+created from parsing the input using the user-defined parse
+expression. ParseResults can be accessed in a number of ways:</p>
+<ul class="simple">
+<li>as a list<ul>
+<li>total list of elements can be found using len()</li>
+<li>individual elements can be found using [0], [1], [-1], etc.</li>
+<li>elements can be deleted using <tt class="docutils literal">del</tt></li>
+<li>the -1th element can be extracted and removed in a single operation
+using <tt class="docutils literal">pop()</tt>, or any element can be extracted and removed
+using <tt class="docutils literal">pop(n)</tt></li>
+</ul>
+</li>
+<li>as a dictionary<ul>
+<li>if <tt class="docutils literal">setResultsName()</tt> is used to name elements within the
+overall parse expression, then these fields can be referenced
+as dictionary elements or as attributes</li>
+<li>the Dict class generates dictionary entries using the data of the
+input text - in addition to ParseResults listed as <tt class="docutils literal">[ [ a1, b1, c1, <span class="pre">...],</span> [ a2, b2, c2, <span class="pre">...]</span>&nbsp; ]</tt>
+it also acts as a dictionary with entries defined as <tt class="docutils literal">{ a1 : [ b1, c1, ... ] }, { a2 : [ b2, c2, ... ] }</tt>;
+this is especially useful when processing tabular data where the first column contains a key
+value for that line of data</li>
+<li>list elements that are deleted using <tt class="docutils literal">del</tt> will still be accessible by their
+dictionary keys</li>
+<li>supports <tt class="docutils literal">get()</tt>, <tt class="docutils literal">items()</tt> and <tt class="docutils literal">keys()</tt> methods, similar to a dictionary</li>
+<li>a keyed item can be extracted and removed using <tt class="docutils literal">pop(key)</tt>. Here
+key must be non-numeric (such as a string), in order to use dict
+extraction instead of list extraction.</li>
+<li>new named elements can be added (in a parse action, for instance), using the same
+syntax as adding an item to a dict (<tt class="docutils literal"><span class="pre">parseResults[&quot;X&quot;]=&quot;new</span> item&quot;</tt>); named elements can be removed using <tt class="docutils literal">del <span class="pre">parseResults[&quot;X&quot;]</span></tt></li>
+</ul>
+</li>
+<li>as a nested list<ul>
+<li>results returned from the Group class are encapsulated within their
+own list structure, so that the tokens can be handled as a hierarchical
+tree</li>
+</ul>
+</li>
+</ul>
+<p>ParseResults can also be converted to an ordinary list of strings
+by calling <tt class="docutils literal">asList()</tt>. Note that this will strip the results of any
+field names that have been defined for any embedded parse elements.
+(The <tt class="docutils literal">pprint</tt> module is especially good at printing out the nested contents
+given by <tt class="docutils literal">asList()</tt>.)</p>
+<p>Finally, ParseResults can be converted to an XML string by calling <tt class="docutils literal">asXML()</tt>. Where
+possible, results will be tagged using the results names defined for the respective
+ParseExpressions. <tt class="docutils literal">asXML()</tt> takes two optional arguments:</p>
+<ul class="simple">
+<li>doctagname - for ParseResults that do not have a defined name, this argument
+will wrap the resulting XML in a set of opening and closing tags <tt class="docutils literal">&lt;doctagname&gt;</tt>
+and <tt class="docutils literal">&lt;/doctagname&gt;</tt>.</li>
+<li>namedItemsOnly (default=False) - flag to indicate if the generated XML should
+skip items that do not have defined names. If a nested group item is named, then all
+embedded items will be included, whether they have names or not.</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="section" id="exception-classes-and-troubleshooting">
+<h2><a class="toc-backref" href="#id13">2.9&nbsp;&nbsp;&nbsp;Exception classes and Troubleshooting</a></h2>
+<ul id="parseexception">
+<li><p class="first"><tt class="docutils literal">ParseException</tt> - exception returned when a grammar parse fails;
+ParseExceptions have attributes loc, msg, line, lineno, and column; to view the
+text line and location where the reported ParseException occurs, use:</p>
+<pre class="literal-block">
+except ParseException, err:
+ print err.line
+ print &quot; &quot;*(err.column-1) + &quot;^&quot;
+ print err
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">RecursiveGrammarException</tt> - exception returned by <tt class="docutils literal">validate()</tt> if
+the grammar contains a recursive infinite loop, such as:</p>
+<pre class="literal-block">
+badGrammar = Forward()
+goodToken = Literal(&quot;A&quot;)
+badGrammar &lt;&lt; Optional(goodToken) + badGrammar
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">ParseFatalException</tt> - exception that parse actions can raise to stop parsing
+immediately. Should be used when a semantic error is found in the input text, such
+as a mismatched XML tag.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">ParseSyntaxException</tt> - subclass of <tt class="docutils literal">ParseFatalException</tt> raised when a
+syntax error is found, based on the use of the '-' operator when defining
+a sequence of expressions in an <tt class="docutils literal">And</tt> expression.</p>
+</li>
+</ul>
+<p>You can also get some insights into the parsing logic using diagnostic parse actions,
+and setDebug(), or test the matching of expression fragments by testing them using
+scanString().</p>
+</div>
+</div>
+<div class="section" id="miscellaneous-attributes-and-methods">
+<h1><a class="toc-backref" href="#id14">3&nbsp;&nbsp;&nbsp;Miscellaneous attributes and methods</a></h1>
+<div class="section" id="helper-methods">
+<h2><a class="toc-backref" href="#id15">3.1&nbsp;&nbsp;&nbsp;Helper methods</a></h2>
+<ul>
+<li><p class="first"><tt class="docutils literal">delimitedList( expr, <span class="pre">delim=',')</span></tt> - convenience function for
+matching one or more occurrences of expr, separated by delim.
+By default, the delimiters are suppressed, so the returned results contain
+only the separate list elements. Can optionally specify <tt class="docutils literal">combine=True</tt>,
+indicating that the expressions and delimiters should be returned as one
+combined value (useful for scoped variables, such as &quot;a.b.c&quot;, or
+&quot;a::b::c&quot;, or paths such as &quot;a/b/c&quot;).</p>
+</li>
+<li><p class="first"><tt class="docutils literal">countedArray( expr )</tt> - convenience function for a pattern where a list of
+instances of the given expression are preceded by an integer giving the count of
+elements in the list. Returns an expression that parses the leading integer,
+reads exactly that many expressions, and returns the array of expressions in the
+parse results - the leading integer is suppressed from the results (although it
+is easily reconstructed by using len on the returned array).</p>
+</li>
+<li><p class="first"><tt class="docutils literal">oneOf( string, caseless=False )</tt> - convenience function for quickly declaring an
+alternative set of <tt class="docutils literal">Literal</tt> tokens, by splitting the given string on
+whitespace boundaries. The tokens are sorted so that longer
+matches are attempted first; this ensures that a short token does
+not mask a longer one that starts with the same characters. If <tt class="docutils literal">caseless=True</tt>,
+will create an alternative set of CaselessLiteral tokens.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">dictOf( key, value )</tt> - convenience function for quickly declaring a
+dictionary pattern of <tt class="docutils literal">Dict( ZeroOrMore( Group( key + value ) ) )</tt>.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">makeHTMLTags( tagName )</tt> and <tt class="docutils literal">makeXMLTags( tagName )</tt> - convenience
+functions to create definitions of opening and closing tag expressions. Returns
+a pair of expressions, for the corresponding &lt;tag&gt; and &lt;/tag&gt; strings. Includes
+support for attributes in the opening tag, such as &lt;tag attr1=&quot;abc&quot;&gt; - attributes
+are returned as keyed tokens in the returned ParseResults. <tt class="docutils literal">makeHTMLTags</tt> is less
+restrictive than <tt class="docutils literal">makeXMLTags</tt>, especially with respect to case sensitivity.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">operatorPrecedence(baseOperand, operatorList)</tt> - convenience function to define a
+grammar for parsing
+expressions with a hierarchical precedence of operators. To use the operatorPrecedence
+helper:</p>
+<ol class="arabic simple">
+<li>Define the base &quot;atom&quot; operand term of the grammar.
+For this simple grammar, the smallest operand is either
+an integer or a variable. This will be the first argument
+to the operatorPrecedence method.</li>
+<li>Define a list of tuples for each level of operator
+precedence. Each tuple is of the form
+<tt class="docutils literal">(opExpr, numTerms, rightLeftAssoc, parseAction)</tt>, where:<ul>
+<li>opExpr is the pyparsing expression for the operator;
+may also be a string, which will be converted to a Literal; if
+None, indicates an empty operator, such as the implied
+multiplication operation between 'm' and 'x' in &quot;y = mx + b&quot;.</li>
+<li>numTerms is the number of terms for this operator (must
+be 1 or 2)</li>
+<li>rightLeftAssoc is the indicator whether the operator is
+right or left associative, using the pyparsing-defined
+constants <tt class="docutils literal">opAssoc.RIGHT</tt> and <tt class="docutils literal">opAssoc.LEFT</tt>.</li>
+<li>parseAction is the parse action to be associated with
+expressions matching this operator expression (the
+parse action tuple member may be omitted)</li>
+</ul>
+</li>
+<li>Call operatorPrecedence passing the operand expression and
+the operator precedence list, and save the returned value
+as the generated pyparsing expression. You can then use
+this expression to parse input strings, or incorporate it
+into a larger, more complex grammar.</li>
+</ol>
+</li>
+<li><p class="first"><tt class="docutils literal">matchPreviousLiteral</tt> and <tt class="docutils literal">matchPreviousExpr</tt> - functions to define an
+expression that matches the same content
+as was parsed in a previous parse expression. For instance:</p>
+<pre class="literal-block">
+first = Word(nums)
+matchExpr = first + &quot;:&quot; + matchPreviousLiteral(first)
+</pre>
+<p>will match &quot;1:1&quot;, but not &quot;1:2&quot;. Since this matches at the literal
+level, this will also match the leading &quot;1:1&quot; in &quot;1:10&quot;.</p>
+<p>In contrast:</p>
+<pre class="literal-block">
+first = Word(nums)
+matchExpr = first + &quot;:&quot; + matchPreviousExpr(first)
+</pre>
+<p>will <em>not</em> match the leading &quot;1:1&quot; in &quot;1:10&quot;; the expressions are
+evaluated first, and then compared, so &quot;1&quot; is compared with &quot;10&quot;.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">nestedExpr(opener, closer, content=None, ignoreExpr=quotedString)</tt> - method for defining nested
+lists enclosed in opening and closing delimiters.</p>
+<ul class="simple">
+<li>opener - opening character for a nested list (default=&quot;(&quot;); can also be a pyparsing expression</li>
+<li>closer - closing character for a nested list (default=&quot;)&quot;); can also be a pyparsing expression</li>
+<li>content - expression for items within the nested lists (default=None)</li>
+<li>ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)</li>
+</ul>
+<p>If an expression is not provided for the content argument, the nested
+expression will capture all whitespace-delimited content between delimiters
+as a list of separate values.</p>
+<p>Use the ignoreExpr argument to define expressions that may contain
+opening or closing characters that should not be treated as opening
+or closing characters for nesting, such as quotedString or a comment
+expression. Specify multiple expressions using an Or or MatchFirst.
+The default is quotedString, but if no expressions are to be ignored,
+then pass None for this argument.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">indentedBlock( statementExpr, indentationStackVar, indent=True)</tt> -
+function to define an indented block of statements, similar to
+indentation-based blocking in Python source code:</p>
+<ul class="simple">
+<li>statementExpr is the expression defining a statement that
+will be found in the indented block; a valid indentedBlock
+must contain at least 1 matching statementExpr</li>
+<li>indentationStackVar is a Python list variable; this variable
+should be common to all <tt class="docutils literal">indentedBlock</tt> expressions defined
+within the same grammar, and should be reinitialized to [1]
+each time the grammar is to be used</li>
+<li>indent is a boolean flag indicating whether the expressions
+within the block must be indented from the current parse
+location; if using indentedBlock to define the left-most
+statements (all starting in column 1), set indent to False</li>
+</ul>
+</li>
+</ul>
+<ul id="originaltextfor">
+<li><p class="first"><tt class="docutils literal">originalTextFor( expr )</tt> - helper function to preserve the originally parsed text, regardless of any
+token processing or conversion done by the contained expression. For instance, the following expression:</p>
+<pre class="literal-block">
+fullName = Word(alphas) + Word(alphas)
+</pre>
+<p>will return the parse of &quot;John Smith&quot; as ['John', 'Smith']. In some applications, the actual name as it
+was given in the input string is what is desired. To do this, use <tt class="docutils literal">originalTextFor</tt>:</p>
+<pre class="literal-block">
+fullName = originalTextFor(Word(alphas) + Word(alphas))
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">ungroup( expr )</tt> - function to &quot;ungroup&quot; returned tokens; useful
+to undo the default behavior of And to always group the returned tokens, even
+if there is only one in the list. (New in 1.5.6)</p>
+</li>
+<li><p class="first"><tt class="docutils literal">lineno( loc, string )</tt> - function to give the line number of the
+location within the string; the first line is line 1, newlines
+start new rows</p>
+</li>
+<li><p class="first"><tt class="docutils literal">col( loc, string )</tt> - function to give the column number of the
+location within the string; the first column is column 1,
+newlines reset the column number to 1</p>
+</li>
+<li><p class="first"><tt class="docutils literal">line( loc, string )</tt> - function to retrieve the line of text
+representing <tt class="docutils literal">lineno( loc, string )</tt>; useful when printing out diagnostic
+messages for exceptions</p>
+</li>
+<li><p class="first"><tt class="docutils literal">srange( rangeSpec )</tt> - function to define a string of characters,
+given a string of the form used by regexp string ranges, such as <tt class="docutils literal"><span class="pre">&quot;[0-9]&quot;</span></tt> for
+all numeric digits, <tt class="docutils literal"><span class="pre">&quot;[A-Z_]&quot;</span></tt> for uppercase characters plus underscore, and
+so on (note that rangeSpec does not include support for generic regular
+expressions, just string range specs)</p>
+</li>
+<li><p class="first"><tt class="docutils literal">getTokensEndLoc()</tt> - function to call from within a parse action to get
+the ending location for the matched tokens</p>
+</li>
+<li><p class="first"><tt class="docutils literal">traceParseAction(fn)</tt> - decorator function to debug parse actions. Lists
+each call, called arguments, and return value or exception</p>
+</li>
+</ul>
+</div>
+<div class="section" id="helper-parse-actions">
+<h2><a class="toc-backref" href="#id16">3.2&nbsp;&nbsp;&nbsp;Helper parse actions</a></h2>
+<ul>
+<li><p class="first"><tt class="docutils literal">removeQuotes</tt> - removes the first and last characters of a quoted string;
+useful to remove the delimiting quotes from quoted strings</p>
+</li>
+<li><p class="first"><tt class="docutils literal">replaceWith(replString)</tt> - returns a parse action that simply returns the
+replString; useful when using transformString, or converting HTML entities, as in:</p>
+<pre class="literal-block">
+nbsp = Literal(&quot;&amp;nbsp;&quot;).setParseAction( replaceWith(&quot;&lt;BLANK&gt;&quot;) )
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">keepOriginalText</tt> - (deprecated, use <a class="reference internal" href="#originaltextfor">originalTextFor</a> instead) restores any internal whitespace or suppressed
+text within the tokens for a matched parse
+expression. This is especially useful when defining expressions
+for scanString or transformString applications.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">withAttribute( *args, **kwargs )</tt> - helper to create a validating parse action to be used with start tags created
+with <tt class="docutils literal">makeXMLTags</tt> or <tt class="docutils literal">makeHTMLTags</tt>. Use <tt class="docutils literal">withAttribute</tt> to qualify a starting tag
+with a required attribute value, to avoid false matches on common tags such as
+<tt class="docutils literal">&lt;TD&gt;</tt> or <tt class="docutils literal">&lt;DIV&gt;</tt>.</p>
+<p><tt class="docutils literal">withAttribute</tt> can be called with:</p>
+<ul class="simple">
+<li>keyword arguments, as in <tt class="docutils literal"><span class="pre">(class=&quot;Customer&quot;,align=&quot;right&quot;)</span></tt>, or</li>
+<li>a list of name-value tuples, as in <tt class="docutils literal">( (&quot;ns1:class&quot;, <span class="pre">&quot;Customer&quot;),</span> <span class="pre">(&quot;ns2:align&quot;,&quot;right&quot;)</span> )</tt></li>
+</ul>
+<p>An attribute can be specified to have the special value
+<tt class="docutils literal">withAttribute.ANY_VALUE</tt>, which will match any value - use this to
+ensure that an attribute is present but any attribute value is
+acceptable.</p>
+</li>
+<li><p class="first"><tt class="docutils literal">downcaseTokens</tt> - converts all matched tokens to lowercase</p>
+</li>
+<li><p class="first"><tt class="docutils literal">upcaseTokens</tt> - converts all matched tokens to uppercase</p>
+</li>
+<li><p class="first"><tt class="docutils literal">matchOnlyAtCol( columnNumber )</tt> - a parse action that verifies that
+an expression was matched at a particular column, raising a
+ParseException if matching at a different column number; useful when parsing
+tabular data</p>
+</li>
+</ul>
+</div>
+<div class="section" id="common-string-and-token-constants">
+<h2><a class="toc-backref" href="#id17">3.3&nbsp;&nbsp;&nbsp;Common string and token constants</a></h2>
+<ul>
+<li><p class="first"><tt class="docutils literal">alphas</tt> - same as <tt class="docutils literal">string.letters</tt></p>
+</li>
+<li><p class="first"><tt class="docutils literal">nums</tt> - same as <tt class="docutils literal">string.digits</tt></p>
+</li>
+<li><p class="first"><tt class="docutils literal">alphanums</tt> - a string containing <tt class="docutils literal">alphas + nums</tt></p>
+</li>
+<li><p class="first"><tt class="docutils literal">alphas8bit</tt> - a string containing alphabetic 8-bit characters:</p>
+<pre class="literal-block">
+ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ
+</pre>
+</li>
+<li><p class="first"><tt class="docutils literal">printables</tt> - same as <tt class="docutils literal">string.printable</tt>, minus the space (<tt class="docutils literal">' '</tt>) character</p>
+</li>
+<li><p class="first"><tt class="docutils literal">empty</tt> - a global <tt class="docutils literal">Empty()</tt>; will always match</p>
+</li>
+<li><p class="first"><tt class="docutils literal">sglQuotedString</tt> - a string of characters enclosed in 's; may
+include whitespace, but not newlines</p>
+</li>
+<li><p class="first"><tt class="docutils literal">dblQuotedString</tt> - a string of characters enclosed in &quot;s; may
+include whitespace, but not newlines</p>
+</li>
+<li><p class="first"><tt class="docutils literal">quotedString</tt> - <tt class="docutils literal">sglQuotedString | dblQuotedString</tt></p>
+</li>
+<li><p class="first"><tt class="docutils literal">cStyleComment</tt> - a comment block delimited by <tt class="docutils literal"><span class="pre">'/*'</span></tt> and <tt class="docutils literal"><span class="pre">'*/'</span></tt> sequences; can span
+multiple lines, but does not support nesting of comments</p>
+</li>
+<li><p class="first"><tt class="docutils literal">htmlComment</tt> - a comment block delimited by <tt class="docutils literal"><span class="pre">'&lt;!--'</span></tt> and <tt class="docutils literal"><span class="pre">'--&gt;'</span></tt> sequences; can span
+multiple lines, but does not support nesting of comments</p>
+</li>
+<li><p class="first"><tt class="docutils literal">commaSeparatedList</tt> - similar to <tt class="docutils literal">delimitedList</tt>, except that the
+list expressions can be any text value, or a quoted string; quoted strings can
+safely include commas without incorrectly breaking the string into two tokens</p>
+</li>
+<li><p class="first"><tt class="docutils literal">restOfLine</tt> - all remaining printable characters up to but not including the next
+newline</p>
+</li>
+</ul>
+</div>
+</div>
+</div>
+</body>
+</html>
diff --git a/src/HowToUsePyparsing.txt b/src/HowToUsePyparsing.txt
new file mode 100644
index 0000000..2c7e89f
--- /dev/null
+++ b/src/HowToUsePyparsing.txt
@@ -0,0 +1,993 @@
+==========================
+Using the pyparsing module
+==========================
+
+:author: Paul McGuire
+:address: ptmcg@users.sourceforge.net
+
+:revision: 1.5.6
+:date: June, 2011
+
+:copyright: Copyright |copy| 2003-2011 Paul McGuire.
+
+.. |copy| unicode:: 0xA9
+
+:abstract: This document provides how-to instructions for the
+ pyparsing library, an easy-to-use Python module for constructing
+ and executing basic text parsers. The pyparsing module is useful
+ for evaluating user-definable
+ expressions, processing custom application language commands, or
+ extracting data from formatted reports.
+
+.. sectnum:: :depth: 4
+
+.. contents:: :depth: 4
+
+
+Steps to follow
+===============
+
+To parse an incoming data string, the client code must follow these steps:
+
+1. First define the tokens and patterns to be matched, and assign
+ this to a program variable. Optional results names or parsing
+ actions can also be defined at this time.
+
+2. Call ``parseString()`` or ``scanString()`` on this variable, passing in
+ the string to
+ be parsed. During the matching process, whitespace between
+ tokens is skipped by default (although this can be changed).
+ When token matches occur, any defined parse action methods are
+ called.
+
+3. Process the parsed results, returned as a list of strings.
+ Matching results may also be accessed as named attributes of
+ the returned results, if names are defined in the definition of
+ the token pattern, using ``setResultsName()``.
+
+
+Hello, World!
+-------------
+
+The following complete Python program will parse the greeting "Hello, World!",
+or any other greeting of the form "<salutation>, <addressee>!"::
+
+ from pyparsing import Word, alphas
+
+ greet = Word( alphas ) + "," + Word( alphas ) + "!"
+ greeting = greet.parseString( "Hello, World!" )
+ print greeting
+
+The parsed tokens are returned in the following form::
+
+ ['Hello', ',', 'World', '!']
+
+
+Usage notes
+-----------
+
+- The pyparsing module can be used to interpret simple command
+ strings or algebraic expressions, or can be used to extract data
+ from text reports with complicated format and structure ("screen
+ or report scraping"). However, it is possible that your defined
+ matching patterns may accept invalid inputs. Use pyparsing to
+ extract data from strings assumed to be well-formatted.
+
+- To keep up the readability of your code, use operators_ such as ``+``, ``|``,
+ ``^``, and ``~`` to combine expressions. You can also combine
+ string literals with ParseExpressions - they will be
+ automatically converted to Literal objects. For example::
+
+ integer = Word( nums ) # simple unsigned integer
+ variable = Word( alphas, max=1 ) # single letter variable, such as x, z, m, etc.
+ arithOp = Word( "+-*/", max=1 ) # arithmetic operators
+ equation = variable + "=" + integer + arithOp + integer # will match "x=2+2", etc.
+
+ In the definition of ``equation``, the string ``"="`` will get added as
+ a ``Literal("=")``, but in a more readable way.
+
+- The pyparsing module's default behavior is to ignore whitespace. This is the
+ case for 99% of all parsers ever written. This allows you to write simple, clean
+ grammars, such as the above ``equation``, without having to clutter it up with
+ extraneous ``ws`` markers. The ``equation`` grammar will successfully parse all of the
+ following statements::
+
+ x=2+2
+ x = 2+2
+ a = 10 * 4
+ r= 1234/ 100000
+
+ Of course, it is quite simple to extend this example to support more elaborate expressions, with
+ nesting with parentheses, floating point numbers, scientific notation, and named constants
+ (such as ``e`` or ``pi``). See ``fourFn.py``, included in the examples directory.
+
+- To modify pyparsing's default whitespace skipping, you can use one or
+ more of the following methods:
+
+ - use the static method ``ParserElement.setDefaultWhitespaceChars``
+ to override the normal set of whitespace chars (' \t\n\r'). For instance
+ when defining a grammar in which newlines are significant, you should
+ call ``ParserElement.setDefaultWhitespaceChars(' \t')`` to remove
+ newline from the set of skippable whitespace characters. Calling
+ this method will affect all pyparsing expressions defined afterward.
+
+ - call ``leaveWhitespace()`` on individual expressions, to suppress the
+ skipping of whitespace before trying to match the expression
+
+ - use ``Combine`` to require that successive expressions must be
+ adjacent in the input string. For instance, this expression::
+
+ real = Word(nums) + '.' + Word(nums)
+
+ will match "3.14159", but will also match "3 . 12". It will also
+ return the matched results as ['3', '.', '14159']. By changing this
+ expression to::
+
+ real = Combine( Word(nums) + '.' + Word(nums) )
+
+ it will not match numbers with embedded spaces, and it will return a
+ single concatenated string '3.14159' as the parsed token.
+
+- Repetition of expressions can be indicated using the '*' operator. An
+ expression may be multiplied by an integer value (to indicate an exact
+ repetition count), or by a tuple containing
+ two integers, or None and an integer, representing min and max repetitions
+ (with None representing no min or no max, depending on whether it is the first or
+ second tuple element). See the following examples, where n is used to
+ indicate an integer value:
+
+ - ``expr*3`` is equivalent to ``expr + expr + expr``
+
+ - ``expr*(2,3)`` is equivalent to ``expr + expr + Optional(expr)``
+
+ - ``expr*(n,None)`` or ``expr*(n,)`` is equivalent
+ to ``expr*n + ZeroOrMore(expr)`` (read as "at least n instances of expr")
+
+ - ``expr*(None,n)`` is equivalent to ``expr*(0,n)``
+ (read as "0 to n instances of expr")
+
+ - ``expr*(None,None)`` is equivalent to ``ZeroOrMore(expr)``
+
+ - ``expr*(1,None)`` is equivalent to ``OneOrMore(expr)``
+
+ Note that ``expr*(None,n)`` does not raise an exception if
+ more than n exprs exist in the input stream; that is,
+ ``expr*(None,n)`` does not enforce a maximum number of expr
+ occurrences. If this behavior is desired, then write
+ ``expr*(None,n) + ~expr``.
+
+- ``MatchFirst`` expressions are matched left-to-right, and the first
+ match found will skip all later expressions within, so be sure
+ to define less-specific patterns after more-specific patterns.
+ If you are not sure which expressions are most specific, use Or
+ expressions (defined using the ``^`` operator) - they will always
+ match the longest expression, although they are more
+ compute-intensive.
+
+- ``Or`` expressions will evaluate all of the specified subexpressions
+ to determine which is the "best" match, that is, which matches
+ the longest string in the input data. In case of a tie, the
+ left-most expression in the ``Or`` list will win.
+
+- If parsing the contents of an entire file, pass it to the
+ ``parseFile`` method using::
+
+ expr.parseFile( sourceFile )
+
+- ``ParseExceptions`` will report the location where an expected token
+ or expression failed to match. For example, if we tried to use our
+ "Hello, World!" parser to parse "Hello World!" (leaving out the separating
+ comma), we would get an exception, with the message::
+
+ pyparsing.ParseException: Expected "," (6), (1,7)
+
+ In the case of complex
+ expressions, the reported location may not be exactly where you
+ would expect. See more information under ParseException_ .
+
+- Use the ``Group`` class to enclose logical groups of tokens within a
+ sublist. This will help organize your results into more
+ hierarchical form (the default behavior is to return matching
+ tokens as a flat list of matching input strings).
+
+- Punctuation may be significant for matching, but is rarely of
+ much interest in the parsed results. Use the ``suppress()`` method
+ to keep these tokens from cluttering up your returned lists of
+ tokens. For example, ``delimitedList()`` matches a succession of
+ one or more expressions, separated by delimiters (commas by
+ default), but only returns a list of the actual expressions -
+ the delimiters are used for parsing, but are suppressed from the
+ returned output.
+
+- Parse actions can be used to convert values from strings to
+ other data types (ints, floats, booleans, etc.).
+
+- Results names are recommended for retrieving tokens from complex
+ expressions. It is much easier to access a token using its field
+ name than using a positional index, especially if the expression
+ contains optional elements. You can also shortcut
+ the ``setResultsName`` call::
+
+ stats = "AVE:" + realNum.setResultsName("average") + \
+ "MIN:" + realNum.setResultsName("min") + \
+ "MAX:" + realNum.setResultsName("max")
+
+ can now be written as this::
+
+ stats = "AVE:" + realNum("average") + \
+ "MIN:" + realNum("min") + \
+ "MAX:" + realNum("max")
+
+- Be careful when defining parse actions that modify global variables or
+ data structures (as in ``fourFn.py``), especially for low level tokens
+ or expressions that may occur within an ``And`` expression; an early element
+ of an ``And`` may match, but the overall expression may fail.
+
+- Performance of pyparsing may be slow for complex grammars and/or large
+ input strings. The psyco_ package can be used to improve the speed of the
+ pyparsing module with no changes to grammar or program logic - observed
+ improvements have been in the 20-50% range.
+
+.. _psyco: http://psyco.sourceforge.net/
+
+
+Classes
+=======
+
+Classes in the pyparsing module
+-------------------------------
+
+``ParserElement`` - abstract base class for all pyparsing classes;
+methods for code to use are:
+
+- ``parseString( sourceString, parseAll=False )`` - only called once, on the overall
+ matching pattern; returns a ParseResults_ object that makes the
+ matched tokens available as a list, and optionally as a dictionary,
+ or as an object with named attributes; if parseAll is set to True, then
+ parseString will raise a ParseException if the grammar does not process
+ the complete input string.
+
+- ``parseFile( sourceFile )`` - a convenience function, that accepts an
+ input file object or filename. The file contents are passed as a
+ string to ``parseString()``. ``parseFile`` also supports the ``parseAll`` argument.
+
+- ``scanString( sourceString )`` - generator function, used to find and
+ extract matching text in the given source string; for each matched text,
+ returns a tuple of:
+
+ - matched tokens (packaged as a ParseResults_ object)
+
+ - start location of the matched text in the given source string
+
+ - end location in the given source string
+
+ ``scanString`` allows you to scan through the input source string for
+ random matches, instead of exhaustively defining the grammar for the entire
+ source text (as would be required with ``parseString``).
+
+- ``transformString( sourceString )`` - convenience wrapper function for
+ ``scanString``, to process the input source string, and replace matching
+ text with the tokens returned from parse actions defined in the grammar
+ (see setParseAction_).
+
+- ``searchString( sourceString )`` - another convenience wrapper function for
+ ``scanString``, returns a list of the matching tokens returned from each
+ call to ``scanString``.
+
+- ``setName( name )`` - associate a short descriptive name for this
+ element, useful in displaying exceptions and trace information
+
+- ``setResultsName( string, listAllMatches=False )`` - name to be given
+ to tokens matching
+ the element; if multiple tokens match within
+ a repetition group (such as ``ZeroOrMore`` or ``delimitedList``) the
+ default is to return only the last matching token - if listAllMatches
+ is set to True, then a list of all the matching tokens is returned.
+ (New in 1.5.6 - a results name with a trailing '*' character will be
+ interpreted as setting listAllMatches to True.)
+ Note:
+ ``setResultsName`` returns a *copy* of the element so that a single
+ basic element can be referenced multiple times and given
+ different names within a complex grammar.
+
+.. _setParseAction:
+
+- ``setParseAction( *fn )`` - specify one or more functions to call after successful
+ matching of the element; each function is defined as ``fn( s,
+ loc, toks )``, where:
+
+ - ``s`` is the original parse string
+
+ - ``loc`` is the location in the string where matching started
+
+ - ``toks`` is the list of the matched tokens, packaged as a ParseResults_ object
+
+ Multiple functions can be attached to a ParserElement by specifying multiple
+ arguments to setParseAction, or by calling setParseAction multiple times.
+
+ Each parse action function can return a modified ``toks`` list, to perform conversion, or
+ string modifications. For brevity, ``fn`` may also be a
+ lambda - here is an example of using a parse action to convert matched
+ integer tokens from strings to integers::
+
+ intNumber = Word(nums).setParseAction( lambda s,l,t: [ int(t[0]) ] )
+
+ If ``fn`` does not modify the ``toks`` list, it does not need to return
+ anything at all.
+
+- ``setBreak( breakFlag=True )`` - if breakFlag is True, calls pdb.set_trace()
+ as this expression is about to be parsed
+
+- ``copy()`` - returns a copy of a ParserElement; can be used to use the same
+ parse expression in different places in a grammar, with different parse actions
+ attached to each
+
+- ``leaveWhitespace()`` - change default behavior of skipping
+ whitespace before starting matching (mostly used internally to the
+ pyparsing module, rarely used by client code)
+
+- ``setWhitespaceChars( chars )`` - define the set of chars to be ignored
+ as whitespace before trying to match a specific ParserElement, in place of the
+ default set of whitespace (space, tab, newline, and return)
+
+- ``setDefaultWhitespaceChars( chars )`` - class-level method to override
+ the default set of whitespace chars for all subsequently created ParserElements
+ (including copies); useful when defining grammars that treat one or more of the
+ default whitespace characters as significant (such as a line-sensitive grammar, to
+ omit newline from the list of ignorable whitespace)
+
+- ``suppress()`` - convenience function to suppress the output of the
+ given element, instead of wrapping it with a Suppress object.
+
+- ``ignore( expr )`` - function to specify parse expression to be
+ ignored while matching defined patterns; can be called
+ repeatedly to specify multiple expressions; useful to specify
+ patterns of comment syntax, for example
+
+- ``setDebug( dbgFlag=True )`` - function to enable/disable tracing output
+ when trying to match this element
+
+- ``validate()`` - function to verify that the defined grammar does not
+ contain infinitely recursive constructs
+
+.. _parseWithTabs:
+
+- ``parseWithTabs()`` - function to override default behavior of converting
+ tabs to spaces before parsing the input string; rarely used, except when
+ specifying whitespace-significant grammars using the White_ class.
+
+- ``enablePackrat()`` - a class-level static method to enable a memoizing
+ performance enhancement, known as "packrat parsing". packrat parsing is
+ disabled by default, since it may conflict with some user programs that use
+ parse actions. To activate the packrat feature, your
+ program must call the class method ParserElement.enablePackrat(). If
+ your program uses psyco to "compile as you go", you must call
+ enablePackrat before calling psyco.full(). If you do not do this,
+ Python will crash. For best results, call enablePackrat() immediately
+ after importing pyparsing.
+
+
+Basic ParserElement subclasses
+------------------------------
+
+- ``Literal`` - construct with a string to be matched exactly
+
+- ``CaselessLiteral`` - construct with a string to be matched, but
+ without case checking; results are always returned as the
+ defining literal, NOT as they are found in the input string
+
+- ``Keyword`` - similar to Literal, but must be immediately followed by
+ whitespace, punctuation, or other non-keyword characters; prevents
+ accidental matching of a non-keyword that happens to begin with a
+ defined keyword
+
+- ``CaselessKeyword`` - similar to Keyword, but with caseless matching
+ behavior
+
+.. _Word:
+
+- ``Word`` - one or more contiguous characters; construct with a
+ string containing the set of allowed initial characters, and an
+ optional second string of allowed body characters; for instance,
+ a common Word construct is to match a code identifier - in C, a
+ valid identifier must start with an alphabetic character or an
+ underscore ('_'), followed by a body that can also include numeric
+ digits. That is, ``a``, ``i``, ``MAX_LENGTH``, ``_a1``, ``b_109_``, and
+ ``plan9FromOuterSpace``
+ are all valid identifiers; ``9b7z``, ``$a``, ``.section``, and ``0debug``
+ are not. To
+ define an identifier using a Word, use either of the following::
+
+ - Word( alphas+"_", alphanums+"_" )
+ - Word( srange("[a-zA-Z_]"), srange("[a-zA-Z0-9_]") )
+
+ If only one
+ string is given, it specifies that the same character set defined
+ for the initial character is used for the word body; for instance, to
+ define an identifier that can only be composed of capital letters and
+ underscores, use::
+
+ - Word( "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" )
+ - Word( srange("[A-Z_]") )
+
+ A Word may
+ also be constructed with any of the following optional parameters:
+
+ - min - indicating a minimum length of matching characters
+
+ - max - indicating a maximum length of matching characters
+
+ - exact - indicating an exact length of matching characters
+
+ If exact is specified, it will override any values for min or max.
+
+ New in 1.5.6 - Sometimes you want to define a word using all
+ characters in a range except for one or two of them; you can do this
+ with the new excludeChars argument. This is helpful if you want to define
+ a word with all printables except for a single delimiter character, such
+ as '.'. Previously, you would have to create a custom string to pass to Word.
+ With this change, you can just create ``Word(printables, excludeChars='.')``.
+
+- ``CharsNotIn`` - similar to Word_, but matches characters not
+ in the given constructor string (accepts only one string for both
+ initial and body characters); also supports min, max, and exact
+ optional parameters.
+
+- ``Regex`` - a powerful construct, that accepts a regular expression
+ to be matched at the current parse position; accepts an optional
+ flags parameter, corresponding to the flags parameter in the re.compile
+ method; if the expression includes named sub-fields, they will be
+ represented in the returned ParseResults_
+
+- ``QuotedString`` - supports the definition of custom quoted string
+ formats, in addition to pyparsing's built-in dblQuotedString and
+ sglQuotedString. QuotedString allows you to specify the following
+ parameters:
+
+ - quoteChar - string of one or more characters defining the quote delimiting string
+
+ - escChar - character to escape quotes, typically backslash (default=None)
+
+ - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
+
+ - multiline - boolean indicating whether quotes can span multiple lines (default=False)
+
+ - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
+
+ - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
+
+- ``SkipTo`` - skips ahead in the input string, accepting any
+ characters up to the specified pattern; may be constructed with
+ the following optional parameters:
+
+ - include - if set to true, also consumes the match expression
+ (default is false)
+
+ - ignore - allows the user to specify patterns to not be matched,
+ to prevent false matches
+
+ - failOn - if a literal string or expression is given for this argument, it defines an expression that
+ should cause the ``SkipTo`` expression to fail, and not skip over that expression
+
+.. _White:
+
+- ``White`` - also similar to Word_, but matches whitespace
+ characters. Not usually needed, as whitespace is implicitly
+ ignored by pyparsing. However, some grammars are whitespace-sensitive,
+ such as those that use leading tabs or spaces to indicate grouping
+ or hierarchy. (If matching on tab characters, be sure to call
+ parseWithTabs_ on the top-level parse element.)
+
+- ``Empty`` - a null expression, requiring no characters - will always
+ match; useful for debugging and for specialized grammars
+
+- ``NoMatch`` - opposite of Empty, will never match; useful for debugging
+ and for specialized grammars
+
+
+Expression subclasses
+---------------------
+
+- ``And`` - construct with a list of ParserElements, all of which must
+ match for And to match; can also be created using the '+'
+ operator; multiple expressions can be Anded together using the '*'
+ operator as in::
+
+ ipAddress = Word(nums) + ('.'+Word(nums))*3
+
+ A tuple can be used as the multiplier, indicating a min/max::
+
+ usPhoneNumber = Word(nums) + ('-'+Word(nums))*(1,2)
+
+ A special form of ``And`` is created if the '-' operator is used
+ instead of the '+' operator. In the ipAddress example above, if
+ no trailing '.' and Word(nums) are found after matching the initial
+ Word(nums), then pyparsing will back up in the grammar and try other
+ alternatives to ipAddress. However, if ipAddress is defined as::
+
+ strictIpAddress = Word(nums) - ('.'+Word(nums))*3
+
+ then no backing up is done. If the first Word(nums) of strictIpAddress
+ is matched, then any mismatch after that will raise a ParseSyntaxException,
+ which will halt the parsing process immediately. By careful use of the
+ '-' operator, grammars can provide meaningful error messages close to
+ the location where the incoming text does not match the specified
+ grammar.
+
+- ``Or`` - construct with a list of ParserElements, any of which must
+ match for Or to match; if more than one expression matches, the
+ expression that makes the longest match will be used; can also
+ be created using the '^' operator
+
+- ``MatchFirst`` - construct with a list of ParserElements, any of
+ which must match for MatchFirst to match; matching is done
+ left-to-right, taking the first expression that matches; can
+ also be created using the '|' operator
+
+- ``Each`` - similar to And, in that all of the provided expressions
+ must match; however, Each permits matching to be done in any order;
+ can also be created using the '&' operator
+
+- ``Optional`` - construct with a ParserElement, but this element is
+ not required to match; can be constructed with an optional ``default`` argument,
+ containing a default string or object to be supplied if the given optional
+ parse element is not found in the input string; parse action will only
+ be called if a match is found, or if a default is specified
+
+- ``ZeroOrMore`` - similar to Optional, but can be repeated
+
+- ``OneOrMore`` - similar to ZeroOrMore, but at least one match must
+ be present
+
+- ``FollowedBy`` - a lookahead expression, requires matching of the given
+ expressions, but does not advance the parsing position within the input string
+
+- ``NotAny`` - a negative lookahead expression, prevents matching of named
+ expressions, does not advance the parsing position within the input string;
+ can also be created using the unary '~' operator
+
+
+.. _operators:
+
+Expression operators
+--------------------
+
+- ``~`` - creates NotAny using the expression after the operator
+
+- ``+`` - creates And using the expressions before and after the operator
+
+- ``|`` - creates MatchFirst (first left-to-right match) using the expressions before and after the operator
+
+- ``^`` - creates Or (longest match) using the expressions before and after the operator
+
+- ``&`` - creates Each using the expressions before and after the operator
+
+- ``*`` - creates And by multiplying the expression by the integer operand; if
+ expression is multiplied by a 2-tuple, creates an And of (min,max)
+ expressions (similar to "{min,max}" form in regular expressions); if
+ min is None, interpret as (0,max); if max is None, interpret as
+ expr*min + ZeroOrMore(expr)
+
+- ``-`` - like ``+`` but with no backup and retry of alternatives
+
+- ``*`` - repetition of expression
+
+- ``==`` - matching expression to string; returns True if the string matches the given expression
+
+- ``<<`` - inserts the expression following the operator as the body of the
+ Forward expression before the operator
+
+
+
+Positional subclasses
+---------------------
+
+- ``StringStart`` - matches beginning of the text
+
+- ``StringEnd`` - matches the end of the text
+
+- ``LineStart`` - matches beginning of a line (lines delimited by ``\n`` characters)
+
+- ``LineEnd`` - matches the end of a line
+
+- ``WordStart`` - matches a leading word boundary
+
+- ``WordEnd`` - matches a trailing word boundary
+
+
+
+Converter subclasses
+--------------------
+
+- ``Upcase`` - converts matched tokens to uppercase (deprecated -
+ use ``upcaseTokens`` parse action instead)
+
+- ``Combine`` - joins all matched tokens into a single string, using
+ specified joinString (default ``joinString=""``); expects
+ all matching tokens to be adjacent, with no intervening
+ whitespace (can be overridden by specifying ``adjacent=False`` in constructor)
+
+- ``Suppress`` - clears matched tokens; useful to keep returned
+ results from being cluttered with required but uninteresting
+ tokens (such as list delimiters)
+
+
+Special subclasses
+------------------
+
+- ``Group`` - causes the matched tokens to be enclosed in a list;
+ useful in repeated elements like ``ZeroOrMore`` and ``OneOrMore`` to
+ break up matched tokens into groups for each repeated pattern
+
+- ``Dict`` - like ``Group``, but also constructs a dictionary, using the
+ [0]'th elements of all enclosed token lists as the keys, and
+ each token list as the value
+
+- ``SkipTo`` - catch-all matching expression that accepts all characters
+ up until the given pattern is found to match; useful for specifying
+ incomplete grammars
+
+- ``Forward`` - placeholder token used to define recursive token
+ patterns; when defining the actual expression later in the
+ program, insert it into the ``Forward`` object using the ``<<``
+ operator (see ``fourFn.py`` for an example).
+
+
+Other classes
+-------------
+.. _ParseResults:
+
+- ``ParseResults`` - class used to contain and manage the lists of tokens
+ created from parsing the input using the user-defined parse
+ expression. ParseResults can be accessed in a number of ways:
+
+ - as a list
+
+ - total list of elements can be found using len()
+
+ - individual elements can be found using [0], [1], [-1], etc.
+
+ - elements can be deleted using ``del``
+
+ - the -1th element can be extracted and removed in a single operation
+ using ``pop()``, or any element can be extracted and removed
+ using ``pop(n)``
+
+ - as a dictionary
+
+ - if ``setResultsName()`` is used to name elements within the
+ overall parse expression, then these fields can be referenced
+ as dictionary elements or as attributes
+
+ - the Dict class generates dictionary entries using the data of the
+ input text - in addition to ParseResults listed as ``[ [ a1, b1, c1, ...], [ a2, b2, c2, ...] ]``
+ it also acts as a dictionary with entries defined as ``{ a1 : [ b1, c1, ... ] }, { a2 : [ b2, c2, ... ] }``;
+ this is especially useful when processing tabular data where the first column contains a key
+ value for that line of data
+
+ - list elements that are deleted using ``del`` will still be accessible by their
+ dictionary keys
+
+ - supports ``get()``, ``items()`` and ``keys()`` methods, similar to a dictionary
+
+ - a keyed item can be extracted and removed using ``pop(key)``. Here
+ key must be non-numeric (such as a string), in order to use dict
+ extraction instead of list extraction.
+
+ - new named elements can be added (in a parse action, for instance), using the same
+ syntax as adding an item to a dict (``parseResults["X"]="new item"``); named elements can be removed using ``del parseResults["X"]``
+
+ - as a nested list
+
+ - results returned from the Group class are encapsulated within their
+ own list structure, so that the tokens can be handled as a hierarchical
+ tree
+
+ ParseResults can also be converted to an ordinary list of strings
+ by calling ``asList()``. Note that this will strip the results of any
+ field names that have been defined for any embedded parse elements.
+ (The ``pprint`` module is especially good at printing out the nested contents
+ given by ``asList()``.)
+
+ Finally, ParseResults can be converted to an XML string by calling ``asXML()``. Where
+ possible, results will be tagged using the results names defined for the respective
+ ParseExpressions. ``asXML()`` takes two optional arguments:
+
+ - doctagname - for ParseResults that do not have a defined name, this argument
+ will wrap the resulting XML in a set of opening and closing tags ``<doctagname>``
+ and ``</doctagname>``.
+
+ - namedItemsOnly (default=False) - flag to indicate if the generated XML should
+ skip items that do not have defined names. If a nested group item is named, then all
+ embedded items will be included, whether they have names or not.
+
+
+Exception classes and Troubleshooting
+-------------------------------------
+
+.. _ParseException:
+
+- ``ParseException`` - exception returned when a grammar parse fails;
+ ParseExceptions have attributes loc, msg, line, lineno, and column; to view the
+ text line and location where the reported ParseException occurs, use::
+
+ except ParseException, err:
+ print err.line
+ print " "*(err.column-1) + "^"
+ print err
+
+- ``RecursiveGrammarException`` - exception returned by ``validate()`` if
+ the grammar contains a recursive infinite loop, such as::
+
+ badGrammar = Forward()
+ goodToken = Literal("A")
+ badGrammar << Optional(goodToken) + badGrammar
+
+- ``ParseFatalException`` - exception that parse actions can raise to stop parsing
+ immediately. Should be used when a semantic error is found in the input text, such
+ as a mismatched XML tag.
+
+- ``ParseSyntaxException`` - subclass of ``ParseFatalException`` raised when a
+ syntax error is found, based on the use of the '-' operator when defining
+ a sequence of expressions in an ``And`` expression.
+
+You can also get some insights into the parsing logic using diagnostic parse actions,
+and setDebug(), or test the matching of expression fragments by testing them using
+scanString().
+
+
+Miscellaneous attributes and methods
+====================================
+
+Helper methods
+--------------
+
+- ``delimitedList( expr, delim=',')`` - convenience function for
+ matching one or more occurrences of expr, separated by delim.
+ By default, the delimiters are suppressed, so the returned results contain
+ only the separate list elements. Can optionally specify ``combine=True``,
+ indicating that the expressions and delimiters should be returned as one
+ combined value (useful for scoped variables, such as "a.b.c", or
+ "a::b::c", or paths such as "a/b/c").
+
+- ``countedArray( expr )`` - convenience function for a pattern where a list of
+ instances of the given expression are preceded by an integer giving the count of
+ elements in the list. Returns an expression that parses the leading integer,
+ reads exactly that many expressions, and returns the array of expressions in the
+ parse results - the leading integer is suppressed from the results (although it
+ is easily reconstructed by using len on the returned array).
+
+- ``oneOf( string, caseless=False )`` - convenience function for quickly declaring an
+ alternative set of ``Literal`` tokens, by splitting the given string on
+ whitespace boundaries. The tokens are sorted so that longer
+ matches are attempted first; this ensures that a short token does
+ not mask a longer one that starts with the same characters. If ``caseless=True``,
+ will create an alternative set of CaselessLiteral tokens.
+
+- ``dictOf( key, value )`` - convenience function for quickly declaring a
+ dictionary pattern of ``Dict( ZeroOrMore( Group( key + value ) ) )``.
+
+- ``makeHTMLTags( tagName )`` and ``makeXMLTags( tagName )`` - convenience
+ functions to create definitions of opening and closing tag expressions. Returns
+ a pair of expressions, for the corresponding <tag> and </tag> strings. Includes
+ support for attributes in the opening tag, such as <tag attr1="abc"> - attributes
+ are returned as keyed tokens in the returned ParseResults. ``makeHTMLTags`` is less
+ restrictive than ``makeXMLTags``, especially with respect to case sensitivity.
+
+- ``operatorPrecedence(baseOperand, operatorList)`` - convenience function to define a
+ grammar for parsing
+ expressions with a hierarchical precedence of operators. To use the operatorPrecedence
+ helper:
+
+ 1. Define the base "atom" operand term of the grammar.
+ For this simple grammar, the smallest operand is either
+ an integer or a variable. This will be the first argument
+ to the operatorPrecedence method.
+
+ 2. Define a list of tuples for each level of operator
+ precedence. Each tuple is of the form
+ ``(opExpr, numTerms, rightLeftAssoc, parseAction)``, where:
+
+ - opExpr is the pyparsing expression for the operator;
+ may also be a string, which will be converted to a Literal; if
+ None, indicates an empty operator, such as the implied
+ multiplication operation between 'm' and 'x' in "y = mx + b".
+
+ - numTerms is the number of terms for this operator (must
+ be 1 or 2)
+
+ - rightLeftAssoc is the indicator whether the operator is
+ right or left associative, using the pyparsing-defined
+ constants ``opAssoc.RIGHT`` and ``opAssoc.LEFT``.
+
+ - parseAction is the parse action to be associated with
+ expressions matching this operator expression (the
+ parse action tuple member may be omitted)
+
+ 3. Call operatorPrecedence passing the operand expression and
+ the operator precedence list, and save the returned value
+ as the generated pyparsing expression. You can then use
+ this expression to parse input strings, or incorporate it
+ into a larger, more complex grammar.
+
+- ``matchPreviousLiteral`` and ``matchPreviousExpr`` - functions to define an
+ expression that matches the same content
+ as was parsed in a previous parse expression. For instance::
+
+ first = Word(nums)
+ matchExpr = first + ":" + matchPreviousLiteral(first)
+
+ will match "1:1", but not "1:2". Since this matches at the literal
+ level, this will also match the leading "1:1" in "1:10".
+
+ In contrast::
+
+ first = Word(nums)
+ matchExpr = first + ":" + matchPreviousExpr(first)
+
+ will *not* match the leading "1:1" in "1:10"; the expressions are
+ evaluated first, and then compared, so "1" is compared with "10".
+
+- ``nestedExpr(opener, closer, content=None, ignoreExpr=quotedString)`` - method for defining nested
+ lists enclosed in opening and closing delimiters.
+
+ - opener - opening character for a nested list (default="("); can also be a pyparsing expression
+
+ - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
+
+ - content - expression for items within the nested lists (default=None)
+
+ - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
+
+ If an expression is not provided for the content argument, the nested
+ expression will capture all whitespace-delimited content between delimiters
+ as a list of separate values.
+
+ Use the ignoreExpr argument to define expressions that may contain
+ opening or closing characters that should not be treated as opening
+ or closing characters for nesting, such as quotedString or a comment
+ expression. Specify multiple expressions using an Or or MatchFirst.
+ The default is quotedString, but if no expressions are to be ignored,
+ then pass None for this argument.
+
+
+- ``indentedBlock( statementExpr, indentationStackVar, indent=True)`` -
+ function to define an indented block of statements, similar to
+ indentation-based blocking in Python source code:
+
+ - statementExpr is the expression defining a statement that
+ will be found in the indented block; a valid indentedBlock
+ must contain at least 1 matching statementExpr
+
+ - indentationStackVar is a Python list variable; this variable
+ should be common to all ``indentedBlock`` expressions defined
+ within the same grammar, and should be reinitialized to [1]
+ each time the grammar is to be used
+
+ - indent is a boolean flag indicating whether the expressions
+ within the block must be indented from the current parse
+ location; if using indentedBlock to define the left-most
+ statements (all starting in column 1), set indent to False
+
+.. _originalTextFor:
+
+- ``originalTextFor( expr )`` - helper function to preserve the originally parsed text, regardless of any
+ token processing or conversion done by the contained expression. For instance, the following expression::
+
+ fullName = Word(alphas) + Word(alphas)
+
+ will return the parse of "John Smith" as ['John', 'Smith']. In some applications, the actual name as it
+ was given in the input string is what is desired. To do this, use ``originalTextFor``::
+
+ fullName = originalTextFor(Word(alphas) + Word(alphas))
+
+- ``ungroup( expr )`` - function to "ungroup" returned tokens; useful
+ to undo the default behavior of And to always group the returned tokens, even
+ if there is only one in the list. (New in 1.5.6)
+
+- ``lineno( loc, string )`` - function to give the line number of the
+ location within the string; the first line is line 1, newlines
+ start new rows
+
+- ``col( loc, string )`` - function to give the column number of the
+ location within the string; the first column is column 1,
+ newlines reset the column number to 1
+
+- ``line( loc, string )`` - function to retrieve the line of text
+ representing ``lineno( loc, string )``; useful when printing out diagnostic
+ messages for exceptions
+
+- ``srange( rangeSpec )`` - function to define a string of characters,
+ given a string of the form used by regexp string ranges, such as ``"[0-9]"`` for
+ all numeric digits, ``"[A-Z_]"`` for uppercase characters plus underscore, and
+ so on (note that rangeSpec does not include support for generic regular
+ expressions, just string range specs)
+
+- ``getTokensEndLoc()`` - function to call from within a parse action to get
+ the ending location for the matched tokens
+
+- ``traceParseAction(fn)`` - decorator function to debug parse actions. Lists
+ each call, called arguments, and return value or exception
+
+
+
+Helper parse actions
+--------------------
+
+- ``removeQuotes`` - removes the first and last characters of a quoted string;
+ useful to remove the delimiting quotes from quoted strings
+
+- ``replaceWith(replString)`` - returns a parse action that simply returns the
+ replString; useful when using transformString, or converting HTML entities, as in::
+
+ nbsp = Literal("&nbsp;").setParseAction( replaceWith("<BLANK>") )
+
+- ``keepOriginalText`` - (deprecated, use originalTextFor_ instead) restores any internal whitespace or suppressed
+ text within the tokens for a matched parse
+ expression. This is especially useful when defining expressions
+ for scanString or transformString applications.
+
+- ``withAttribute( *args, **kwargs )`` - helper to create a validating parse action to be used with start tags created
+ with ``makeXMLTags`` or ``makeHTMLTags``. Use ``withAttribute`` to qualify a starting tag
+ with a required attribute value, to avoid false matches on common tags such as
+ ``<TD>`` or ``<DIV>``.
+
+ ``withAttribute`` can be called with:
+
+ - keyword arguments, as in ``(class="Customer",align="right")``, or
+
+ - a list of name-value tuples, as in ``( ("ns1:class", "Customer"), ("ns2:align","right") )``
+
+ An attribute can be specified to have the special value
+ ``withAttribute.ANY_VALUE``, which will match any value - use this to
+ ensure that an attribute is present but any attribute value is
+ acceptable.
+
+- ``downcaseTokens`` - converts all matched tokens to lowercase
+
+- ``upcaseTokens`` - converts all matched tokens to uppercase
+
+- ``matchOnlyAtCol( columnNumber )`` - a parse action that verifies that
+ an expression was matched at a particular column, raising a
+ ParseException if matching at a different column number; useful when parsing
+ tabular data
+
+
+
+Common string and token constants
+---------------------------------
+
+- ``alphas`` - same as ``string.letters``
+
+- ``nums`` - same as ``string.digits``
+
+- ``alphanums`` - a string containing ``alphas + nums``
+
+- ``alphas8bit`` - a string containing alphabetic 8-bit characters::
+
+ ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ
+
+- ``printables`` - same as ``string.printable``, minus the space (``' '``) character
+
+- ``empty`` - a global ``Empty()``; will always match
+
+- ``sglQuotedString`` - a string of characters enclosed in 's; may
+ include whitespace, but not newlines
+
+- ``dblQuotedString`` - a string of characters enclosed in "s; may
+ include whitespace, but not newlines
+
+- ``quotedString`` - ``sglQuotedString | dblQuotedString``
+
+- ``cStyleComment`` - a comment block delimited by ``'/*'`` and ``'*/'`` sequences; can span
+ multiple lines, but does not support nesting of comments
+
+- ``htmlComment`` - a comment block delimited by ``'<!--'`` and ``'-->'`` sequences; can span
+ multiple lines, but does not support nesting of comments
+
+- ``commaSeparatedList`` - similar to ``delimitedList``, except that the
+ list expressions can be any text value, or a quoted string; quoted strings can
+ safely include commas without incorrectly breaking the string into two tokens
+
+- ``restOfLine`` - all remaining printable characters up to but not including the next
+ newline
diff --git a/src/LICENSE b/src/LICENSE
new file mode 100644
index 0000000..bbc959e
--- /dev/null
+++ b/src/LICENSE
@@ -0,0 +1,18 @@
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/src/MANIFEST.in b/src/MANIFEST.in
new file mode 100644
index 0000000..5f2b98b
--- /dev/null
+++ b/src/MANIFEST.in
@@ -0,0 +1,7 @@
+include pyparsing.py
+include HowToUsePyparsing.html pyparsingClassDiagram.*
+include README CHANGES LICENSE
+include examples/*.py examples/Setup.ini examples/*.dfm examples/*.ics examples/*.html
+include htmldoc/*.*
+include docs/*.*
+include robots.txt
diff --git a/src/MANIFEST.in_bdist b/src/MANIFEST.in_bdist
new file mode 100644
index 0000000..5f2b98b
--- /dev/null
+++ b/src/MANIFEST.in_bdist
@@ -0,0 +1,7 @@
+include pyparsing.py
+include HowToUsePyparsing.html pyparsingClassDiagram.*
+include README CHANGES LICENSE
+include examples/*.py examples/Setup.ini examples/*.dfm examples/*.ics examples/*.html
+include htmldoc/*.*
+include docs/*.*
+include robots.txt
diff --git a/src/MANIFEST.in_src b/src/MANIFEST.in_src
new file mode 100644
index 0000000..9b9159d
--- /dev/null
+++ b/src/MANIFEST.in_src
@@ -0,0 +1,7 @@
+include pyparsing_py2.py pyparsing_py3.py
+include HowToUsePyparsing.html pyparsingClassDiagram.*
+include README CHANGES LICENSE
+include examples/*.py examples/Setup.ini examples/*.dfm examples/*.ics examples/*.html
+include htmldoc/*.*
+include docs/*.*
+include robots.txt
diff --git a/src/README b/src/README
new file mode 100644
index 0000000..44dd51f
--- /dev/null
+++ b/src/README
@@ -0,0 +1,72 @@
+====================================
+PyParsing -- A Python Parsing Module
+====================================
+
+Introduction
+============
+
+The pyparsing module is an alternative approach to creating and executing
+simple grammars, vs. the traditional lex/yacc approach, or the use of
+regular expressions. The pyparsing module provides a library of classes
+that client code uses to construct the grammar directly in Python code.
+
+Here is a program to parse "Hello, World!" (or any greeting of the form
+"<salutation>, <addressee>!"):
+
+ from pyparsing import Word, alphas
+ greet = Word( alphas ) + "," + Word( alphas ) + "!"
+ hello = "Hello, World!"
+ print hello, "->", greet.parseString( hello )
+
+The program outputs the following:
+
+ Hello, World! -> ['Hello', ',', 'World', '!']
+
+The Python representation of the grammar is quite readable, owing to the
+self-explanatory class names, and the use of '+', '|' and '^' operator
+definitions.
+
+The parsed results returned from parseString() can be accessed as a
+nested list, a dictionary, or an object with named attributes.
+
+The pyparsing module handles some of the problems that are typically
+vexing when writing text parsers:
+- extra or missing whitespace (the above program will also handle
+ "Hello,World!", "Hello , World !", etc.)
+- quoted strings
+- embedded comments
+
+The .zip file includes examples of a simple SQL parser, simple CORBA IDL
+parser, a config file parser, a chemical formula parser, and a four-
+function algebraic notation parser. It also includes a simple how-to
+document, and a UML class diagram of the library's classes.
+
+
+
+Installation
+============
+
+Do the usual:
+
+ python setup.py install
+
+(pyparsing requires Python 2.3.2 or later.)
+
+
+Documentation
+=============
+
+See:
+
+ HowToUsePyparsing.html
+
+
+License
+=======
+
+ MIT License. See header of pyparsing.py
+
+History
+=======
+
+ See CHANGES file.
diff --git a/src/genEpydoc.bat b/src/genEpydoc.bat
new file mode 100644
index 0000000..1b715da
--- /dev/null
+++ b/src/genEpydoc.bat
@@ -0,0 +1 @@
+python c:\python26\scripts\epydoc -v --name pyparsing -o htmldoc --inheritance listed --no-private pyparsing.py
diff --git a/src/makeRelease.bat b/src/makeRelease.bat
new file mode 100644
index 0000000..fad963a
--- /dev/null
+++ b/src/makeRelease.bat
@@ -0,0 +1,25 @@
+set MAKING_PYPARSING_RELEASE=1
+
+if exist pyparsing.py del pyparsing.py
+rmdir build
+rmdir dist
+
+copy/y MANIFEST.in_src MANIFEST.in
+if exist MANIFEST del MANIFEST
+python setup.py sdist --formats=gztar,zip
+
+copy/y MANIFEST.in_bdist MANIFEST.in
+if exist MANIFEST del MANIFEST
+
+copy/y pyparsing_py2.py pyparsing.py
+python setup.py bdist_wininst --target-version=2.4
+python setup.py bdist_wininst --target-version=2.5
+python setup.py bdist_wininst --target-version=2.6
+python setup.py bdist_wininst --target-version=2.7
+
+copy/y pyparsing_py3.py pyparsing.py
+python setup.py bdist_wininst --target-version=3.0
+python setup.py bdist_wininst --target-version=3.1
+python setup.py bdist_wininst --target-version=3.2
+
+set MAKING_PYPARSING_RELEASE= \ No newline at end of file
diff --git a/src/pyparsing.py b/src/pyparsing.py
new file mode 100644
index 0000000..bbe38b8
--- /dev/null
+++ b/src/pyparsing.py
@@ -0,0 +1,3740 @@
+# module pyparsing.py
+#
+# Copyright (c) 2003-2011 Paul T. McGuire
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#from __future__ import generators
+
+__doc__ = \
+"""
+pyparsing module - Classes and methods to define and execute parsing grammars
+
+The pyparsing module is an alternative approach to creating and executing simple grammars,
+vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
+don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
+provides a library of classes that you use to construct the grammar directly in Python.
+
+Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
+
+ from pyparsing import Word, alphas
+
+ # define grammar of a greeting
+ greet = Word( alphas ) + "," + Word( alphas ) + "!"
+
+ hello = "Hello, World!"
+ print hello, "->", greet.parseString( hello )
+
+The program outputs the following::
+
+ Hello, World! -> ['Hello', ',', 'World', '!']
+
+The Python representation of the grammar is quite readable, owing to the self-explanatory
+class names, and the use of '+', '|' and '^' operators.
+
+The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
+object with named attributes.
+
+The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
+ - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
+ - quoted strings
+ - embedded comments
+"""
+
+__version__ = "1.5.7"
+__versionTime__ = "3 August 2012 05:00"
+__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
+
+import string
+from weakref import ref as wkref
+import copy
+import sys
+import warnings
+import re
+import sre_constants
+#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
+
+__all__ = [
+'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
+'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
+'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
+'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
+'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
+'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
+'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
+'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
+'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
+'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
+'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
+'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
+'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
+'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
+'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
+'indentedBlock', 'originalTextFor', 'ungroup',
+]
+
+"""
+Detect if we are running version 3.X and make appropriate changes
+Robert A. Clark
+"""
+_PY3K = sys.version_info[0] > 2
+if _PY3K:
+ _MAX_INT = sys.maxsize
+ basestring = str
+ unichr = chr
+ _ustr = str
+else:
+ _MAX_INT = sys.maxint
+ range = xrange
+ set = lambda s : dict( [(c,0) for c in s] )
+
+ def _ustr(obj):
+ """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
+ str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
+ then < returns the unicode object | encodes it with the default encoding | ... >.
+ """
+ if isinstance(obj,unicode):
+ return obj
+
+ try:
+ # If this works, then _ustr(obj) has the same behaviour as str(obj), so
+ # it won't break any existing code.
+ return str(obj)
+
+ except UnicodeEncodeError:
+ # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
+ # state that "The return value must be a string object". However, does a
+ # unicode object (being a subclass of basestring) count as a "string
+ # object"?
+ # If so, then return a unicode object:
+ return unicode(obj)
+ # Else encode it... but how? There are many choices... :)
+ # Replace unprintables with escape codes?
+ #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
+ # Replace unprintables with question marks?
+ #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
+ # ...
+
+# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
+singleArgBuiltins = []
+import __builtin__
+for fname in "sum len sorted reversed list tuple set any all min max".split():
+ try:
+ singleArgBuiltins.append(getattr(__builtin__,fname))
+ except AttributeError:
+ continue
+
+def _xml_escape(data):
+ """Escape &, <, >, ", ', etc. in a string of data."""
+
+ # ampersand must be replaced first
+ from_symbols = '&><"\''
+ to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
+ for from_,to_ in zip(from_symbols, to_symbols):
+ data = data.replace(from_, to_)
+ return data
+
+class _Constants(object):
+ pass
+
+alphas = string.ascii_lowercase + string.ascii_uppercase
+nums = "0123456789"
+hexnums = nums + "ABCDEFabcdef"
+alphanums = alphas + nums
+_bslash = chr(92)
+printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
+
+class ParseBaseException(Exception):
+ """base exception class for all parsing runtime exceptions"""
+ # Performance tuning: we construct a *lot* of these, so keep this
+ # constructor as small and fast as possible
+ def __init__( self, pstr, loc=0, msg=None, elem=None ):
+ self.loc = loc
+ if msg is None:
+ self.msg = pstr
+ self.pstr = ""
+ else:
+ self.msg = msg
+ self.pstr = pstr
+ self.parserElement = elem
+
+ def __getattr__( self, aname ):
+ """supported attributes by name are:
+ - lineno - returns the line number of the exception text
+ - col - returns the column number of the exception text
+ - line - returns the line containing the exception text
+ """
+ if( aname == "lineno" ):
+ return lineno( self.loc, self.pstr )
+ elif( aname in ("col", "column") ):
+ return col( self.loc, self.pstr )
+ elif( aname == "line" ):
+ return line( self.loc, self.pstr )
+ else:
+ raise AttributeError(aname)
+
+ def __str__( self ):
+ return "%s (at char %d), (line:%d, col:%d)" % \
+ ( self.msg, self.loc, self.lineno, self.column )
+ def __repr__( self ):
+ return _ustr(self)
+ def markInputline( self, markerString = ">!<" ):
+ """Extracts the exception line from the input string, and marks
+ the location of the exception with a special symbol.
+ """
+ line_str = self.line
+ line_column = self.column - 1
+ if markerString:
+ line_str = "".join( [line_str[:line_column],
+ markerString, line_str[line_column:]])
+ return line_str.strip()
+ def __dir__(self):
+ return "loc msg pstr parserElement lineno col line " \
+ "markInputline __str__ __repr__".split()
+
+class ParseException(ParseBaseException):
+ """exception thrown when parse expressions don't match class;
+ supported attributes by name are:
+ - lineno - returns the line number of the exception text
+ - col - returns the column number of the exception text
+ - line - returns the line containing the exception text
+ """
+ pass
+
+class ParseFatalException(ParseBaseException):
+ """user-throwable exception thrown when inconsistent parse content
+ is found; stops all parsing immediately"""
+ pass
+
+class ParseSyntaxException(ParseFatalException):
+ """just like C{L{ParseFatalException}}, but thrown internally when an
+ C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
+ an unbacktrackable syntax error has been found"""
+ def __init__(self, pe):
+ super(ParseSyntaxException, self).__init__(
+ pe.pstr, pe.loc, pe.msg, pe.parserElement)
+
+#~ class ReparseException(ParseBaseException):
+ #~ """Experimental class - parse actions can raise this exception to cause
+ #~ pyparsing to reparse the input string:
+ #~ - with a modified input string, and/or
+ #~ - with a modified start location
+ #~ Set the values of the ReparseException in the constructor, and raise the
+ #~ exception in a parse action to cause pyparsing to use the new string/location.
+ #~ Setting the values as None causes no change to be made.
+ #~ """
+ #~ def __init__( self, newstring, restartLoc ):
+ #~ self.newParseText = newstring
+ #~ self.reparseLoc = restartLoc
+
+class RecursiveGrammarException(Exception):
+ """exception thrown by C{validate()} if the grammar could be improperly recursive"""
+ def __init__( self, parseElementList ):
+ self.parseElementTrace = parseElementList
+
+ def __str__( self ):
+ return "RecursiveGrammarException: %s" % self.parseElementTrace
+
+class _ParseResultsWithOffset(object):
+ def __init__(self,p1,p2):
+ self.tup = (p1,p2)
+ def __getitem__(self,i):
+ return self.tup[i]
+ def __repr__(self):
+ return repr(self.tup)
+ def setOffset(self,i):
+ self.tup = (self.tup[0],i)
+
+class ParseResults(object):
+ """Structured parse results, to provide multiple means of access to the parsed data:
+ - as a list (C{len(results)})
+ - by list index (C{results[0], results[1]}, etc.)
+ - by attribute (C{results.<resultsName>})
+ """
+ #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
+ def __new__(cls, toklist, name=None, asList=True, modal=True ):
+ if isinstance(toklist, cls):
+ return toklist
+ retobj = object.__new__(cls)
+ retobj.__doinit = True
+ return retobj
+
+ # Performance tuning: we construct a *lot* of these, so keep this
+ # constructor as small and fast as possible
+ def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
+ if self.__doinit:
+ self.__doinit = False
+ self.__name = None
+ self.__parent = None
+ self.__accumNames = {}
+ if isinstance(toklist, list):
+ self.__toklist = toklist[:]
+ else:
+ self.__toklist = [toklist]
+ self.__tokdict = dict()
+
+ if name is not None and name:
+ if not modal:
+ self.__accumNames[name] = 0
+ if isinstance(name,int):
+ name = _ustr(name) # will always return a str, but use _ustr for consistency
+ self.__name = name
+ if not toklist in (None,'',[]):
+ if isinstance(toklist,basestring):
+ toklist = [ toklist ]
+ if asList:
+ if isinstance(toklist,ParseResults):
+ self[name] = _ParseResultsWithOffset(toklist.copy(),0)
+ else:
+ self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
+ self[name].__name = name
+ else:
+ try:
+ self[name] = toklist[0]
+ except (KeyError,TypeError,IndexError):
+ self[name] = toklist
+
+ def __getitem__( self, i ):
+ if isinstance( i, (int,slice) ):
+ return self.__toklist[i]
+ else:
+ if i not in self.__accumNames:
+ return self.__tokdict[i][-1][0]
+ else:
+ return ParseResults([ v[0] for v in self.__tokdict[i] ])
+
+ def __setitem__( self, k, v, isinstance=isinstance ):
+ if isinstance(v,_ParseResultsWithOffset):
+ self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
+ sub = v[0]
+ elif isinstance(k,int):
+ self.__toklist[k] = v
+ sub = v
+ else:
+ self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
+ sub = v
+ if isinstance(sub,ParseResults):
+ sub.__parent = wkref(self)
+
+ def __delitem__( self, i ):
+ if isinstance(i,(int,slice)):
+ mylen = len( self.__toklist )
+ del self.__toklist[i]
+
+ # convert int to slice
+ if isinstance(i, int):
+ if i < 0:
+ i += mylen
+ i = slice(i, i+1)
+ # get removed indices
+ removed = list(range(*i.indices(mylen)))
+ removed.reverse()
+ # fixup indices in token dictionary
+ for name in self.__tokdict:
+ occurrences = self.__tokdict[name]
+ for j in removed:
+ for k, (value, position) in enumerate(occurrences):
+ occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
+ else:
+ del self.__tokdict[i]
+
+ def __contains__( self, k ):
+ return k in self.__tokdict
+
+ def __len__( self ): return len( self.__toklist )
+ def __bool__(self): return len( self.__toklist ) > 0
+ __nonzero__ = __bool__
+ def __iter__( self ): return iter( self.__toklist )
+ def __reversed__( self ): return iter( self.__toklist[::-1] )
+ def keys( self ):
+ """Returns all named result keys."""
+ return self.__tokdict.keys()
+
+ def pop( self, index=-1 ):
+ """Removes and returns item at specified index (default=last).
+ Will work with either numeric indices or dict-key indices."""
+ ret = self[index]
+ del self[index]
+ return ret
+
+ def get(self, key, defaultValue=None):
+ """Returns named result matching the given key, or if there is no
+ such name, then returns the given C{defaultValue} or C{None} if no
+ C{defaultValue} is specified."""
+ if key in self:
+ return self[key]
+ else:
+ return defaultValue
+
+ def insert( self, index, insStr ):
+ """Inserts new element at location index in the list of parsed tokens."""
+ self.__toklist.insert(index, insStr)
+ # fixup indices in token dictionary
+ for name in self.__tokdict:
+ occurrences = self.__tokdict[name]
+ for k, (value, position) in enumerate(occurrences):
+ occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
+
+ def items( self ):
+ """Returns all named result keys and values as a list of tuples."""
+ return [(k,self[k]) for k in self.__tokdict]
+
+ def values( self ):
+ """Returns all named result values."""
+ return [ v[-1][0] for v in self.__tokdict.values() ]
+
+ def __getattr__( self, name ):
+ if True: #name not in self.__slots__:
+ if name in self.__tokdict:
+ if name not in self.__accumNames:
+ return self.__tokdict[name][-1][0]
+ else:
+ return ParseResults([ v[0] for v in self.__tokdict[name] ])
+ else:
+ return ""
+ return None
+
+ def __add__( self, other ):
+ ret = self.copy()
+ ret += other
+ return ret
+
+ def __iadd__( self, other ):
+ if other.__tokdict:
+ offset = len(self.__toklist)
+ addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
+ otheritems = other.__tokdict.items()
+ otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
+ for (k,vlist) in otheritems for v in vlist]
+ for k,v in otherdictitems:
+ self[k] = v
+ if isinstance(v[0],ParseResults):
+ v[0].__parent = wkref(self)
+
+ self.__toklist += other.__toklist
+ self.__accumNames.update( other.__accumNames )
+ return self
+
+ def __radd__(self, other):
+ if isinstance(other,int) and other == 0:
+ return self.copy()
+
+ def __repr__( self ):
+ return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
+
+ def __str__( self ):
+ out = []
+ for i in self.__toklist:
+ if isinstance(i, ParseResults):
+ out.append(_ustr(i))
+ else:
+ out.append(repr(i))
+ return '[' + ', '.join(out) + ']'
+
+ def _asStringList( self, sep='' ):
+ out = []
+ for item in self.__toklist:
+ if out and sep:
+ out.append(sep)
+ if isinstance( item, ParseResults ):
+ out += item._asStringList()
+ else:
+ out.append( _ustr(item) )
+ return out
+
+ def asList( self ):
+ """Returns the parse results as a nested list of matching tokens. Nested
+ C{ParseResults} are converted to lists recursively; all other tokens are
+ returned unchanged."""
+ out = []
+ for res in self.__toklist:
+ if isinstance(res,ParseResults):
+ out.append( res.asList() )
+ else:
+ out.append( res )
+ return out
+
+ def asDict( self ):
+ """Returns the named parse results as dictionary."""
+ return dict( self.items() )
+
+ def copy( self ):
+ """Returns a new copy of a C{ParseResults} object."""
+ ret = ParseResults( self.__toklist )
+ ret.__tokdict = self.__tokdict.copy()
+ ret.__parent = self.__parent
+ ret.__accumNames.update( self.__accumNames )
+ ret.__name = self.__name
+ return ret
+
+ def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
+ """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
+ nl = "\n"
+ out = []
+ namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
+ for v in vlist ] )
+ nextLevelIndent = indent + " "
+
+ # collapse out indents if formatting is not desired
+ if not formatted:
+ indent = ""
+ nextLevelIndent = ""
+ nl = ""
+
+ selfTag = None
+ if doctag is not None:
+ selfTag = doctag
+ else:
+ if self.__name:
+ selfTag = self.__name
+
+ if not selfTag:
+ if namedItemsOnly:
+ return ""
+ else:
+ selfTag = "ITEM"
+
+ out += [ nl, indent, "<", selfTag, ">" ]
+
+ worklist = self.__toklist
+ for i,res in enumerate(worklist):
+ if isinstance(res,ParseResults):
+ if i in namedItems:
+ out += [ res.asXML(namedItems[i],
+ namedItemsOnly and doctag is None,
+ nextLevelIndent,
+ formatted)]
+ else:
+ out += [ res.asXML(None,
+ namedItemsOnly and doctag is None,
+ nextLevelIndent,
+ formatted)]
+ else:
+ # individual token, see if there is a name for it
+ resTag = None
+ if i in namedItems:
+ resTag = namedItems[i]
+ if not resTag:
+ if namedItemsOnly:
+ continue
+ else:
+ resTag = "ITEM"
+ xmlBodyText = _xml_escape(_ustr(res))
+ out += [ nl, nextLevelIndent, "<", resTag, ">",
+ xmlBodyText,
+ "</", resTag, ">" ]
+
+ out += [ nl, indent, "</", selfTag, ">" ]
+ return "".join(out)
+
+ def __lookup(self,sub):
+ for k,vlist in self.__tokdict.items():
+ for v,loc in vlist:
+ if sub is v:
+ return k
+ return None
+
+ def getName(self):
+ """Returns the results name for this token expression."""
+ if self.__name:
+ return self.__name
+ elif self.__parent:
+ par = self.__parent()
+ if par:
+ return par.__lookup(self)
+ else:
+ return None
+ elif (len(self) == 1 and
+ len(self.__tokdict) == 1 and
+ self.__tokdict.values()[0][0][1] in (0,-1)):
+ return self.__tokdict.keys()[0]
+ else:
+ return None
+
+ def dump(self,indent='',depth=0):
+ """Diagnostic method for listing out the contents of a C{ParseResults}.
+ Accepts an optional C{indent} argument so that this string can be embedded
+ in a nested display of other data."""
+ out = []
+ out.append( indent+_ustr(self.asList()) )
+ keys = self.items()
+ keys.sort()
+ for k,v in keys:
+ if out:
+ out.append('\n')
+ out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
+ if isinstance(v,ParseResults):
+ if v.keys():
+ out.append( v.dump(indent,depth+1) )
+ else:
+ out.append(_ustr(v))
+ else:
+ out.append(_ustr(v))
+ return "".join(out)
+
+ # add support for pickle protocol
+ def __getstate__(self):
+ return ( self.__toklist,
+ ( self.__tokdict.copy(),
+ self.__parent is not None and self.__parent() or None,
+ self.__accumNames,
+ self.__name ) )
+
+ def __setstate__(self,state):
+ self.__toklist = state[0]
+ (self.__tokdict,
+ par,
+ inAccumNames,
+ self.__name) = state[1]
+ self.__accumNames = {}
+ self.__accumNames.update(inAccumNames)
+ if par is not None:
+ self.__parent = wkref(par)
+ else:
+ self.__parent = None
+
+ def __dir__(self):
+ return dir(super(ParseResults,self)) + list(self.keys())
+
+def col (loc,strg):
+ """Returns current column within a string, counting newlines as line separators.
+ The first column is number 1.
+
+ Note: the default parsing behavior is to expand tabs in the input string
+ before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+ on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+ consistent view of the parsed string, the parse location, and line and column
+ positions within the parsed string.
+ """
+ return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
+
+def lineno(loc,strg):
+ """Returns current line number within a string, counting newlines as line separators.
+ The first line is number 1.
+
+ Note: the default parsing behavior is to expand tabs in the input string
+ before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+ on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+ consistent view of the parsed string, the parse location, and line and column
+ positions within the parsed string.
+ """
+ return strg.count("\n",0,loc) + 1
+
+def line( loc, strg ):
+ """Returns the line of text containing loc within a string, counting newlines as line separators.
+ """
+ lastCR = strg.rfind("\n", 0, loc)
+ nextCR = strg.find("\n", loc)
+ if nextCR >= 0:
+ return strg[lastCR+1:nextCR]
+ else:
+ return strg[lastCR+1:]
+
+def _defaultStartDebugAction( instring, loc, expr ):
+ print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+
+def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
+ print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
+
+def _defaultExceptionDebugAction( instring, loc, expr, exc ):
+ print ("Exception raised:" + _ustr(exc))
+
+def nullDebugAction(*args):
+ """'Do-nothing' debug action, to suppress debugging output during parsing."""
+ pass
+
+'decorator to trim function calls to match the arity of the target'
+def _trim_arity(func, maxargs=2):
+ if func in singleArgBuiltins:
+ return lambda s,l,t: func(t)
+ limit = [0]
+ def wrapper(*args):
+ while 1:
+ try:
+ return func(*args[limit[0]:])
+ except TypeError:
+ if limit[0] <= maxargs:
+ limit[0] += 1
+ continue
+ raise
+ return wrapper
+
+class ParserElement(object):
+ """Abstract base level parser element class."""
+ DEFAULT_WHITE_CHARS = " \n\t\r"
+ verbose_stacktrace = False
+
+ def setDefaultWhitespaceChars( chars ):
+ """Overrides the default whitespace chars
+ """
+ ParserElement.DEFAULT_WHITE_CHARS = chars
+ setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
+
+ def inlineLiteralsUsing(cls):
+ """
+ Set class to be used for inclusion of string literals into a parser.
+ """
+ ParserElement.literalStringClass = cls
+ inlineLiteralsUsing = staticmethod(inlineLiteralsUsing)
+
+ def __init__( self, savelist=False ):
+ self.parseAction = list()
+ self.failAction = None
+ #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
+ self.strRepr = None
+ self.resultsName = None
+ self.saveAsList = savelist
+ self.skipWhitespace = True
+ self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+ self.copyDefaultWhiteChars = True
+ self.mayReturnEmpty = False # used when checking for left-recursion
+ self.keepTabs = False
+ self.ignoreExprs = list()
+ self.debug = False
+ self.streamlined = False
+ self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
+ self.errmsg = ""
+ self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
+ self.debugActions = ( None, None, None ) #custom debug actions
+ self.re = None
+ self.callPreparse = True # used to avoid redundant calls to preParse
+ self.callDuringTry = False
+
+ def copy( self ):
+ """Make a copy of this C{ParserElement}. Useful for defining different parse actions
+ for the same parsing pattern, using copies of the original parse element."""
+ cpy = copy.copy( self )
+ cpy.parseAction = self.parseAction[:]
+ cpy.ignoreExprs = self.ignoreExprs[:]
+ if self.copyDefaultWhiteChars:
+ cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+ return cpy
+
+ def setName( self, name ):
+ """Define name for this expression, for use in debugging."""
+ self.name = name
+ self.errmsg = "Expected " + self.name
+ if hasattr(self,"exception"):
+ self.exception.msg = self.errmsg
+ return self
+
+ def setResultsName( self, name, listAllMatches=False ):
+ """Define name for referencing matching tokens as a nested attribute
+ of the returned parse results.
+ NOTE: this returns a *copy* of the original C{ParserElement} object;
+ this is so that the client can define a basic element, such as an
+ integer, and reference it in multiple places with different names.
+
+ You can also set results names using the abbreviated syntax,
+ C{expr("name")} in place of C{expr.setResultsName("name")} -
+ see L{I{__call__}<__call__>}.
+ """
+ newself = self.copy()
+ if name.endswith("*"):
+ name = name[:-1]
+ listAllMatches=True
+ newself.resultsName = name
+ newself.modalResults = not listAllMatches
+ return newself
+
+ def setBreak(self,breakFlag = True):
+ """Method to invoke the Python pdb debugger when this element is
+ about to be parsed. Set C{breakFlag} to True to enable, False to
+ disable.
+ """
+ if breakFlag:
+ _parseMethod = self._parse
+ def breaker(instring, loc, doActions=True, callPreParse=True):
+ import pdb
+ pdb.set_trace()
+ return _parseMethod( instring, loc, doActions, callPreParse )
+ breaker._originalParseMethod = _parseMethod
+ self._parse = breaker
+ else:
+ if hasattr(self._parse,"_originalParseMethod"):
+ self._parse = self._parse._originalParseMethod
+ return self
+
+ def setParseAction( self, *fns, **kwargs ):
+ """Define action to perform when successfully matching parse element definition.
+ Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
+ C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
+ - s = the original string being parsed (see note below)
+ - loc = the location of the matching substring
+ - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
+ If the functions in fns modify the tokens, they can return them as the return
+ value from fn, and the modified list of tokens will replace the original.
+ Otherwise, fn does not need to return any value.
+
+ Note: the default parsing behavior is to expand tabs in the input string
+ before starting the parsing process. See L{I{parseString}<parseString>} for more information
+ on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+ consistent view of the parsed string, the parse location, and line and column
+ positions within the parsed string.
+ """
+ self.parseAction = list(map(_trim_arity, list(fns)))
+ self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
+ return self
+
+ def addParseAction( self, *fns, **kwargs ):
+ """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
+ self.parseAction += list(map(_trim_arity, list(fns)))
+ self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
+ return self
+
+ def setFailAction( self, fn ):
+ """Define action to perform if parsing fails at this expression.
+ Fail action fn is a callable function that takes the arguments
+ C{fn(s,loc,expr,err)} where:
+ - s = string being parsed
+ - loc = location where expression match was attempted and failed
+ - expr = the parse expression that failed
+ - err = the exception thrown
+ The function returns no value. It may throw C{L{ParseFatalException}}
+ if it is desired to stop parsing immediately."""
+ self.failAction = fn
+ return self
+
+ def _skipIgnorables( self, instring, loc ):
+ exprsFound = True
+ while exprsFound:
+ exprsFound = False
+ for e in self.ignoreExprs:
+ try:
+ while 1:
+ loc,dummy = e._parse( instring, loc )
+ exprsFound = True
+ except ParseException:
+ pass
+ return loc
+
+ def preParse( self, instring, loc ):
+ if self.ignoreExprs:
+ loc = self._skipIgnorables( instring, loc )
+
+ if self.skipWhitespace:
+ wt = self.whiteChars
+ instrlen = len(instring)
+ while loc < instrlen and instring[loc] in wt:
+ loc += 1
+
+ return loc
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ return loc, []
+
+ def postParse( self, instring, loc, tokenlist ):
+ return tokenlist
+
+ #~ @profile
+ def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
+ debugging = ( self.debug ) #and doActions )
+
+ if debugging or self.failAction:
+ #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+ if (self.debugActions[0] ):
+ self.debugActions[0]( instring, loc, self )
+ if callPreParse and self.callPreparse:
+ preloc = self.preParse( instring, loc )
+ else:
+ preloc = loc
+ tokensStart = preloc
+ try:
+ try:
+ loc,tokens = self.parseImpl( instring, preloc, doActions )
+ except IndexError:
+ raise ParseException( instring, len(instring), self.errmsg, self )
+ except ParseBaseException:
+ #~ print ("Exception raised:", err)
+ err = None
+ if self.debugActions[2]:
+ err = sys.exc_info()[1]
+ self.debugActions[2]( instring, tokensStart, self, err )
+ if self.failAction:
+ if err is None:
+ err = sys.exc_info()[1]
+ self.failAction( instring, tokensStart, self, err )
+ raise
+ else:
+ if callPreParse and self.callPreparse:
+ preloc = self.preParse( instring, loc )
+ else:
+ preloc = loc
+ tokensStart = preloc
+ if self.mayIndexError or loc >= len(instring):
+ try:
+ loc,tokens = self.parseImpl( instring, preloc, doActions )
+ except IndexError:
+ raise ParseException( instring, len(instring), self.errmsg, self )
+ else:
+ loc,tokens = self.parseImpl( instring, preloc, doActions )
+
+ tokens = self.postParse( instring, loc, tokens )
+
+ retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
+ if self.parseAction and (doActions or self.callDuringTry):
+ if debugging:
+ try:
+ for fn in self.parseAction:
+ tokens = fn( instring, tokensStart, retTokens )
+ if tokens is not None:
+ retTokens = ParseResults( tokens,
+ self.resultsName,
+ asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+ modal=self.modalResults )
+ except ParseBaseException:
+ #~ print "Exception raised in user parse action:", err
+ if (self.debugActions[2] ):
+ err = sys.exc_info()[1]
+ self.debugActions[2]( instring, tokensStart, self, err )
+ raise
+ else:
+ for fn in self.parseAction:
+ tokens = fn( instring, tokensStart, retTokens )
+ if tokens is not None:
+ retTokens = ParseResults( tokens,
+ self.resultsName,
+ asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+ modal=self.modalResults )
+
+ if debugging:
+ #~ print ("Matched",self,"->",retTokens.asList())
+ if (self.debugActions[1] ):
+ self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
+
+ return loc, retTokens
+
+ def tryParse( self, instring, loc ):
+ try:
+ return self._parse( instring, loc, doActions=False )[0]
+ except ParseFatalException:
+ raise ParseException( instring, loc, self.errmsg, self)
+
+ # this method gets repeatedly called during backtracking with the same arguments -
+ # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
+    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
+        """Memoizing front end for C{_parseNoCache}.
+
+        Outcomes are stored in C{ParserElement._exprArgCache}, keyed on this expression
+        plus the call arguments.  A cached success is returned with a copy of its
+        tokens; a cached exception object is raised again.
+        """
+        key = (self,instring,loc,callPreParse,doActions)
+        if key not in ParserElement._exprArgCache:
+            try:
+                fresh = self._parseNoCache( instring, loc, doActions, callPreParse )
+                # store a copy of the tokens; the caller gets the original result
+                ParserElement._exprArgCache[ key ] = (fresh[0],fresh[1].copy())
+                return fresh
+            except ParseBaseException:
+                # remember the failure so the same attempt fails fast next time
+                ParserElement._exprArgCache[ key ] = sys.exc_info()[1]
+                raise
+        cached = ParserElement._exprArgCache[ key ]
+        if isinstance(cached, Exception):
+            raise cached
+        return (cached[0],cached[1].copy())
+
+    _parse = _parseNoCache
+
+    # argument cache for optimizing repeated calls when backtracking through recursive expressions
+    # (class-level dictionary; _parseCache stores results and exceptions in it)
+    _exprArgCache = {}
+    def resetCache():
+        """Empty the shared C{_exprArgCache} dictionary in place."""
+        ParserElement._exprArgCache.clear()
+    resetCache = staticmethod(resetCache)
+
+    _packratEnabled = False
+    def enablePackrat():
+        """Turn on "packrat" parsing, i.e. memoization of parse attempts.
+
+        Once enabled, a repeated parse attempt at the same string location (common
+        in complex grammars) returns a cached value instead of re-running the
+        parsing/validating code.  Both successful results and parsing exceptions
+        are memoized.
+
+        Because cached results bypass re-execution, this speedup may break programs
+        whose parse actions have side-effects, so packrat parsing is off when
+        pyparsing is first imported.  To activate it, call the static method
+        C{ParserElement.enablePackrat()}.  If your program uses C{psyco} to
+        "compile as you go", call C{enablePackrat} before calling C{psyco.full()};
+        otherwise Python will crash.  For best results, call C{enablePackrat()}
+        immediately after importing pyparsing.
+
+        Calling this method more than once has no additional effect.
+        """
+        if ParserElement._packratEnabled:
+            return
+        ParserElement._packratEnabled = True
+        # route every _parse call through the caching wrapper
+        ParserElement._parse = ParserElement._parseCache
+    enablePackrat = staticmethod(enablePackrat)
+
+    def parseString( self, instring, parseAll=False ):
+        """Run this parse expression against C{instring}, starting at location 0,
+        and return the matched tokens.  This is the main entry point for client
+        code once the complete expression has been built.
+
+        Set C{parseAll} to True to require that the entire input string be
+        successfully parsed (equivalent to ending the grammar with
+        C{L{StringEnd()}}).
+
+        Note: unless C{keepTabs} is set, C{parseString} calls C{expandtabs()} on the
+        input string, in order to report proper column numbers in parse actions.
+        If the input string contains tabs and the grammar uses parse actions that
+        use the C{loc} argument to index into the string being parsed, you can
+        ensure you have a consistent view of the input string by:
+         - calling C{parseWithTabs} on your grammar before calling C{parseString}
+           (see L{I{parseWithTabs}<parseWithTabs>})
+         - defining your parse action using the full C{(s,loc,toks)} signature, and
+           referencing the input string using the parse action's C{s} argument
+         - explicitly expanding the tabs in your input string before calling
+           C{parseString}
+        """
+        ParserElement.resetCache()
+        if not self.streamlined:
+            self.streamline()
+        for ignorable in self.ignoreExprs:
+            ignorable.streamline()
+        if not self.keepTabs:
+            instring = instring.expandtabs()
+        try:
+            endLoc, results = self._parse( instring, 0 )
+            if parseAll:
+                # skip trailing ignorables, then insist on end of input
+                endLoc = self.preParse( instring, endLoc )
+                endCheck = Empty() + StringEnd()
+                endCheck._parse( instring, endLoc )
+            return results
+        except ParseBaseException:
+            if ParserElement.verbose_stacktrace:
+                raise
+            # catch and re-raise exception from here, clears out pyparsing internal stack trace
+            caught = sys.exc_info()[1]
+            raise caught
+
+    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
+        """Scan the input string for expression matches.  This is a generator: each
+        match is yielded as a C{(tokens, start, end)} tuple holding the matching
+        tokens, start location, and end location.  May be called with optional
+        C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
+        C{overlap} is specified, then overlapping matches will be reported.
+
+        Note that the start and end locations are reported relative to the string
+        being parsed.  See L{I{parseString}<parseString>} for more information on parsing
+        strings with embedded tabs."""
+        if not self.streamlined:
+            self.streamline()
+        for e in self.ignoreExprs:
+            e.streamline()
+
+        if not self.keepTabs:
+            instring = _ustr(instring).expandtabs()
+        instrlen = len(instring)
+        loc = 0
+        # bind the bound methods to locals once; they are called on every iteration
+        preparseFn = self.preParse
+        parseFn = self._parse
+        ParserElement.resetCache()
+        matches = 0
+        try:
+            while loc <= instrlen and matches < maxMatches:
+                try:
+                    preloc = preparseFn( instring, loc )
+                    nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
+                except ParseException:
+                    # no match here: retry one character past the pre-parsed location
+                    loc = preloc+1
+                else:
+                    if nextLoc > loc:
+                        matches += 1
+                        yield tokens, preloc, nextLoc
+                        if overlap:
+                            # NOTE(review): 'nextloc' (lower-case l) is a different name from
+                            # 'nextLoc'; when pre-parsing from loc skips anything, scanning
+                            # resumes at nextLoc (end of the match), otherwise it advances by
+                            # a single character - confirm this is the intended overlap rule
+                            nextloc = preparseFn( instring, loc )
+                            if nextloc > loc:
+                                loc = nextLoc
+                            else:
+                                loc += 1
+                        else:
+                            loc = nextLoc
+                    else:
+                        # the parse did not advance past loc; step forward to avoid looping forever
+                        loc = preloc+1
+        except ParseBaseException:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                exc = sys.exc_info()[1]
+                raise exc
+
+    def transformString( self, instring ):
+        """Extension to C{L{scanString}} that rebuilds the input with every match
+        replaced by the tokens returned for it.  To use C{transformString}, define a
+        grammar and attach a parse action to it that modifies the returned token
+        list.  Invoking C{transformString()} on a target string will then scan for
+        matches, and replace the matched text patterns according to the logic in
+        the parse action.  C{transformString()} returns the resulting transformed
+        string.
+
+        Note: this sets C{keepTabs} to True on this expression and leaves it set."""
+        pieces = []
+        prevEnd = 0
+        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
+        # keep string locs straight between transformString and scanString
+        self.keepTabs = True
+        try:
+            for toks,start,end in self.scanString( instring ):
+                # unmatched text between the previous match and this one is kept verbatim
+                pieces.append( instring[prevEnd:start] )
+                if toks:
+                    if isinstance(toks,ParseResults):
+                        pieces.extend( toks.asList() )
+                    elif isinstance(toks,list):
+                        pieces.extend( toks )
+                    else:
+                        pieces.append( toks )
+                prevEnd = end
+            pieces.append( instring[prevEnd:] )
+            nonEmpty = [piece for piece in pieces if piece]
+            return "".join(map(_ustr,_flatten(nonEmpty)))
+        except ParseBaseException:
+            if ParserElement.verbose_stacktrace:
+                raise
+            # catch and re-raise exception from here, clears out pyparsing internal stack trace
+            caught = sys.exc_info()[1]
+            raise caught
+
+    def searchString( self, instring, maxMatches=_MAX_INT ):
+        """Another extension to C{L{scanString}}: collects just the tokens of every
+        match into a single C{ParseResults}, discarding the start and end locations.
+        May be called with optional C{maxMatches} argument, to clip searching after
+        'n' matches are found.
+        """
+        try:
+            found = []
+            for toks,_start,_end in self.scanString( instring, maxMatches ):
+                found.append( toks )
+            return ParseResults( found )
+        except ParseBaseException:
+            if ParserElement.verbose_stacktrace:
+                raise
+            # catch and re-raise exception from here, clears out pyparsing internal stack trace
+            caught = sys.exc_info()[1]
+            raise caught
+
+    def __add__(self, other ):
+        """Implementation of + operator - returns C{L{And}}.
+
+        A string operand is first wrapped with C{ParserElement.literalStringClass};
+        any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
+        return value of C{None}.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        if isinstance( other, ParserElement ):
+            return And( [ self, other ] )
+        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                SyntaxWarning, stacklevel=2)
+        return None
+
+    def __radd__(self, other ):
+        """Implementation of + operator when left operand is not a C{L{ParserElement}}.
+
+        A string operand is first wrapped with C{ParserElement.literalStringClass};
+        any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
+        return value of C{None}.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        if isinstance( other, ParserElement ):
+            return other + self
+        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                SyntaxWarning, stacklevel=2)
+        return None
+
+    def __sub__(self, other):
+        """Implementation of - operator, returns C{L{And}} with error stop.
+
+        The result is an C{And} of this element, an C{And._ErrorStop()} marker and
+        C{other}.  A string operand is first wrapped with
+        C{ParserElement.literalStringClass}; any other non-C{ParserElement} operand
+        produces a C{SyntaxWarning} and a return value of C{None}.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        if isinstance( other, ParserElement ):
+            return And( [ self, And._ErrorStop(), other ] )
+        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                SyntaxWarning, stacklevel=2)
+        return None
+
+    def __rsub__(self, other ):
+        """Implementation of - operator when left operand is not a C{L{ParserElement}}.
+
+        A string operand is first wrapped with C{ParserElement.literalStringClass};
+        any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
+        return value of C{None}.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        if isinstance( other, ParserElement ):
+            return other - self
+        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                SyntaxWarning, stacklevel=2)
+        return None
+
+    def __mul__(self,other):
+        """Implementation of * operator, allows use of C{expr * 3} in place of
+        C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
+        tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
+        may also include C{None} as in:
+         - C{expr*(n,None)} or C{expr*(n,)} is equivalent
+              to C{expr*n + L{ZeroOrMore}(expr)}
+              (read as "at least n instances of C{expr}")
+         - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
+              (read as "0 to n instances of C{expr}")
+         - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
+         - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
+
+        Note that C{expr*(None,n)} does not raise an exception if
+        more than n exprs exist in the input stream; that is,
+        C{expr*(None,n)} does not enforce a maximum number of expr
+        occurrences.  If this behavior is desired, then write
+        C{expr*(None,n) + ~expr}
+
+        Raises C{TypeError} if C{other} is neither an int nor a tuple of
+        ints/C{None}, and C{ValueError} for a negative count, a maximum smaller
+        than the minimum, or a multiplier of 0 or (0,0).
+        """
+        if isinstance(other,int):
+            minElements, optElements = other,0
+        elif isinstance(other,tuple):
+            # pad short tuples with None so that (n,) behaves like (n,None)
+            other = (other + (None, None))[:2]
+            if other[0] is None:
+                other = (0, other[1])
+            if isinstance(other[0],int) and other[1] is None:
+                if other[0] == 0:
+                    return ZeroOrMore(self)
+                if other[0] == 1:
+                    return OneOrMore(self)
+                else:
+                    return self*other[0] + ZeroOrMore(self)
+            elif isinstance(other[0],int) and isinstance(other[1],int):
+                minElements, optElements = other
+                # from here on optElements counts only the optional repetitions
+                optElements -= minElements
+            else:
+                # apply the format arguments with %, so the message names the offending types
+                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" %
+                                (type(other[0]),type(other[1])))
+        else:
+            raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))
+
+        if minElements < 0:
+            raise ValueError("cannot multiply ParserElement by negative value")
+        if optElements < 0:
+            raise ValueError("second tuple value must be greater or equal to first tuple value")
+        if minElements == optElements == 0:
+            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
+
+        if (optElements):
+            def makeOptionalList(n):
+                # builds nested optionals: Optional(self + Optional(self + ...)), n levels deep
+                if n>1:
+                    return Optional(self + makeOptionalList(n-1))
+                else:
+                    return Optional(self)
+            if minElements:
+                if minElements == 1:
+                    ret = self + makeOptionalList(optElements)
+                else:
+                    ret = And([self]*minElements) + makeOptionalList(optElements)
+            else:
+                ret = makeOptionalList(optElements)
+        else:
+            if minElements == 1:
+                ret = self
+            else:
+                ret = And([self]*minElements)
+        return ret
+
+    def __rmul__(self, other):
+        """Reflected * operator (multiplier on the left); delegates to C{__mul__}."""
+        return self.__mul__(other)
+
+    def __or__(self, other ):
+        """Implementation of | operator - returns C{L{MatchFirst}}.
+
+        A string operand is first wrapped with C{ParserElement.literalStringClass};
+        any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
+        return value of C{None}.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        if isinstance( other, ParserElement ):
+            return MatchFirst( [ self, other ] )
+        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                SyntaxWarning, stacklevel=2)
+        return None
+
+    def __ror__(self, other ):
+        """Implementation of | operator when left operand is not a C{L{ParserElement}}.
+
+        A string operand is first wrapped with C{ParserElement.literalStringClass};
+        any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
+        return value of C{None}.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        if isinstance( other, ParserElement ):
+            return other | self
+        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                SyntaxWarning, stacklevel=2)
+        return None
+
+    def __xor__(self, other ):
+        """Implementation of ^ operator - returns C{L{Or}}.
+
+        A string operand is first wrapped with C{ParserElement.literalStringClass};
+        any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
+        return value of C{None}.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        if isinstance( other, ParserElement ):
+            return Or( [ self, other ] )
+        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                SyntaxWarning, stacklevel=2)
+        return None
+
+    def __rxor__(self, other ):
+        """Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
+
+        A string operand is first wrapped with C{ParserElement.literalStringClass};
+        any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
+        return value of C{None}.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        if isinstance( other, ParserElement ):
+            return other ^ self
+        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                SyntaxWarning, stacklevel=2)
+        return None
+
+    def __and__(self, other ):
+        """Implementation of & operator - returns C{L{Each}}.
+
+        A string operand is first wrapped with C{ParserElement.literalStringClass};
+        any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
+        return value of C{None}.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        if isinstance( other, ParserElement ):
+            return Each( [ self, other ] )
+        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                SyntaxWarning, stacklevel=2)
+        return None
+
+    def __rand__(self, other ):
+        """Implementation of & operator when left operand is not a C{L{ParserElement}}.
+
+        A string operand is first wrapped with C{ParserElement.literalStringClass};
+        any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
+        return value of C{None}.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        if isinstance( other, ParserElement ):
+            return other & self
+        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                SyntaxWarning, stacklevel=2)
+        return None
+
+    def __invert__( self ):
+        """Implementation of ~ operator - returns C{L{NotAny}} wrapping this element."""
+        return NotAny( self )
+
+    def __call__(self, name):
+        """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
+            userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
+        could be written as::
+            userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
+
+        If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
+        passed as C{True}.
+
+        Returns whatever C{setResultsName(name)} returns.
+        """
+        return self.setResultsName(name)
+
+    def suppress( self ):
+        """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
+           cluttering up returned output.  Returns a new C{Suppress} wrapping this element.
+        """
+        return Suppress( self )
+
+    def leaveWhitespace( self ):
+        """Disables the skipping of whitespace before matching the characters in the
+           C{ParserElement}'s defined pattern.  This is normally only used internally by
+           the pyparsing module, but may be needed in some whitespace-sensitive grammars.
+           Sets C{skipWhitespace} to False and returns C{self} so calls can be chained.
+        """
+        self.skipWhitespace = False
+        return self
+
+    def setWhitespaceChars( self, chars ):
+        """Overrides the default whitespace chars for this element.
+           Turns C{skipWhitespace} on, stores C{chars} as C{whiteChars}, clears
+           C{copyDefaultWhiteChars}, and returns C{self} so calls can be chained.
+        """
+        self.skipWhitespace = True
+        self.whiteChars = chars
+        self.copyDefaultWhiteChars = False
+        return self
+
+    def parseWithTabs( self ):
+        """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
+           Must be called before C{parseString} when the input grammar contains elements that
+           match C{<TAB>} characters.  Sets C{keepTabs} to True and returns C{self}."""
+        self.keepTabs = True
+        return self
+
+    def ignore( self, other ):
+        """Register an expression to be skipped over (e.g., comments) while doing
+           pattern matching; may be called repeatedly, to define multiple comment or
+           other ignorable patterns.
+
+           A copy of C{other} is appended to C{ignoreExprs}.  Anything that is not
+           already a C{Suppress} is wrapped in one first; a C{Suppress} already
+           present in the list is not added again.  Returns C{self}.
+        """
+        if not isinstance( other, Suppress ):
+            self.ignoreExprs.append( Suppress( other.copy() ) )
+        elif other not in self.ignoreExprs:
+            self.ignoreExprs.append( other.copy() )
+        return self
+
+    def setDebugActions( self, startAction, successAction, exceptionAction ):
+        """Enable display of debugging messages while doing pattern matching, using
+           the given callbacks.  Any callback passed as a false value (such as
+           C{None}) is replaced by the corresponding module default.  Sets C{debug}
+           to True and returns C{self}."""
+        onStart = startAction or _defaultStartDebugAction
+        onSuccess = successAction or _defaultSuccessDebugAction
+        onException = exceptionAction or _defaultExceptionDebugAction
+        self.debugActions = (onStart, onSuccess, onException)
+        self.debug = True
+        return self
+
+    def setDebug( self, flag=True ):
+        """Enable display of debugging messages while doing pattern matching.
+           Set C{flag} to True to enable (installing the default debug actions),
+           False to disable.  Returns C{self}."""
+        if not flag:
+            self.debug = False
+            return self
+        self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
+        return self
+
+    def __str__( self ):
+        """Return this element's C{name} attribute."""
+        return self.name
+
+    def __repr__( self ):
+        """Return the same text as the string conversion, obtained via C{_ustr(self)}."""
+        return _ustr(self)
+
+    def streamline( self ):
+        """Mark this element as streamlined and discard any cached C{strRepr}.
+           Returns C{self}."""
+        self.streamlined = True
+        self.strRepr = None
+        return self
+
+    def checkRecursion( self, parseElementList ):
+        """Recursion-check hook called by C{validate}; this implementation does nothing."""
+        pass
+
+    def validate( self, validateTrace=None ):
+        """Check defined expressions for valid structure, check for infinite recursive definitions.
+
+           C{validateTrace} is accepted for signature compatibility and is not used
+           by this implementation; it defaults to C{None} rather than a shared
+           mutable list.  The check itself is delegated to C{checkRecursion},
+           starting from an empty element list.
+        """
+        self.checkRecursion( [] )
+
+    def parseFile( self, file_or_filename, parseAll=False ):
+        """Execute the parse expression on the given file or filename.
+           If a filename is specified (instead of a file object),
+           the entire file is opened, read, and closed before parsing.
+
+           Anything with a C{read()} method is treated as a file object; otherwise
+           the argument is passed to C{open()} in text mode.  The contents are
+           handed to C{parseString} together with C{parseAll}, and its result is
+           returned.
+        """
+        try:
+            file_contents = file_or_filename.read()
+        except AttributeError:
+            f = open(file_or_filename, "r")
+            try:
+                file_contents = f.read()
+            finally:
+                # close the handle even if the read fails
+                f.close()
+        try:
+            return self.parseString(file_contents, parseAll)
+        except ParseBaseException:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                exc = sys.exc_info()[1]
+                raise exc
+
+    def getException(self):
+        """Build a C{ParseException} for this element with an empty input string,
+           location 0 and this element's C{errmsg}."""
+        return ParseException("",0,self.errmsg,self)
+
+    def __getattr__(self,aname):
+        """Fallback attribute lookup.  C{myException} is created on first access via
+           C{getException()} and stored on the instance; any other missing
+           attribute raises C{AttributeError}."""
+        if aname != "myException":
+            raise AttributeError("no such attribute " + aname)
+        ret = self.getException()
+        # store it on the instance so later lookups find it directly
+        self.myException = ret
+        return ret
+
+    def __eq__(self,other):
+        """Equality test.
+
+           - against another C{ParserElement}: true for the same object, or when the
+             two instance C{__dict__}s compare equal
+           - against a string: true when this expression parses the whole string
+             (C{parseString} with C{parseAll=True}) without a C{ParseBaseException}
+           - anything else: the result of comparing the C{super} proxy with C{other}
+        """
+        if isinstance(other, ParserElement):
+            return self is other or self.__dict__ == other.__dict__
+        if not isinstance(other, basestring):
+            return super(ParserElement,self)==other
+        try:
+            self.parseString(_ustr(other), parseAll=True)
+        except ParseBaseException:
+            return False
+        return True
+
+    def __ne__(self,other):
+        """Inequality test: the negation of C{self == other}."""
+        return not (self == other)
+
+    def __hash__(self):
+        """Hash on object identity (C{id(self)}), independent of C{__eq__}."""
+        return hash(id(self))
+
+    def __req__(self,other):
+        """Reflected-equality helper; returns C{self == other}."""
+        return self == other
+
+    def __rne__(self,other):
+        """Reflected-inequality helper; returns the negation of C{self == other}."""
+        return not (self == other)
+
+
+class Token(ParserElement):
+    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
+    def __init__( self ):
+        super(Token,self).__init__( savelist=False )
+
+    def setName(self, name):
+        """Set the name through the base class, then rebuild C{errmsg} as
+           C{"Expected <name>"}.  Returns whatever the base C{setName} returned."""
+        named = super(Token,self).setName(name)
+        self.errmsg = "Expected " + self.name
+        return named
+
+
+class Empty(Token):
+    """An empty token, will always match."""
+    def __init__( self ):
+        super(Empty,self).__init__()
+        # matching nothing is always possible and never indexes into the input
+        self.mayIndexError = False
+        self.mayReturnEmpty = True
+        self.name = "Empty"
+
+
+class NoMatch(Token):
+    """A token that will never match."""
+    def __init__( self ):
+        super(NoMatch,self).__init__()
+        self.name = "NoMatch"
+        self.errmsg = "Unmatchable token"
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Always fail: raise this element's reusable exception object, updated
+           with the current location and input string."""
+        failure = self.myException
+        failure.loc = loc
+        failure.pstr = instring
+        raise failure
+
+
+class Literal(Token):
+    """Token to exactly match a specified string."""
+    def __init__( self, matchString ):
+        """Store C{matchString} and its length.  An empty C{matchString} issues a
+           C{SyntaxWarning} and re-classes the instance as C{Empty}."""
+        super(Literal,self).__init__()
+        self.match = matchString
+        self.matchLen = len(matchString)
+        try:
+            self.firstMatchChar = matchString[0]
+        except IndexError:
+            warnings.warn("null string passed to Literal; use Empty() instead",
+                            SyntaxWarning, stacklevel=2)
+            self.__class__ = Empty
+        self.name = '"%s"' % _ustr(self.match)
+        self.errmsg = "Expected " + self.name
+        # a zero-length literal was re-classed as Empty above, and Empty declares
+        # mayReturnEmpty = True; keep the flag consistent with that instead of
+        # unconditionally forcing it to False
+        self.mayReturnEmpty = ( self.matchLen == 0 )
+        self.mayIndexError = False
+
+    # Performance tuning: this routine gets called a *lot*
+    # if this is a single character match string  and the first character matches,
+    # short-circuit as quickly as possible, and avoid calling startswith
+    #~ @profile
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Return C{(loc+matchLen, match)} when the input at C{loc} starts with the
+           literal; otherwise raise this element's reusable exception object."""
+        if (instring[loc] == self.firstMatchChar and
+            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
+            return loc+self.matchLen, self.match
+        #~ raise ParseException( instring, loc, self.errmsg )
+        exc = self.myException
+        exc.loc = loc
+        exc.pstr = instring
+        raise exc
+_L = Literal
+ParserElement.literalStringClass = Literal
+
+class Keyword(Token):
+    """Token to exactly match a specified string as a keyword, that is, it must be
+       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
+         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
+         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
+       Accepts two optional constructor arguments in addition to the keyword string:
+       C{identChars} is a string of characters that would be valid identifier characters,
+       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
+       matching, default is C{False}.
+
+       The implementation also rejects a match whose preceding character is an
+       identifier character (see C{parseImpl}).
+    """
+    DEFAULT_KEYWORD_CHARS = alphanums+"_$"
+
+    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
+        super(Keyword,self).__init__()
+        self.match = matchString
+        self.matchLen = len(matchString)
+        try:
+            self.firstMatchChar = matchString[0]
+        except IndexError:
+            # unlike Literal, an empty keyword only warns; firstMatchChar stays unset
+            warnings.warn("null string passed to Keyword; use Empty() instead",
+                            SyntaxWarning, stacklevel=2)
+        self.name = '"%s"' % self.match
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = False
+        self.mayIndexError = False
+        self.caseless = caseless
+        if caseless:
+            # compare in upper case: both the keyword and the identifier characters
+            self.caselessmatch = matchString.upper()
+            identChars = identChars.upper()
+        self.identChars = set(identChars)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Match the keyword at C{loc}, requiring that it is neither followed nor
+           preceded by a character in C{identChars}.  Returns
+           C{(loc+matchLen, match)} or raises this element's reusable exception."""
+        if self.caseless:
+            # each 'or' below guards the index expression that follows it
+            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
+                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
+                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
+                return loc+self.matchLen, self.match
+        else:
+            if (instring[loc] == self.firstMatchChar and
+                (self.matchLen==1 or instring.startswith(self.match,loc)) and
+                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
+                (loc == 0 or instring[loc-1] not in self.identChars) ):
+                return loc+self.matchLen, self.match
+        #~ raise ParseException( instring, loc, self.errmsg )
+        exc = self.myException
+        exc.loc = loc
+        exc.pstr = instring
+        raise exc
+
+    def copy(self):
+        """Copy via the base class, then set the copy's C{identChars} to the current
+           class default."""
+        c = super(Keyword,self).copy()
+        # NOTE(review): this replaces any custom identChars given to the constructor
+        # and stores the default string rather than a set (nor upper-cased for
+        # caseless keywords) - confirm that resetting to the default is intended
+        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
+        return c
+
+    def setDefaultKeywordChars( chars ):
+        """Overrides the default Keyword chars
+        """
+        Keyword.DEFAULT_KEYWORD_CHARS = chars
+    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
+
+class CaselessLiteral(Literal):
+    """Token to match a specified string, ignoring case of letters.
+       Note: the matched results will always be in the case of the given
+       match string, NOT the case of the input text.
+    """
+    def __init__( self, matchString ):
+        # the base class stores the upper-cased form as self.match for comparisons
+        super(CaselessLiteral,self).__init__( matchString.upper() )
+        # Preserve the defining literal.
+        self.returnString = matchString
+        self.name = "'%s'" % self.returnString
+        self.errmsg = "Expected " + self.name
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Compare the upper-cased input slice at C{loc} with the stored upper-cased
+           literal; on success return C{(loc+matchLen, returnString)}."""
+        candidate = instring[ loc:loc+self.matchLen ]
+        if candidate.upper() != self.match:
+            #~ raise ParseException( instring, loc, self.errmsg )
+            failure = self.myException
+            failure.loc = loc
+            failure.pstr = instring
+            raise failure
+        return loc+self.matchLen, self.returnString
+
+class CaselessKeyword(Keyword):
+    """Case-insensitive C{L{Keyword}}; equivalent to constructing C{Keyword} with
+       C{caseless=True}.  The match result is the keyword string as given to the
+       constructor.
+    """
+    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
+        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Match the keyword at C{loc}, ignoring case, requiring that it is neither
+           followed nor preceded by an identifier character - the same three
+           conditions as the caseless branch of C{Keyword.parseImpl}.  Returns
+           C{(loc+matchLen, match)} or raises this element's reusable exception."""
+        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
+             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
+             # also reject a match that starts in the middle of an identifier
+             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
+            return loc+self.matchLen, self.match
+        #~ raise ParseException( instring, loc, self.errmsg )
+        exc = self.myException
+        exc.loc = loc
+        exc.pstr = instring
+        raise exc
+
+class Word(Token):
+    """Token for matching words composed of allowed character sets.
+       Defined with string containing all allowed initial characters,
+       an optional string containing allowed body characters (if omitted,
+       defaults to the initial character set), and an optional minimum,
+       maximum, and/or exact length.  The default value for C{min} is 1 (a
+       minimum value < 1 is not valid); the default values for C{max} and C{exact}
+       are 0, meaning no maximum or exact length restriction. An optional
+       C{excludeChars} parameter can list characters that are to be removed from
+       the given C{initChars} and C{bodyChars} strings; useful to define a word of
+       all printables except for one or two characters, for instance.
+    """
+    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
+        super(Word,self).__init__()
+        if excludeChars:
+            initChars = ''.join([c for c in initChars if c not in excludeChars])
+            if bodyChars:
+                bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
+        self.initCharsOrig = initChars
+        self.initChars = set(initChars)
+        if bodyChars :
+            self.bodyCharsOrig = bodyChars
+            self.bodyChars = set(bodyChars)
+        else:
+            self.bodyCharsOrig = initChars
+            self.bodyChars = set(initChars)
+
+        self.maxSpecified = max > 0
+
+        if min < 1:
+            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.asKeyword = asKeyword
+
+        # fast path: with default length limits and no blank in the character sets,
+        # the word can be matched by a single compiled regular expression
+        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
+            if self.bodyCharsOrig == self.initCharsOrig:
+                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
+            elif len(self.initCharsOrig) == 1:
+                # the escaped literal form is only valid for a single *initial* character;
+                # a multi-character initChars string must be written as a character class
+                self.reString = "%s[%s]*" % \
+                                      (re.escape(self.initCharsOrig),
+                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
+            else:
+                self.reString = "[%s][%s]*" % \
+                                      (_escapeRegexRangeChars(self.initCharsOrig),
+                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
+            if self.asKeyword:
+                self.reString = r"\b"+self.reString+r"\b"
+            try:
+                self.re = re.compile( self.reString )
+            except Exception:
+                # best effort: if the generated pattern does not compile, parseImpl
+                # falls back to the character-by-character scan
+                self.re = None
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Match a word at C{loc} and return C{(endLoc, matchedText)}; raise this
+           element's reusable exception object when no acceptable word starts there."""
+        if self.re:
+            result = self.re.match(instring,loc)
+            if not result:
+                exc = self.myException
+                exc.loc = loc
+                exc.pstr = instring
+                raise exc
+
+            loc = result.end()
+            return loc, result.group()
+
+        if not(instring[ loc ] in self.initChars):
+            #~ raise ParseException( instring, loc, self.errmsg )
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+        start = loc
+        loc += 1
+        instrlen = len(instring)
+        bodychars = self.bodyChars
+        # never scan past maxLen characters or past the end of the input
+        maxloc = start + self.maxLen
+        maxloc = min( maxloc, instrlen )
+        while loc < maxloc and instring[loc] in bodychars:
+            loc += 1
+
+        throwException = False
+        if loc - start < self.minLen:
+            throwException = True
+        # with an explicit maximum, a word that keeps going past it is not a match
+        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
+            throwException = True
+        if self.asKeyword:
+            # keyword mode: neither neighbouring character may be a body character
+            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
+                throwException = True
+
+        if throwException:
+            #~ raise ParseException( instring, loc, self.errmsg )
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+
+        return loc, instring[start:loc]
+
+    def __str__( self ):
+        """Return the assigned name when there is one, otherwise a generated
+           C{W:(...)} description that is cached in C{strRepr}."""
+        try:
+            return super(Word,self).__str__()
+        except Exception:
+            # no name available from the base class; fall through to the generated text
+            pass
+
+
+        if self.strRepr is None:
+
+            def charsAsStr(s):
+                # abbreviate long character sets to their first four characters
+                if len(s)>4:
+                    return s[:4]+"..."
+                else:
+                    return s
+
+            if ( self.initCharsOrig != self.bodyCharsOrig ):
+                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
+            else:
+                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
+
+        return self.strRepr
+
+
+class Regex(Token):
+    """Token for matching strings that match a given regular expression.
+       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
+    """
+    compiledREtype = type(re.compile("[A-Z]"))
+    def __init__( self, pattern, flags=0):
+        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.
+
+           C{pattern} may also be an already compiled regular expression object, in
+           which case it is used directly and its source text is recorded as
+           C{pattern}/C{reString}.  Any other type raises C{ValueError}; a string
+           that does not compile issues a C{SyntaxWarning} and re-raises the error."""
+        super(Regex,self).__init__()
+
+        if isinstance(pattern, basestring):
+            if len(pattern) == 0:
+                warnings.warn("null string passed to Regex; use Empty() instead",
+                        SyntaxWarning, stacklevel=2)
+
+            self.pattern = pattern
+            self.flags = flags
+
+            try:
+                self.re = re.compile(self.pattern, self.flags)
+                self.reString = self.pattern
+            except sre_constants.error:
+                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
+                    SyntaxWarning, stacklevel=2)
+                raise
+
+        elif isinstance(pattern, Regex.compiledREtype):
+            self.re = pattern
+            # record the pattern's source text, not the repr of the compiled object
+            self.pattern = \
+            self.reString = pattern.pattern
+            self.flags = flags
+
+        else:
+            raise ValueError("Regex may only be constructed with a string or a compiled RE object")
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Apply the compiled expression at C{loc}.  On success return the end
+           location and a C{ParseResults} holding the matched text, with every named
+           group of the match also stored under its group name; on failure raise
+           this element's reusable exception object."""
+        result = self.re.match(instring,loc)
+        if not result:
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+
+        loc = result.end()
+        d = result.groupdict()
+        ret = ParseResults(result.group())
+        if d:
+            for k in d:
+                ret[k] = d[k]
+        return loc,ret
+
+    def __str__( self ):
+        """Return the assigned name when there is one, otherwise a generated
+           C{Re:(...)} description that is cached in C{strRepr}."""
+        try:
+            return super(Regex,self).__str__()
+        except Exception:
+            # no name available from the base class; fall through to the generated text
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "Re:(%s)" % repr(self.pattern)
+
+        return self.strRepr
+
+
+class QuotedString(Token):
+ """Token for matching strings that are delimited by quoting characters.
+ """
+ def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
+ """
+ Defined with the following parameters:
+ - quoteChar - string of one or more characters defining the quote delimiting string
+ - escChar - character to escape quotes, typically backslash (default=None)
+ - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
+ - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
+ - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
+ - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
+ """
+ super(QuotedString,self).__init__()
+
+ # remove white space from quote chars - wont work anyway
+ quoteChar = quoteChar.strip()
+ if len(quoteChar) == 0:
+ warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+ raise SyntaxError()
+
+ if endQuoteChar is None:
+ endQuoteChar = quoteChar
+ else:
+ endQuoteChar = endQuoteChar.strip()
+ if len(endQuoteChar) == 0:
+ warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+ raise SyntaxError()
+
+ self.quoteChar = quoteChar
+ self.quoteCharLen = len(quoteChar)
+ self.firstQuoteChar = quoteChar[0]
+ self.endQuoteChar = endQuoteChar
+ self.endQuoteCharLen = len(endQuoteChar)
+ self.escChar = escChar
+ self.escQuote = escQuote
+ self.unquoteResults = unquoteResults
+
+ if multiline:
+ self.flags = re.MULTILINE | re.DOTALL
+ self.pattern = r'%s(?:[^%s%s]' % \
+ ( re.escape(self.quoteChar),
+ _escapeRegexRangeChars(self.endQuoteChar[0]),
+ (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+ else:
+ self.flags = 0
+ self.pattern = r'%s(?:[^%s\n\r%s]' % \
+ ( re.escape(self.quoteChar),
+ _escapeRegexRangeChars(self.endQuoteChar[0]),
+ (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+ if len(self.endQuoteChar) > 1:
+ self.pattern += (
+ '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
+ _escapeRegexRangeChars(self.endQuoteChar[i]))
+ for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
+ )
+ if escQuote:
+ self.pattern += (r'|(?:%s)' % re.escape(escQuote))
+ if escChar:
+ self.pattern += (r'|(?:%s.)' % re.escape(escChar))
+ charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
+ self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)
+ self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
+
+ try:
+ self.re = re.compile(self.pattern, self.flags)
+ self.reString = self.pattern
+ except sre_constants.error:
+ warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
+ SyntaxWarning, stacklevel=2)
+ raise
+
+ self.name = _ustr(self)
+ self.errmsg = "Expected " + self.name
+ self.mayIndexError = False
+ self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Match a quoted string starting at C{loc}.
+
+        Returns a C{(newloc, text)} tuple.  The compiled pattern is only tried
+        when the character at C{loc} equals the first character of the opening
+        quote.  If nothing matches, this element's C{myException} object is
+        updated with the location and input string and then raised.  When
+        C{unquoteResults} is set, the quote delimiters are stripped and escaped
+        characters / escaped quotes are collapsed.
+        """
+        result = None
+        if instring[loc] == self.firstQuoteChar:
+            result = self.re.match(instring,loc)
+        if not result:
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+
+        loc = result.end()
+        ret = result.group()
+
+        if self.unquoteResults:
+            # strip off quotes
+            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
+
+            if isinstance(ret,basestring):
+                # replace escaped characters; the replacement is a raw string so
+                # that "\g<1>" is a regex group reference and not an invalid
+                # Python string escape
+                if self.escChar:
+                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)
+
+                # replace escaped quotes
+                if self.escQuote:
+                    ret = ret.replace(self.escQuote, self.endQuoteChar)
+
+        return loc, ret
+
+    def __str__( self ):
+        """Return whatever the base-class C{__str__} produces; if that call fails
+        for any reason, return a cached description built from the opening and
+        closing quote strings."""
+        try:
+            return super(QuotedString,self).__str__()
+        except:
+            # fall through to the generated description below
+            pass
+
+        description = self.strRepr
+        if description is None:
+            description = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
+            self.strRepr = description
+
+        return description
+
+
+class CharsNotIn(Token):
+    """Token for matching words composed of characters *not* in a given set.
+    Defined with a string containing all disallowed characters, and an optional
+    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
+    minimum value < 1 is not valid); the default values for C{max} and C{exact}
+    are 0, meaning no maximum or exact length restriction.
+    """
+    def __init__( self, notChars, min=1, max=0, exact=0 ):
+        super(CharsNotIn,self).__init__()
+        self.skipWhitespace = False
+        self.notChars = notChars
+
+        if min < 1:
+            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
+
+        if exact > 0:
+            # an exact length overrides both bounds
+            self.minLen = exact
+            self.maxLen = exact
+        else:
+            self.minLen = min
+            if max > 0:
+                self.maxLen = max
+            else:
+                self.maxLen = _MAX_INT
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = ( self.minLen == 0 )
+        self.mayIndexError = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Consume a run of characters that are not in C{notChars}, starting at
+        C{loc}; return C{(newloc, matched_text)} or raise this element's
+        C{myException}."""
+        excluded = self.notChars
+        if instring[loc] in excluded:
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+
+        start = loc
+        # never scan past the maximum length or the end of the input
+        limit = min( start+self.maxLen, len(instring) )
+        loc += 1
+        while loc < limit and instring[loc] not in excluded:
+            loc += 1
+
+        if loc - start < self.minLen:
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+
+        return loc, instring[start:loc]
+
+    def __str__( self ):
+        try:
+            return super(CharsNotIn, self).__str__()
+        except:
+            pass
+
+        if self.strRepr is None:
+            # show at most the first four disallowed characters
+            if len(self.notChars) > 4:
+                shown = "!W:(%s...)" % self.notChars[:4]
+            else:
+                shown = "!W:(%s)" % self.notChars
+            self.strRepr = shown
+
+        return self.strRepr
+
+
+class White(Token):
+    """Special matching class for matching whitespace.  Normally, whitespace is ignored
+    by pyparsing grammars.  This class is included when some whitespace structures
+    are significant.  Define with a string containing the whitespace characters to be
+    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
+    as defined for the C{L{Word}} class."""
+    whiteStrs = {
+        " " : "<SPC>",
+        "\t": "<TAB>",
+        "\n": "<LF>",
+        "\r": "<CR>",
+        "\f": "<FF>",
+        }
+    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
+        super(White,self).__init__()
+        self.matchWhite = ws
+        # characters this element must match are removed from the skippable set
+        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
+        self.setWhitespaceChars( "".join(skippable) )
+        self.name = "".join([White.whiteStrs[c] for c in self.matchWhite])
+        self.mayReturnEmpty = True
+        self.errmsg = "Expected " + self.name
+
+        if exact > 0:
+            # an exact length overrides both bounds
+            self.minLen = exact
+            self.maxLen = exact
+        else:
+            self.minLen = min
+            if max > 0:
+                self.maxLen = max
+            else:
+                self.maxLen = _MAX_INT
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Consume a run of characters from C{matchWhite} starting at C{loc};
+        return C{(newloc, matched_text)} or raise this element's C{myException}."""
+        wanted = self.matchWhite
+        if instring[ loc ] not in wanted:
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+
+        start = loc
+        maxloc = min( start + self.maxLen, len(instring) )
+        loc += 1
+        while loc < maxloc and instring[loc] in wanted:
+            loc += 1
+
+        if loc - start < self.minLen:
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+
+        return loc, instring[start:loc]
+
+
+class _PositionToken(Token):
+    """Base class for the position-checking tokens defined below; the element
+    is named after its own class."""
+    def __init__( self ):
+        super(_PositionToken,self).__init__()
+        self.mayIndexError = False
+        self.mayReturnEmpty = True
+        self.name = self.__class__.__name__
+
+class GoToColumn(_PositionToken):
+    """Token to advance to a specific column of input text; useful for tabular report scraping."""
+    def __init__( self, colno ):
+        super(GoToColumn,self).__init__()
+        self.col = colno
+
+    def preParse( self, instring, loc ):
+        """Skip ignorable expressions and whitespace until the target column
+        (or a non-space character, or the end of input) is reached."""
+        if col(loc,instring) == self.col:
+            return loc
+
+        instrlen = len(instring)
+        if self.ignoreExprs:
+            loc = self._skipIgnorables( instring, loc )
+        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col:
+            loc += 1
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Return the text between C{loc} and the target column, failing if
+        C{loc} is already past that column."""
+        current = col( loc, instring )
+        if current > self.col:
+            raise ParseException( instring, loc, "Text not in expected column", self )
+        target = loc + self.col - current
+        return target, instring[ loc: target ]
+
+class LineStart(_PositionToken):
+    """Matches if current position is at the beginning of a line within the parse string"""
+    def __init__( self ):
+        super(LineStart,self).__init__()
+        # newline is significant here, so it is removed from the skippable whitespace
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
+        self.errmsg = "Expected start of line"
+
+    def preParse( self, instring, loc ):
+        """Advance C{loc} by one when the base-class pre-parse stops on a newline."""
+        stopped_at = super(LineStart,self).preParse(instring,loc)
+        if instring[stopped_at] == "\n":
+            return loc + 1
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        # fail unless we are at offset 0, at the first position reachable from
+        # offset 0 by pre-parsing, or immediately after a newline
+        if ( loc != 0 and
+             loc != self.preParse( instring, 0 ) and
+             instring[loc-1] != "\n" ):
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+        return loc, []
+
+class LineEnd(_PositionToken):
+    """Matches if current position is at the end of a line within the parse string"""
+    def __init__( self ):
+        super(LineEnd,self).__init__()
+        # newline is significant here, so it is removed from the skippable whitespace
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
+        self.errmsg = "Expected end of line"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Return C{(loc+1, "\\n")} on a newline, C{(loc+1, [])} exactly at the end
+        of the input, and raise this element's C{myException} otherwise."""
+        instrlen = len(instring)
+        if loc < instrlen and instring[loc] == "\n":
+            return loc+1, "\n"
+        if loc == instrlen:
+            return loc+1, []
+
+        exc = self.myException
+        exc.loc = loc
+        exc.pstr = instring
+        raise exc
+
+class StringStart(_PositionToken):
+    """Matches if current position is at the beginning of the parse string"""
+    def __init__( self ):
+        super(StringStart,self).__init__()
+        self.errmsg = "Expected start of text"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        # a non-zero loc is still accepted when everything before it is just
+        # whitespace and ignorables, i.e. pre-parsing from 0 lands on loc
+        if loc != 0 and loc != self.preParse( instring, 0 ):
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+        return loc, []
+
+class StringEnd(_PositionToken):
+    """Matches if current position is at the end of the parse string"""
+    def __init__( self ):
+        super(StringEnd,self).__init__()
+        self.errmsg = "Expected end of text"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Succeed only at or beyond the end of C{instring}.
+
+        Returns C{(loc+1, [])} exactly at the end and C{(loc, [])} past it;
+        raises this element's C{myException} when input remains.
+        """
+        instrlen = len(instring)
+        if loc < instrlen:
+            exc = self.myException
+            exc.loc = loc
+            exc.pstr = instring
+            raise exc
+        if loc == instrlen:
+            return loc+1, []
+        # loc > instrlen is the only remaining case; the original trailing
+        # "else: raise" branch could never be reached and has been removed
+        return loc, []
+
+class WordStart(_PositionToken):
+    r"""Matches if the current position is at the beginning of a Word, and
+    is not preceded by any character in a given set of C{wordChars}
+    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
+    the string being parsed, or at the beginning of a line.
+    """
+    # NOTE: the docstring is a raw string so that "\b" is rendered literally
+    # instead of being stored as a backspace character.
+    def __init__(self, wordChars = printables):
+        super(WordStart,self).__init__()
+        self.wordChars = set(wordChars)
+        self.errmsg = "Not at the start of a word"
+
+    def parseImpl(self, instring, loc, doActions=True ):
+        """Return C{(loc, [])} at offset 0, or when the previous character is not
+        a word character and the current one is; otherwise raise this
+        element's C{myException}."""
+        if loc != 0:
+            if (instring[loc-1] in self.wordChars or
+                instring[loc] not in self.wordChars):
+                exc = self.myException
+                exc.loc = loc
+                exc.pstr = instring
+                raise exc
+        return loc, []
+
+class WordEnd(_PositionToken):
+    r"""Matches if the current position is at the end of a Word, and
+    is not followed by any character in a given set of C{wordChars}
+    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
+    the string being parsed, or at the end of a line.
+    """
+    # NOTE: the docstring is a raw string so that "\b" is rendered literally
+    # instead of being stored as a backspace character.
+    def __init__(self, wordChars = printables):
+        super(WordEnd,self).__init__()
+        self.wordChars = set(wordChars)
+        self.skipWhitespace = False
+        self.errmsg = "Not at the end of a word"
+
+    def parseImpl(self, instring, loc, doActions=True ):
+        """Return C{(loc, [])} at or past the end of the input, or when the current
+        character is not a word character and the previous one is; otherwise
+        raise this element's C{myException}."""
+        instrlen = len(instring)
+        if instrlen>0 and loc<instrlen:
+            # NOTE(review): for loc == 0, instring[loc-1] wraps around to the last
+            # character of the input - confirm whether that is intended
+            if (instring[loc] in self.wordChars or
+                instring[loc-1] not in self.wordChars):
+                exc = self.myException
+                exc.loc = loc
+                exc.pstr = instring
+                raise exc
+        return loc, []
+
+
+
+class ParseExpression(ParserElement):
+    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
+
+    The contained expressions are kept in the list C{self.exprs}.
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(ParseExpression,self).__init__(savelist)
+        if isinstance( exprs, list ):
+            contained = exprs
+        elif isinstance( exprs, basestring ):
+            contained = [ Literal( exprs ) ]
+        else:
+            # any other iterable is copied into a list; a non-iterable is wrapped
+            try:
+                contained = list( exprs )
+            except TypeError:
+                contained = [ exprs ]
+        self.exprs = contained
+        self.callPreparse = False
+
+    def __getitem__( self, i ):
+        return self.exprs[i]
+
+    def append( self, other ):
+        """Add C{other} to the contained expressions and drop the cached string form."""
+        self.exprs.append( other )
+        self.strRepr = None
+        return self
+
+    def leaveWhitespace( self ):
+        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
+           all contained expressions."""
+        self.skipWhitespace = False
+        # work on copies so the originals keep their whitespace settings
+        self.exprs = [ e.copy() for e in self.exprs ]
+        for duplicate in self.exprs:
+            duplicate.leaveWhitespace()
+        return self
+
+    def ignore( self, other ):
+        """Register C{other} as ignorable on this expression and on every contained
+        expression; a C{Suppress} that is already registered is not added again."""
+        if isinstance( other, Suppress ) and other in self.ignoreExprs:
+            return self
+
+        super( ParseExpression, self).ignore( other )
+        newest = self.ignoreExprs[-1]
+        for e in self.exprs:
+            e.ignore( newest )
+        return self
+
+    def __str__( self ):
+        try:
+            return super(ParseExpression,self).__str__()
+        except:
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
+        return self.strRepr
+
+    def streamline( self ):
+        super(ParseExpression,self).streamline()
+
+        for e in self.exprs:
+            e.streamline()
+
+        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
+        # but only if there are no parse actions or resultsNames on the nested And's
+        # (likewise for Or's and MatchFirst's)
+        if len(self.exprs) != 2:
+            return self
+
+        ownClass = self.__class__
+        def canAbsorb( candidate ):
+            return ( isinstance( candidate, ownClass ) and
+                     not(candidate.parseAction) and
+                     candidate.resultsName is None and
+                     not candidate.debug )
+
+        first = self.exprs[0]
+        if canAbsorb( first ):
+            self.exprs = first.exprs[:] + [ self.exprs[1] ]
+            self.strRepr = None
+            self.mayReturnEmpty |= first.mayReturnEmpty
+            self.mayIndexError |= first.mayIndexError
+
+        # re-read the last element: the list may just have been rebuilt
+        last = self.exprs[-1]
+        if canAbsorb( last ):
+            self.exprs = self.exprs[:-1] + last.exprs[:]
+            self.strRepr = None
+            self.mayReturnEmpty |= last.mayReturnEmpty
+            self.mayIndexError |= last.mayIndexError
+
+        return self
+
+    def setResultsName( self, name, listAllMatches=False ):
+        return super(ParseExpression,self).setResultsName(name,listAllMatches)
+
+    def validate( self, validateTrace=[] ):
+        """Validate every contained expression, then check this expression for recursion."""
+        trace = validateTrace[:]+[self]
+        for e in self.exprs:
+            e.validate(trace)
+        self.checkRecursion( [] )
+
+    def copy(self):
+        """Return a copy whose contained expressions are themselves copies."""
+        duplicate = super(ParseExpression,self).copy()
+        duplicate.exprs = [e.copy() for e in self.exprs]
+        return duplicate
+
+class And(ParseExpression):
+    """Requires all given C{ParseExpression}s to be found in the given order.
+       Expressions may be separated by whitespace.
+       May be constructed using the C{'+'} operator.
+    """
+
+    class _ErrorStop(Empty):
+        """Marker element: once seen in C{And.parseImpl}, failures of the remaining
+        expressions are raised as C{ParseSyntaxException}."""
+        def __init__(self, *args, **kwargs):
+            super(And._ErrorStop,self).__init__(*args, **kwargs)
+            self.leaveWhitespace()
+
+    def __init__( self, exprs, savelist = True ):
+        super(And,self).__init__(exprs, savelist)
+        self.mayReturnEmpty = True
+        for e in self.exprs:
+            if not e.mayReturnEmpty:
+                self.mayReturnEmpty = False
+                break
+        # read the whitespace settings from the normalised self.exprs list, not
+        # from the raw argument: the base class accepts a string or an arbitrary
+        # iterable, for which exprs[0] is not a parser element (or not indexable)
+        self.setWhitespaceChars( self.exprs[0].whiteChars )
+        self.skipWhitespace = self.exprs[0].skipWhitespace
+        self.callPreparse = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Parse every contained expression in order and return the accumulated
+        C{(loc, tokens)}."""
+        # pass False as last arg to _parse for first element, since we already
+        # pre-parsed the string as part of our And pre-parsing
+        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
+        errorStop = False
+        for e in self.exprs[1:]:
+            if isinstance(e, And._ErrorStop):
+                errorStop = True
+                continue
+            if errorStop:
+                # after an _ErrorStop marker, any failure becomes a ParseSyntaxException
+                try:
+                    loc, exprtokens = e._parse( instring, loc, doActions )
+                except ParseSyntaxException:
+                    raise
+                except ParseBaseException:
+                    pe = sys.exc_info()[1]
+                    raise ParseSyntaxException(pe)
+                except IndexError:
+                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
+            else:
+                loc, exprtokens = e._parse( instring, loc, doActions )
+            if exprtokens or exprtokens.keys():
+                resultlist += exprtokens
+        return loc, resultlist
+
+    def __iadd__(self, other ):
+        if isinstance( other, basestring ):
+            # use the configurable literal class, as Or.__ixor__ and
+            # MatchFirst.__ior__ do, instead of hard-coding Literal
+            other = ParserElement.literalStringClass( other )
+        return self.append( other ) #And( [ self, other ] )
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+            # expressions after the first one that must consume input are not checked
+            if not e.mayReturnEmpty:
+                break
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
+
+        return self.strRepr
+
+
+
+class Or(ParseExpression):
+    """Requires that at least one C{ParseExpression} is found.
+       If two expressions match, the expression that matches the longest string will be used.
+       May be constructed using the C{'^'} operator.
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(Or,self).__init__(exprs, savelist)
+        # may return empty as soon as any single alternative may
+        canBeEmpty = False
+        for e in self.exprs:
+            if e.mayReturnEmpty:
+                canBeEmpty = True
+                break
+        self.mayReturnEmpty = canBeEmpty
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Try every alternative, then re-parse with the one that reached furthest;
+        if none matched, raise the failure that got furthest."""
+        maxExcLoc = -1
+        maxMatchLoc = -1
+        maxException = None
+        instrlen = len(instring)
+        for e in self.exprs:
+            try:
+                endLoc = e.tryParse( instring, loc )
+            except ParseException:
+                err = sys.exc_info()[1]
+                if err.loc > maxExcLoc:
+                    maxException = err
+                    maxExcLoc = err.loc
+            except IndexError:
+                if instrlen > maxExcLoc:
+                    maxException = ParseException(instring,instrlen,e.errmsg,self)
+                    maxExcLoc = instrlen
+            else:
+                if endLoc > maxMatchLoc:
+                    maxMatchLoc = endLoc
+                    maxMatchExp = e
+
+        if maxMatchLoc >= 0:
+            return maxMatchExp._parse( instring, loc, doActions )
+
+        if maxException is not None:
+            raise maxException
+        raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+    def __ixor__(self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        return self.append( other ) #Or( [ self, other ] )
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+
+class MatchFirst(ParseExpression):
+    """Requires that at least one C{ParseExpression} is found.
+       If two expressions match, the first one listed is the one that will match.
+       May be constructed using the C{'|'} operator.
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(MatchFirst,self).__init__(exprs, savelist)
+        if exprs:
+            self.mayReturnEmpty = False
+            for e in self.exprs:
+                if e.mayReturnEmpty:
+                    self.mayReturnEmpty = True
+                    break
+        else:
+            self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Return the result of the first alternative that parses at C{loc}.
+
+        If no alternative matches, the C{ParseException} that reached the
+        furthest location is raised; with no alternatives at all a generic
+        C{ParseException} is raised.
+        """
+        maxExcLoc = -1
+        maxException = None
+        for e in self.exprs:
+            try:
+                ret = e._parse( instring, loc, doActions )
+                return ret
+            except ParseException:
+                # fetch the exception via sys.exc_info(), as Or.parseImpl does,
+                # instead of the Python-2-only "except X, err" syntax
+                err = sys.exc_info()[1]
+                if err.loc > maxExcLoc:
+                    maxException = err
+                    maxExcLoc = err.loc
+            except IndexError:
+                if len(instring) > maxExcLoc:
+                    maxException = ParseException(instring,len(instring),e.errmsg,self)
+                    maxExcLoc = len(instring)
+
+        # only got here if no expression matched, raise exception for match that made it the furthest
+        if maxException is not None:
+            raise maxException
+        raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+    def __ior__(self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        return self.append( other ) #MatchFirst( [ self, other ] )
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+
+class Each(ParseExpression):
+ """Requires all given C{ParseExpression}s to be found, but in any order.
+ Expressions may be separated by whitespace.
+ May be constructed using the C{'&'} operator.
+ """
+ def __init__( self, exprs, savelist = True ):
+ super(Each,self).__init__(exprs, savelist)
+ # may return empty only if every contained expression may
+ self.mayReturnEmpty = True
+ for e in self.exprs:
+ if not e.mayReturnEmpty:
+ self.mayReturnEmpty = False
+ break
+ self.skipWhitespace = True
+ # the expression groups are computed lazily on the first parseImpl call
+ self.initExprGroups = True
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ # first call only: sort the contained expressions into optional,
+ # repeated-optional, repeated-required and required groups
+ if self.initExprGroups:
+ opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
+ opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ]
+ self.optionals = opt1 + opt2
+ self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
+ self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
+ self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
+ self.required += self.multirequired
+ self.initExprGroups = False
+ # trial pass: work on copies so the cached groups are left untouched
+ tmpLoc = loc
+ tmpReqd = self.required[:]
+ tmpOpt = self.optionals[:]
+ matchOrder = []
+
+ # keep sweeping over the remaining candidates with tryParse until a full
+ # sweep matches nothing, recording the order in which expressions matched
+ keepMatching = True
+ while keepMatching:
+ tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
+ failed = []
+ for e in tmpExprs:
+ try:
+ tmpLoc = e.tryParse( instring, tmpLoc )
+ except ParseException:
+ failed.append(e)
+ else:
+ matchOrder.append(e)
+ if e in tmpReqd:
+ tmpReqd.remove(e)
+ elif e in tmpOpt:
+ tmpOpt.remove(e)
+ if len(failed) == len(tmpExprs):
+ keepMatching = False
+
+ # anything still left in tmpReqd never matched
+ if tmpReqd:
+ missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
+ raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
+
+ # add any unmatched Optionals, in case they have default values defined
+ matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]
+
+ # real pass: parse again from the original loc, in the recorded order
+ resultlist = []
+ for e in matchOrder:
+ loc,results = e._parse(instring,loc,doActions)
+ resultlist.append(results)
+
+ # merge the per-expression results; a results name seen more than once
+ # gets the concatenation of its values instead of being overwritten
+ finalResults = ParseResults([])
+ for r in resultlist:
+ dups = {}
+ for k in r.keys():
+ if k in finalResults.keys():
+ tmp = ParseResults(finalResults[k])
+ tmp += ParseResults(r[k])
+ dups[k] = tmp
+ finalResults += ParseResults(r)
+ for k,v in dups.items():
+ finalResults[k] = v
+ return loc, finalResults
+
+ def __str__( self ):
+ if hasattr(self,"name"):
+ return self.name
+
+ if self.strRepr is None:
+ self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
+
+ return self.strRepr
+
+ def checkRecursion( self, parseElementList ):
+ subRecCheckList = parseElementList[:] + [ self ]
+ for e in self.exprs:
+ e.checkRecursion( subRecCheckList )
+
+
+class ParseElementEnhance(ParserElement):
+    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
+
+    Wraps a single contained expression, C{self.expr}, which may be C{None}.
+    """
+    def __init__( self, expr, savelist=False ):
+        super(ParseElementEnhance,self).__init__(savelist)
+        if isinstance( expr, basestring ):
+            expr = Literal(expr)
+        self.expr = expr
+        self.strRepr = None
+        if expr is not None:
+            # inherit parsing characteristics from the wrapped expression
+            self.mayIndexError = expr.mayIndexError
+            self.mayReturnEmpty = expr.mayReturnEmpty
+            self.setWhitespaceChars( expr.whiteChars )
+            self.skipWhitespace = expr.skipWhitespace
+            self.saveAsList = expr.saveAsList
+            self.callPreparse = expr.callPreparse
+            self.ignoreExprs.extend(expr.ignoreExprs)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Delegate to the wrapped expression; raise C{ParseException} when there is none."""
+        if self.expr is not None:
+            return self.expr._parse( instring, loc, doActions, callPreParse=False )
+        else:
+            raise ParseException("",loc,self.errmsg,self)
+
+    def leaveWhitespace( self ):
+        """Disable whitespace skipping on this element and on a copy of the
+        wrapped expression.  Returns C{self}."""
+        self.skipWhitespace = False
+        # copy only when there is something to copy; the original called
+        # self.expr.copy() before testing self.expr for None
+        if self.expr is not None:
+            self.expr = self.expr.copy()
+            self.expr.leaveWhitespace()
+        return self
+
+    def ignore( self, other ):
+        """Register C{other} as ignorable here and on the wrapped expression; a
+        C{Suppress} that is already registered is not added again."""
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                super( ParseElementEnhance, self).ignore( other )
+                if self.expr is not None:
+                    self.expr.ignore( self.ignoreExprs[-1] )
+        else:
+            super( ParseElementEnhance, self).ignore( other )
+            if self.expr is not None:
+                self.expr.ignore( self.ignoreExprs[-1] )
+        return self
+
+    def streamline( self ):
+        super(ParseElementEnhance,self).streamline()
+        if self.expr is not None:
+            self.expr.streamline()
+        return self
+
+    def checkRecursion( self, parseElementList ):
+        """Raise C{RecursiveGrammarException} if this element is already in
+        C{parseElementList}; otherwise continue into the wrapped expression."""
+        if self in parseElementList:
+            raise RecursiveGrammarException( parseElementList+[self] )
+        subRecCheckList = parseElementList[:] + [ self ]
+        if self.expr is not None:
+            self.expr.checkRecursion( subRecCheckList )
+
+    def validate( self, validateTrace=[] ):
+        # validateTrace is only copied, never mutated, so the shared default is safe
+        tmp = validateTrace[:]+[self]
+        if self.expr is not None:
+            self.expr.validate(tmp)
+        self.checkRecursion( [] )
+
+    def __str__( self ):
+        try:
+            return super(ParseElementEnhance,self).__str__()
+        except:
+            pass
+
+        if self.strRepr is None and self.expr is not None:
+            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
+        return self.strRepr
+
+
+
+class FollowedBy(ParseElementEnhance):
+    """Lookahead matching of the given parse expression.  C{FollowedBy}
+    does *not* advance the parsing position within the input string, it only
+    verifies that the specified parse expression matches at the current
+    position.  C{FollowedBy} always returns a null token list."""
+    def __init__( self, expr ):
+        super(FollowedBy,self).__init__(expr)
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        # tryParse raises if the lookahead fails; its result is deliberately
+        # discarded so the position does not move
+        self.expr.tryParse( instring, loc )
+        noTokens = []
+        return loc, noTokens
+
+
+class NotAny(ParseElementEnhance):
+    """Lookahead to disallow matching with the given parse expression.  C{NotAny}
+    does *not* advance the parsing position within the input string, it only
+    verifies that the specified parse expression does *not* match at the current
+    position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
+    always returns a null token list.  May be constructed using the '~' operator."""
+    def __init__( self, expr ):
+        super(NotAny,self).__init__(expr)
+        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
+        self.skipWhitespace = False
+        self.mayReturnEmpty = True
+        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        try:
+            self.expr.tryParse( instring, loc )
+        except (ParseException,IndexError):
+            # the unwanted expression is absent: succeed without consuming input
+            return loc, []
+
+        # the unwanted expression matched, so this element fails
+        exc = self.myException
+        exc.loc = loc
+        exc.pstr = instring
+        raise exc
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "~{" + _ustr(self.expr) + "}"
+
+        return self.strRepr
+
+
+
+class ZeroOrMore(ParseElementEnhance):
+    """Optional repetition of zero or more of the given expression."""
+    def __init__( self, expr ):
+        super(ZeroOrMore,self).__init__(expr)
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Parse the wrapped expression as many times as it matches; the first
+        failure ends the repetition and is not an error."""
+        tokens = []
+        try:
+            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+            skipIgnorables = ( len(self.ignoreExprs) > 0 )
+            while True:
+                preloc = loc
+                if skipIgnorables:
+                    preloc = self._skipIgnorables( instring, loc )
+                loc, moreTokens = self.expr._parse( instring, preloc, doActions )
+                if moreTokens or moreTokens.keys():
+                    tokens += moreTokens
+        except (ParseException,IndexError):
+            pass
+
+        return loc, tokens
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "[" + _ustr(self.expr) + "]..."
+
+        return self.strRepr
+
+    def setResultsName( self, name, listAllMatches=False ):
+        """Same as the base-class method, but the returned element always saves
+        its results as a list."""
+        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
+        named.saveAsList = True
+        return named
+
+
+
+class OneOrMore(ParseElementEnhance):
+    """Repetition of one or more of the given expression."""
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Parse the wrapped expression once (a failure here propagates), then as
+        many more times as it matches."""
+        # must be at least one
+        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+        try:
+            skipIgnorables = ( len(self.ignoreExprs) > 0 )
+            while True:
+                preloc = loc
+                if skipIgnorables:
+                    preloc = self._skipIgnorables( instring, loc )
+                loc, moreTokens = self.expr._parse( instring, preloc, doActions )
+                if moreTokens or moreTokens.keys():
+                    tokens += moreTokens
+        except (ParseException,IndexError):
+            pass
+
+        return loc, tokens
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + _ustr(self.expr) + "}..."
+
+        return self.strRepr
+
+    def setResultsName( self, name, listAllMatches=False ):
+        """Same as the base-class method, but the returned element always saves
+        its results as a list."""
+        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
+        named.saveAsList = True
+        return named
+
+
+class _NullToken(object):
+    """Placeholder object that is always false and renders as the empty string."""
+    def __nonzero__(self):
+        return False
+    # the truth test is spelled __nonzero__ in Python 2 and __bool__ in Python 3
+    __bool__ = __nonzero__
+    def __str__(self):
+        return ""
+
+# sentinel meaning "no default value was supplied" (None is a legitimate default)
+_optionalNotMatched = _NullToken()
+class Optional(ParseElementEnhance):
+    """Optional matching of the given expression.
+       A default return string can also be specified, if the optional expression
+       is not found.
+    """
+    def __init__( self, exprs, default=_optionalNotMatched ):
+        super(Optional,self).__init__( exprs, savelist=False )
+        self.defaultValue = default
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Parse the wrapped expression; on failure return the default value (also
+        under the wrapped expression's results name, if it has one) or no tokens."""
+        try:
+            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+        except (ParseException,IndexError):
+            fallback = self.defaultValue
+            if fallback is _optionalNotMatched:
+                tokens = []
+            elif self.expr.resultsName:
+                tokens = ParseResults([ fallback ])
+                tokens[self.expr.resultsName] = fallback
+            else:
+                tokens = [ fallback ]
+        return loc, tokens
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "[" + _ustr(self.expr) + "]"
+
+        return self.strRepr
+
+
+
+class SkipTo(ParseElementEnhance):
+ """Token for skipping over all undefined text until the matched expression is found.
+ If C{include} is set to true, the matched expression is also parsed (the skipped text
+ and matched expression are returned as a 2-element list). The C{ignore}
+ argument is used to define grammars (typically quoted strings and comments) that
+ might contain false matches. If C{failOn} is given (an expression or a string,
+ which is converted to a C{Literal}), the skip fails as soon as that expression
+ matches at a scanned position.
+ """
+ def __init__( self, other, include=False, ignore=None, failOn=None ):
+ super( SkipTo, self ).__init__( other )
+ self.ignoreExpr = ignore
+ self.mayReturnEmpty = True
+ self.mayIndexError = False
+ self.includeMatch = include
+ self.asList = False
+ # a plain string failOn is wrapped in a Literal; anything else is stored as given
+ if failOn is not None and isinstance(failOn, basestring):
+ self.failOn = Literal(failOn)
+ else:
+ self.failOn = failOn
+ self.errmsg = "No match found for "+_ustr(self.expr)
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ # Scan forward one position at a time until the target expression matches.
+ startLoc = loc
+ instrlen = len(instring)
+ expr = self.expr
+ # failParse marks a ParseException raised because failOn matched, so that
+ # the handler below re-raises it instead of advancing
+ failParse = False
+ while loc <= instrlen:
+ try:
+ if self.failOn:
+ try:
+ self.failOn.tryParse(instring, loc)
+ except ParseBaseException:
+ pass
+ else:
+ failParse = True
+ raise ParseException(instring, loc, "Found expression " + str(self.failOn))
+ failParse = False
+ # step over any run of ignorable expressions at this position
+ if self.ignoreExpr is not None:
+ while 1:
+ try:
+ loc = self.ignoreExpr.tryParse(instring,loc)
+ # print "found ignoreExpr, advance to", loc
+ except ParseBaseException:
+ break
+ # probe for the target without running parse actions; raises if absent
+ expr._parse( instring, loc, doActions=False, callPreParse=False )
+ skipText = instring[startLoc:loc]
+ if self.includeMatch:
+ # parse the target for real and append its tokens to the skipped text
+ loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
+ if mat:
+ skipRes = ParseResults( skipText )
+ skipRes += mat
+ return loc, [ skipRes ]
+ else:
+ return loc, [ skipText ]
+ else:
+ return loc, [ skipText ]
+ except (ParseException,IndexError):
+ if failParse:
+ raise
+ else:
+ loc += 1
+ # ran off the end of the input without finding the target
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+
+class Forward(ParseElementEnhance):
+    """Forward declaration of an expression to be defined later -
+       used for recursive grammars, such as algebraic infix notation.
+       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
+
+       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
+       Specifically, '|' has a lower precedence than '<<', so that::
+          fwdExpr << a | b | c
+       will actually be evaluated as::
+          (fwdExpr << a) | b | c
+       thereby leaving b and c out as parseable alternatives.  It is recommended that you
+       explicitly group the values inserted into the C{Forward}::
+          fwdExpr << (a | b | c)
+       Converting to use the '<<=' operator instead will avoid this problem.
+    """
+    def __init__( self, other=None ):
+        super(Forward,self).__init__( other, savelist=False )
+
+    def __lshift__( self, other ):
+        """Store C{other} as the wrapped expression and adopt its parsing
+        characteristics.  Returns C{None}."""
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass(other)
+        self.expr = other
+        self.strRepr = None
+        self.mayIndexError = self.expr.mayIndexError
+        self.mayReturnEmpty = self.expr.mayReturnEmpty
+        self.setWhitespaceChars( self.expr.whiteChars )
+        self.skipWhitespace = self.expr.skipWhitespace
+        self.saveAsList = self.expr.saveAsList
+        self.ignoreExprs.extend(self.expr.ignoreExprs)
+        return None
+
+    def __ilshift__( self, other ):
+        """In-place form of '<<'.  Must return C{self}: Python rebinds the
+        left-hand name to this method's return value, so aliasing C{__lshift__}
+        (which returns C{None}) would turn C{fwd <<= expr} into C{fwd = None}."""
+        self.__lshift__( other )
+        return self
+
+    def leaveWhitespace( self ):
+        # only this element is changed; nothing is propagated to the wrapped expression
+        self.skipWhitespace = False
+        return self
+
+    def streamline( self ):
+        # the flag is set before descending, so a recursive grammar that leads
+        # back to this element does not streamline it a second time
+        if not self.streamlined:
+            self.streamlined = True
+            if self.expr is not None:
+                self.expr.streamline()
+        return self
+
+    def validate( self, validateTrace=[] ):
+        # validateTrace is only copied, never mutated, so the shared default is safe
+        if self not in validateTrace:
+            tmp = validateTrace[:]+[self]
+            if self.expr is not None:
+                self.expr.validate(tmp)
+        self.checkRecursion([])
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        # temporarily switch to a class whose __str__ returns "...", so that a
+        # recursive reference back to this element does not recurse forever
+        self._revertClass = self.__class__
+        self.__class__ = _ForwardNoRecurse
+        try:
+            if self.expr is not None:
+                retString = _ustr(self.expr)
+            else:
+                retString = "None"
+        finally:
+            self.__class__ = self._revertClass
+        return self.__class__.__name__ + ": " + retString
+
+    def copy(self):
+        """Return a regular copy when an expression is set; otherwise return a new
+        C{Forward} that wraps this (still undefined) one."""
+        if self.expr is not None:
+            return super(Forward,self).copy()
+        else:
+            ret = Forward()
+            ret << self
+            return ret
+
+class _ForwardNoRecurse(Forward):
+    """Stand-in class that C{Forward.__str__} switches to while it is
+    rendering its contained expression; its string form is a fixed
+    placeholder."""
+    def __str__( self ):
+        placeholder = "..."
+        return placeholder
+
+class TokenConverter(ParseElementEnhance):
+    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
+
+    Note: the C{savelist} constructor argument is accepted but not used;
+    C{saveAsList} is always initialized to C{False} here.
+    """
+    def __init__( self, expr, savelist=False ):
+        super(TokenConverter,self).__init__( expr )#, savelist )
+        self.saveAsList = False
+
+class Upcase(TokenConverter):
+    """Converter to upper case all matching tokens.
+
+    Deprecated: constructing an instance emits a C{DeprecationWarning};
+    use the C{upcaseTokens} parse action instead.
+    """
+    def __init__(self, *args):
+        super(Upcase,self).__init__(*args)
+        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
+                       DeprecationWarning,stacklevel=2)
+
+    def postParse( self, instring, loc, tokenlist ):
+        # call each token's own upper() method; string.upper is only
+        # available in the Python 2 string module
+        return [ tok.upper() for tok in tokenlist ]
+
+
+class Combine(TokenConverter):
+    """Converter that joins all matching tokens into one string.
+    Unless C{'adjacent=False'} is given to the constructor, the matched
+    pieces must also be contiguous in the input string.
+    """
+    def __init__( self, expr, joinString="", adjacent=True ):
+        super(Combine,self).__init__( expr )
+        # turn off whitespace skipping for the contained expressions...
+        if adjacent:
+            self.leaveWhitespace()
+        self.adjacent = adjacent
+        # ...but keep it enabled on the Combine itself
+        self.skipWhitespace = True
+        self.joinString = joinString
+        self.callPreparse = True
+
+    def ignore( self, other ):
+        if not self.adjacent:
+            super( Combine, self).ignore( other )
+        else:
+            ParserElement.ignore(self, other)
+        return self
+
+    def postParse( self, instring, loc, tokenlist ):
+        # start from a copy so that results names are carried over, then
+        # replace the token list with the single joined string
+        joined = "".join(tokenlist._asStringList(self.joinString))
+        retToks = tokenlist.copy()
+        del retToks[:]
+        retToks += ParseResults([ joined ], modal=self.modalResults)
+
+        if self.resultsName and len(retToks.keys())>0:
+            return [ retToks ]
+        return retToks
+
+class Group(TokenConverter):
+    """Converter that nests the matched tokens in a list of their own - useful for keeping the tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions together."""
+    def __init__( self, expr ):
+        super(Group,self).__init__( expr )
+        self.saveAsList = True
+
+    def postParse( self, instring, loc, tokenlist ):
+        grouped = [ tokenlist ]
+        return grouped
+
+class Dict(TokenConverter):
+    """Converter to return a repetitive expression as a list, but also as a dictionary.
+    Each element can also be referenced using the first token in the expression as its key.
+    Useful for tabular report scraping when the first column can be used as an item key.
+    """
+    def __init__( self, exprs ):
+        super(Dict,self).__init__( exprs )
+        self.saveAsList = True
+
+    def postParse( self, instring, loc, tokenlist ):
+        """Add a keyed entry to C{tokenlist} for every non-empty element, using
+        the element's first token as the key; the list contents are kept."""
+        for i,tok in enumerate(tokenlist):
+            if len(tok) == 0:
+                continue
+            ikey = tok[0]
+            # integer keys are converted to their stripped string form
+            if isinstance(ikey,int):
+                ikey = _ustr(tok[0]).strip()
+            if len(tok)==1:
+                # key only: store an empty string as the value
+                tokenlist[ikey] = _ParseResultsWithOffset("",i)
+            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
+                # key plus one plain value: store the value directly
+                tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
+            else:
+                # otherwise the value is a copy of the element minus its key
+                dictvalue = tok.copy() #ParseResults(i)
+                del dictvalue[0]
+                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
+                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
+                else:
+                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
+
+        if self.resultsName:
+            return [ tokenlist ]
+        else:
+            return tokenlist
+
+
+class Suppress(TokenConverter):
+    """Converter that discards the tokens produced by a parsed expression."""
+    def postParse( self, instring, loc, tokenlist ):
+        # always hand back an empty token list
+        return []
+
+    def suppress( self ):
+        # already suppressing, nothing further to wrap
+        return self
+
+
+class OnlyOnce(object):
+    """Wrapper for a parse action that lets it run a single time; later calls
+    raise C{ParseException} until C{reset()} is called."""
+    def __init__(self, methodCall):
+        self.callable = _trim_arity(methodCall)
+        self.called = False
+    def __call__(self,s,l,t):
+        if self.called:
+            raise ParseException(s,l,"")
+        # the flag is only set once the wrapped action has returned
+        results = self.callable(s,l,t)
+        self.called = True
+        return results
+    def reset(self):
+        self.called = False
+
+def traceParseAction(f):
+    """Decorator for debugging parse actions.
+
+    The returned wrapper writes an "entering" line to stderr before calling
+    the parse action, and a "leaving" line afterwards showing either the
+    return value or the exception (which is re-raised).
+    """
+    f = _trim_arity(f)
+    # Resolve the display name once, through the portable __name__ attribute
+    # (func_name is Python 2 only), and tolerate callables that have no name -
+    # the same tolerance already applied when naming the wrapper below.
+    try:
+        funcName = f.__name__
+    except AttributeError:
+        funcName = _ustr(f)
+    def z(*paArgs):
+        thisFunc = funcName
+        s,l,t = paArgs[-3:]
+        if len(paArgs)>3:
+            # a leading extra argument is taken to be the bound instance
+            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
+        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
+        try:
+            ret = f(*paArgs)
+        except Exception:
+            exc = sys.exc_info()[1]
+            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
+            raise
+        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
+        return ret
+    try:
+        z.__name__ = f.__name__
+    except AttributeError:
+        pass
+    return z
+
+#
+# global helpers
+#
+def delimitedList( expr, delim=",", combine=False ):
+    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
+    Normally whitespace and comments may appear between the list elements and
+    the delimiters, and the result is a list of the element tokens with the
+    delimiters suppressed.  Passing C{combine=True} instead returns the whole
+    match as a single token string, delimiters included.
+    """
+    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
+    if not combine:
+        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
+    else:
+        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
+    return listExpr.setName(dlName)
+
+def countedArray( expr, intExpr=None ):
+    """Helper to define a counted list of expressions.
+    The pattern has the form::
+        integer expr expr expr...
+    where the leading integer says how many expr expressions follow.
+    The result is the list of expr tokens; the leading count token is suppressed.
+    """
+    arrayExpr = Forward()
+    def countFieldParseAction(s,l,t):
+        # rebuild the array expression for the count that was just parsed
+        count = t[0]
+        arrayExpr << (count and Group(And([expr]*count)) or Group(empty))
+        return []
+    if intExpr is not None:
+        intExpr = intExpr.copy()
+    else:
+        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
+    intExpr.setName("arrayLen")
+    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
+    return ( intExpr + arrayExpr )
+
+def _flatten(L):
+    """Return a flat list of the non-list items in C{L}, descending into nested lists."""
+    flat = []
+    for item in L:
+        if not isinstance(item,list):
+            flat.append(item)
+            continue
+        flat.extend(_flatten(item))
+    return flat
+
+def matchPreviousLiteral(expr):
+    """Helper to define an expression that is indirectly defined from
+    the tokens matched in a previous expression, that is, it looks
+    for a 'repeat' of a previous expression.  For example::
+        first = Word(nums)
+        second = matchPreviousLiteral(first)
+        matchExpr = first + ":" + second
+    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
+    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
+    If this is not desired, use C{matchPreviousExpr}.
+    Do *not* use with packrat parsing enabled.
+    """
+    rep = Forward()
+    def copyTokenToRepeater(s,l,t):
+        # rebind the repeater to whatever the first expression just matched
+        if not t:
+            rep << Empty()
+        elif len(t) == 1:
+            rep << t[0]
+        else:
+            # several tokens: flatten them and require each one in sequence
+            flatTokens = _flatten(t.asList())
+            rep << And( [ Literal(tok) for tok in flatTokens ] )
+    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
+    return rep
+
+def matchPreviousExpr(expr):
+    """Helper to define an expression that is indirectly defined from
+    the tokens matched in a previous expression, that is, it looks
+    for a 'repeat' of a previous expression.  For example::
+        first = Word(nums)
+        second = matchPreviousExpr(first)
+        matchExpr = first + ":" + second
+    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
+    expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
+    the expressions are evaluated first, and then compared, so
+    C{"1"} is compared with C{"10"}.
+    Do *not* use with packrat parsing enabled.
+    """
+    rep = Forward()
+    e2 = expr.copy()
+    rep << e2
+    def copyTokenToRepeater(s,l,t):
+        # remember what the first expression matched, then make the repeater
+        # reject any match whose flattened tokens differ
+        matchTokens = _flatten(t.asList())
+        def mustMatchTheseTokens(s,l,t):
+            theseTokens = _flatten(t.asList())
+            if theseTokens != matchTokens:
+                # report the real input and location rather than an empty
+                # string at position 0, so the failure can be diagnosed
+                raise ParseException(s,l,"Expected %s, found %s" % (matchTokens, theseTokens))
+        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
+    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
+    return rep
+
+def _escapeRegexRangeChars(s):
+    """Escape C{s} for use inside a regex character class: backslash, '^',
+    '-' and ']' get a leading backslash, newline and tab become their
+    two-character escapes."""
+    # the backslash comes first so that the escapes added below are not re-escaped
+    for special in r"\^-]":
+        s = s.replace(special,_bslash+special)
+    s = s.replace("\n",r"\n").replace("\t",r"\t")
+    return _ustr(s)
+
+def oneOf( strs, caseless=False, useRegex=True ):
+    """Helper to quickly define a set of alternative Literals, and makes sure to do
+    longest-first testing when there is a conflict, regardless of the input order,
+    but returns a C{L{MatchFirst}} for best performance.
+
+    Parameters:
+     - strs - a string of space-delimited literals, or a list of string literals
+     - caseless - (default=False) - treat all literals as caseless
+     - useRegex - (default=True) - as an optimization, will generate a Regex
+       object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
+       if creating a C{Regex} raises an exception)
+
+    An argument that is neither a string nor a list/tuple produces a
+    C{SyntaxWarning} and is treated as an empty set of alternatives.
+    """
+    if caseless:
+        isequal = ( lambda a,b: a.upper() == b.upper() )
+        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
+        parseElementClass = CaselessLiteral
+    else:
+        isequal = ( lambda a,b: a == b )
+        masks = ( lambda a,b: b.startswith(a) )
+        parseElementClass = Literal
+
+    if isinstance(strs,(list,tuple)):
+        symbols = list(strs[:])
+    elif isinstance(strs,basestring):
+        symbols = strs.split()
+    else:
+        warnings.warn("Invalid argument to oneOf, expected string or list",
+                SyntaxWarning, stacklevel=2)
+        # keep going with no alternatives instead of failing below with a
+        # NameError on an unbound 'symbols'
+        symbols = []
+
+    # drop duplicates, and move any symbol that is masked by an earlier,
+    # shorter prefix in front of that prefix (longest-first testing)
+    i = 0
+    while i < len(symbols)-1:
+        cur = symbols[i]
+        for j,other in enumerate(symbols[i+1:]):
+            if ( isequal(other, cur) ):
+                del symbols[i+j+1]
+                break
+            elif ( masks(cur, other) ):
+                del symbols[i+j+1]
+                symbols.insert(i,other)
+                cur = other
+                break
+        else:
+            # no change at this position, advance to the next symbol
+            i += 1
+
+    if not caseless and useRegex:
+        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
+        try:
+            if len(symbols)==len("".join(symbols)):
+                # every symbol is a single character: use a character class
+                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
+            else:
+                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
+        except Exception:
+            # Exception rather than a bare except, so that KeyboardInterrupt
+            # and SystemExit are not swallowed
+            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
+                    SyntaxWarning, stacklevel=2)
+
+
+    # last resort, just use MatchFirst
+    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
+
+def dictOf( key, value ):
+    """Helper to easily and clearly define a dictionary from a key pattern and a
+    value pattern.  Takes care of nesting the C{L{Dict}}, C{L{ZeroOrMore}}, and
+    C{L{Group}} tokens in the proper order.  The key pattern can include delimiting
+    markers or punctuation, as long as they are suppressed, thereby leaving the
+    significant key text.  The value pattern can include named results, so that the
+    C{Dict} results can include named token fields.
+    """
+    entry = Group ( key + value )
+    return Dict( ZeroOrMore( entry ) )
+
+def originalTextFor(expr, asString=True):
+    """Helper to return the original, untokenized text for a given expression.  Useful to
+    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+    revert separate tokens with intervening whitespace back to the original matching
+    input text.  Simpler to use than the parse action C{L{keepOriginalText}}, and does not
+    require the inspect module to chase up the call stack.  By default, returns a
+    string containing the original parsed text.
+
+    If the optional C{asString} argument is passed as C{False}, then the return value is a
+    C{L{ParseResults}} containing any results names that were originally matched, and a
+    single token containing the original matched text from the input string.  So if
+    the expression passed to C{L{originalTextFor}} contains expressions with defined
+    results names, you must set C{asString} to C{False} if you want to preserve those
+    results name values."""
+    # empty markers on either side of expr record where the match starts and ends
+    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+    endlocMarker = locMarker.copy()
+    endlocMarker.callPreparse = False
+    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
+    if not asString:
+        def extractText(s,l,t):
+            # replace the tokens with the raw text, keeping other results names
+            del t[:]
+            t.insert(0, s[t._original_start:t._original_end])
+            del t["_original_start"]
+            del t["_original_end"]
+    else:
+        def extractText(s,l,t):
+            return s[t._original_start:t._original_end]
+    matchExpr.setParseAction(extractText)
+    return matchExpr
+
+def ungroup(expr):
+    """Helper to undo pyparsing's default grouping of And expressions, even
+    if all but one are non-empty."""
+    converter = TokenConverter(expr)
+    return converter.setParseAction(lambda t:t[0])
+
+# convenience constants for positional expressions
+empty = Empty().setName("empty")
+lineStart = LineStart().setName("lineStart")
+lineEnd = LineEnd().setName("lineEnd")
+stringStart = StringStart().setName("stringStart")
+stringEnd = StringEnd().setName("stringEnd")
+
+# grammar used by srange() to parse a regex-style '[...]' character set
+_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
+_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
+# Strip the leading backslash and optional '0' first, then the 'x'/'X' marker.
+# A single lstrip(r'\0x') would also eat leading zero hex digits (turning
+# '\x00' into an empty string) and would not remove an upper-case 'X'.
+_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip('\\0').lstrip('xX'),16)))
+_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
+_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
+_charRange = Group(_singleChar + Suppress("-") + _singleChar)
+_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
+
+# a parsed range (a two-element ParseResults) expands to every character from
+# its first to its last element; a single character is passed through as is
+_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
+
+def srange(s):
+    r"""Helper to easily define string ranges for use in Word construction.  Borrows
+    syntax from regexp '[]' string range definitions::
+        srange("[0-9]")   -> "0123456789"
+        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
+        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
+    The input string must be enclosed in []'s, and the returned string is the expanded
+    character set joined into a single string.
+    The values enclosed in the []'s may be::
+        a single character
+        an escaped character with a leading backslash (such as \- or \])
+        an escaped hex character with a leading '\x' (\x21, which is a '!' character)
+          (\0x## is also supported for backwards compatibility)
+        an escaped octal character with a leading '\0' (\041, which is a '!' character)
+        a range of any of the above, separated by a dash ('a-z', etc.)
+        any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
+    If the string cannot be parsed or expanded, an empty string is returned.
+    """
+    try:
+        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
+    except Exception:
+        # best-effort helper: any parse or expansion error yields "", but
+        # KeyboardInterrupt and SystemExit are allowed to propagate
+        return ""
+
+def matchOnlyAtCol(n):
+    """Helper method for defining parse actions that only accept a match
+    starting at column C{n} of the input text.
+    """
+    def verifyCol(strg,locn,toks):
+        if col(locn,strg) == n:
+            return
+        raise ParseException(strg,locn,"matched token not at column %d" % n)
+    return verifyCol
+
+def replaceWith(replStr):
+    """Helper method for common parse actions that simply return a literal value.  Especially
+    useful when used with C{L{transformString<ParserElement.transformString>}()}.
+    """
+    def _replFunc(*args):
+        # whatever was matched is ignored; the replacement is the only token
+        replacement = [replStr]
+        return replacement
+    return _replFunc
+
+def removeQuotes(s,l,t):
+    """Helper parse action for removing quotation marks from parsed quoted strings.
+    To use, add this parse action to quoted string using::
+        quotedString.setParseAction( removeQuotes )
+    """
+    # drop the first and last character of the first token
+    quoted = t[0]
+    return quoted[1:-1]
+
+def upcaseTokens(s,l,t):
+    """Helper parse action that returns the tokens converted to upper case strings."""
+    return [ _ustr(tok).upper() for tok in t ]
+
+def downcaseTokens(s,l,t):
+    """Helper parse action that returns the tokens converted to lower case strings."""
+    return [ _ustr(tok).lower() for tok in t ]
+
+def keepOriginalText(s,startLoc,t):
+    """DEPRECATED - use new helper method C{L{originalTextFor}}.
+    Helper parse action to preserve original parsed text,
+    overriding any nested parse actions."""
+    try:
+        endloc = getTokensEndLoc()
+    except ParseException:
+        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
+    # swap the parsed tokens for the raw slice of input they were matched from
+    originalText = s[startLoc:endloc]
+    del t[:]
+    t += ParseResults(originalText)
+    return t
+
+def getTokensEndLoc():
+    """Method to be called from within a parse action to determine the end
+    location of the parsed tokens."""
+    import inspect
+    fstack = inspect.stack()
+    try:
+        # search up the stack (through intervening argument normalizers) for correct calling routine
+        for frameRecord in fstack[2:]:
+            if frameRecord[3] != "_parseNoCache":
+                continue
+            # the 'loc' local of that frame is the end location
+            return frameRecord[0].f_locals["loc"]
+        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
+    finally:
+        # drop the reference to the frame records before returning
+        del fstack
+
+def _makeTags(tagStr, xml):
+    """Internal helper to construct opening and closing tag expressions, given a tag name.
+
+    C{tagStr} is either a tag name string or an expression with a C{name}
+    attribute; C{xml} selects the XML form of the opening tag (double-quoted
+    attribute values, case-sensitive tag keyword) over the HTML form.
+    Returns the pair C{(openTag, closeTag)}; both carry a C{tag} attribute
+    holding the tag name.
+    """
+    if isinstance(tagStr,basestring):
+        resname = tagStr
+        # string tag names are matched caselessly unless building XML tags
+        tagStr = Keyword(tagStr, caseless=not xml)
+    else:
+        resname = tagStr.name
+
+    tagAttrName = Word(alphas,alphanums+"_-:")
+    if (xml):
+        # XML: every attribute has a double-quoted value
+        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
+        openTag = Suppress("<") + tagStr("tag") + \
+                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
+                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
+    else:
+        # HTML: attribute names are lower-cased, and the value is optional
+        # and may be quoted or an unquoted run of characters other than '>'
+        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
+        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
+        openTag = Suppress("<") + tagStr("tag") + \
+                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
+                Optional( Suppress("=") + tagAttrValue ) ))) + \
+                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
+    closeTag = Combine(_L("</") + tagStr + ">")
+
+    # results names are "start"/"end" plus the tag name in title case, with
+    # ':' treated as a word break and the pieces run together
+    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
+    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr)
+    openTag.tag = resname
+    closeTag.tag = resname
+    return openTag, closeTag
+
+def makeHTMLTags(tagStr):
+    """Helper to construct the pair of opening and closing tag expressions for an HTML tag name."""
+    tagPair = _makeTags( tagStr, False )
+    return tagPair
+
+def makeXMLTags(tagStr):
+    """Helper to construct the pair of opening and closing tag expressions for an XML tag name."""
+    tagPair = _makeTags( tagStr, True )
+    return tagPair
+
+def withAttribute(*args,**attrDict):
+    """Helper to create a validating parse action to be used with start tags created
+    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}.  Use C{withAttribute} to qualify a starting tag
+    with a required attribute value, to avoid false matches on common tags such as
+    C{<TD>} or C{<DIV>}.
+
+    Call C{withAttribute} with a series of attribute names and values. Specify the list
+    of filter attributes names and values as:
+     - keyword arguments, as in C{(align="right")}, or
+     - as an explicit dict with C{**} operator, when an attribute name is also a Python
+       reserved word, as in C{**{"class":"Customer", "align":"right"}}
+     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
+    For attribute names with a namespace prefix, you must use the second form.  Attribute
+    names are matched insensitive to upper/lower case.
+
+    To verify that the attribute exists, but without specifying a value, pass
+    C{withAttribute.ANY_VALUE} as the value.
+    """
+    # positional name-value tuples take priority over keyword arguments
+    pairs = attrDict.items()
+    if args:
+        pairs = args[:]
+    required = [ (name,value) for name,value in pairs ]
+    def pa(s,l,tokens):
+        for attrName,attrValue in required:
+            if attrName not in tokens:
+                raise ParseException(s,l,"no matching attribute " + attrName)
+            if attrValue == withAttribute.ANY_VALUE:
+                continue
+            if tokens[attrName] != attrValue:
+                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
+                                            (attrName, tokens[attrName], attrValue))
+    return pa
+withAttribute.ANY_VALUE = object()
+
+# associativity markers for operatorPrecedence(); each is a unique sentinel object
+opAssoc = _Constants()
+opAssoc.LEFT = object()
+opAssoc.RIGHT = object()
+
+def operatorPrecedence( baseExpr, opList ):
+    """Helper method for constructing grammars of expressions made up of
+    operators working in a precedence hierarchy.  Operators may be unary or
+    binary, left- or right-associative.  Parse actions can also be attached
+    to operator expressions.
+
+    Parameters:
+     - baseExpr - expression representing the most basic element for the nested
+       expression grammar
+     - opList - list of tuples, one for each operator precedence level in the
+       expression grammar; each tuple is of the form
+       (opExpr, numTerms, rightLeftAssoc, parseAction), where:
+        - opExpr is the pyparsing expression for the operator;
+          may also be a string, which will be converted to a Literal;
+          if numTerms is 3, opExpr is a tuple of two expressions, for the
+          two operators separating the 3 terms
+        - numTerms is the number of terms for this operator (must
+          be 1, 2, or 3)
+        - rightLeftAssoc is the indicator whether the operator is
+          right or left associative, using the pyparsing-defined
+          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
+        - parseAction is the parse action to be associated with
+          expressions matching this operator expression (the
+          parse action tuple member may be omitted)
+
+    Raises C{ValueError} for a numTerms other than 1, 2 or 3, for an
+    unrecognized associativity, or for a ternary opExpr that is not a pair.
+    """
+    ret = Forward()
+    # the innermost level is the base expression or a parenthesized full expression
+    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
+    for i,operDef in enumerate(opList):
+        # pad with None so that the parse action member may be omitted
+        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
+        if arity == 3:
+            if opExpr is None or len(opExpr) != 2:
+                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
+            opExpr1, opExpr2 = opExpr
+        thisExpr = Forward()#.setName("expr%d" % i)
+        if rightLeftAssoc == opAssoc.LEFT:
+            if arity == 1:
+                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
+            elif arity == 2:
+                if opExpr is not None:
+                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
+                else:
+                    # no operator expression: the terms are simply adjacent
+                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
+            elif arity == 3:
+                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
+                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
+            else:
+                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+        elif rightLeftAssoc == opAssoc.RIGHT:
+            if arity == 1:
+                # try to avoid LR with this extra test
+                if not isinstance(opExpr, Optional):
+                    opExpr = Optional(opExpr)
+                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
+            elif arity == 2:
+                if opExpr is not None:
+                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
+                else:
+                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
+            elif arity == 3:
+                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
+                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
+            else:
+                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+        else:
+            raise ValueError("operator must indicate right or left associativity")
+        if pa:
+            matchExpr.setParseAction( pa )
+        # each level matches its own operator form or falls back to the previous level
+        thisExpr << ( matchExpr | lastExpr )
+        lastExpr = thisExpr
+    ret << lastExpr
+    return ret
+
+# Quoted-string expressions.  Inside the quotes each pattern accepts any character
+# other than the quote, newline, carriage return or backslash, a doubled quote
+# character, a \x hex escape, or a backslash followed by any character.
+dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
+sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
+quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
+# a quoted string with a leading 'u', combined into a single token
+unicodeString = Combine(_L('u') + quotedString.copy())
+
+def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
+    """Helper method for defining nested lists enclosed in opening and closing
+    delimiters ("(" and ")" are the default).
+
+    Parameters:
+     - opener - opening character for a nested list (default="("); can also be a pyparsing expression
+     - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
+     - content - expression for items within the nested lists (default=None)
+     - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
+
+    If an expression is not provided for the content argument, the nested
+    expression will capture all whitespace-delimited content between delimiters
+    as a list of separate values.
+
+    Use the C{ignoreExpr} argument to define expressions that may contain
+    opening or closing characters that should not be treated as opening
+    or closing characters for nesting, such as quotedString or a comment
+    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
+    The default is L{quotedString}, but if no expressions are to be ignored,
+    then pass C{None} for this argument.
+
+    Raises C{ValueError} if opener and closer are equal, or if no content
+    expression is given and opener/closer are not both strings.
+    """
+    if opener == closer:
+        raise ValueError("opening and closing strings cannot be the same")
+    if content is None:
+        # build a default content expression; this needs string delimiters
+        if isinstance(opener,basestring) and isinstance(closer,basestring):
+            if len(opener) == 1 and len(closer)==1:
+                # single-character delimiters can simply be excluded by CharsNotIn
+                if ignoreExpr is not None:
+                    content = (Combine(OneOrMore(~ignoreExpr +
+                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+                else:
+                    content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
+                                ).setParseAction(lambda t:t[0].strip()))
+            else:
+                # multi-character delimiters need negative lookahead before each character
+                if ignoreExpr is not None:
+                    content = (Combine(OneOrMore(~ignoreExpr +
+                                    ~Literal(opener) + ~Literal(closer) +
+                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+                else:
+                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
+                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+        else:
+            raise ValueError("opening and closing arguments must be strings if no content expression is given")
+    ret = Forward()
+    # a nested list is a group of ignored expressions, further nested lists and content items
+    if ignoreExpr is not None:
+        ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
+    else:
+        ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
+    return ret
+
+def indentedBlock(blockStatementExpr, indentStack, indent=True):
+    """Helper method for defining space-delimited indentation blocks, such as
+    those used to define block statements in Python source code.
+
+    Parameters:
+     - blockStatementExpr - expression defining syntax of statement that
+         is repeated within the indented block
+     - indentStack - list created by caller to manage indentation stack
+         (multiple statementWithIndentedBlock expressions within a single grammar
+         should share a common indentStack)
+     - indent - boolean indicating whether block must be indented beyond
+         the current level; set to False for block of left-most statements
+         (default=True)
+
+    A valid block must contain at least one C{blockStatement}.
+
+    Note: a line-continuation ignore expression (backslash followed by end of
+    line) is added to the caller's C{blockStatementExpr}.
+    """
+    def checkPeerIndent(s,l,t):
+        # statement must start at exactly the current indentation column
+        if l >= len(s): return
+        curCol = col(l,s)
+        if curCol != indentStack[-1]:
+            if curCol > indentStack[-1]:
+                raise ParseFatalException(s,l,"illegal nesting")
+            raise ParseException(s,l,"not a peer entry")
+
+    def checkSubIndent(s,l,t):
+        # a deeper column opens a new block and is pushed on the stack
+        curCol = col(l,s)
+        if curCol > indentStack[-1]:
+            indentStack.append( curCol )
+        else:
+            raise ParseException(s,l,"not a subentry")
+
+    def checkUnindent(s,l,t):
+        # the block ends at end of input, or at a column to the left of the
+        # current level and no deeper than the enclosing level
+        if l >= len(s): return
+        curCol = col(l,s)
+        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
+            raise ParseException(s,l,"not an unindent")
+        indentStack.pop()
+
+    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
+    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
+    PEER = Empty().setParseAction(checkPeerIndent)
+    UNDENT = Empty().setParseAction(checkUnindent)
+    if indent:
+        smExpr = Group( Optional(NL) +
+            #~ FollowedBy(blockStatementExpr) +
+            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
+    else:
+        smExpr = Group( Optional(NL) +
+            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
+    blockStatementExpr.ignore(_bslash + LineEnd())
+    return smExpr
+
+# character sets built from code-point ranges 0xc0-0xff and 0xa1-0xbf
+# (0xd7 and 0xf7 are left out of alphas8bit and placed in punc8bit)
+alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
+punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
+
+# expressions matching any HTML opening or closing tag
+anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
+# the five entities &gt; &lt; &amp; &nbsp; &quot; and a parse action mapping
+# each to its character (None is returned for an unrecognized entity)
+commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
+_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
+replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
+
+# it's easy to get these comment structures wrong - they're very common, so may as well make them available
+cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")
+
+htmlComment = Regex(r"<!--[\s\S]*?-->")
+restOfLine = Regex(r".*").leaveWhitespace()
+dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
+cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")
+
+# Java comments use the same expression object as C++ comments
+javaStyleComment = cppStyleComment
+pythonStyleComment = Regex(r"#.*").setName("Python style comment")
+# one item of a comma-separated list: runs of non-comma printables, with
+# embedded spaces/tabs kept when not followed by a comma or end of line
+_noncomma = "".join( [ c for c in printables if c != "," ] )
+_commasepitem = Combine(OneOrMore(Word(_noncomma) +
+                                  Optional( Word(" \t") +
+                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
+# items may be quoted strings or bare items; a missing item yields ""
+commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
+
+
+# Self-test: when run as a script, parse a handful of simple SQL SELECT
+# statements (valid and invalid) and print the results or the parse error.
+if __name__ == "__main__":
+
+    def test( teststring ):
+        """Parse C{teststring} with C{simpleSQL} and print the tokens, the named
+        results and the XML form; on a parse error print the offending line
+        with a caret under the error column."""
+        try:
+            tokens = simpleSQL.parseString( teststring )
+            tokenlist = tokens.asList()
+            print (teststring + "->" + str(tokenlist))
+            print ("tokens = " + str(tokens))
+            print ("tokens.columns = " + str(tokens.columns))
+            print ("tokens.tables = " + str(tokens.tables))
+            print (tokens.asXML("SQL",True))
+        except ParseBaseException:
+            err = sys.exc_info()[1]
+            print (teststring + "->")
+            print (err.line)
+            print (" "*(err.column-1) + "^")
+            print (err)
+        print()
+
+    selectToken = CaselessLiteral( "select" )
+    fromToken = CaselessLiteral( "from" )
+
+    # identifiers may be dotted; column and table names are upper-cased
+    ident = Word( alphas, alphanums + "_$" )
+    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
+    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
+    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
+    tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
+    simpleSQL = ( selectToken + \
+                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
+                     fromToken + \
+                     tableNameList.setResultsName( "tables" ) )
+
+    test( "SELECT * from XYZZY, ABC" )
+    test( "select * from SYS.XYZZY" )
+    test( "Select A from Sys.dual" )
+    test( "Select AA,BB,CC from Sys.dual" )
+    test( "Select A, B, C from Sys.dual" )
+    test( "Select A, B, C from Sys.dual" )
+    # the remaining inputs include misspelled keywords and malformed
+    # statements, to exercise the error-reporting branch of test()
+    test( "Xelect A, B, C from Sys.dual" )
+    test( "Select A, B, C frox Sys.dual" )
+    test( "Select" )
+    test( "Select ^^^ frox Sys.dual" )
+    test( "Select A, B, C from Sys.dual, Table2 " )
diff --git a/src/pyparsingClassDiagram.JPG b/src/pyparsingClassDiagram.JPG
new file mode 100644
index 0000000..ef10424
--- /dev/null
+++ b/src/pyparsingClassDiagram.JPG
Binary files differ
diff --git a/src/pyparsingClassDiagram.PNG b/src/pyparsingClassDiagram.PNG
new file mode 100644
index 0000000..f59baaf
--- /dev/null
+++ b/src/pyparsingClassDiagram.PNG
Binary files differ
diff --git a/src/pyparsing_py2.py b/src/pyparsing_py2.py
new file mode 100644
index 0000000..bbe38b8
--- /dev/null
+++ b/src/pyparsing_py2.py
@@ -0,0 +1,3740 @@
+# module pyparsing.py
+#
+# Copyright (c) 2003-2011 Paul T. McGuire
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#from __future__ import generators
+
+__doc__ = \
+"""
+pyparsing module - Classes and methods to define and execute parsing grammars
+
+The pyparsing module is an alternative approach to creating and executing simple grammars,
+vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
+don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
+provides a library of classes that you use to construct the grammar directly in Python.
+
+Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
+
+ from pyparsing import Word, alphas
+
+ # define grammar of a greeting
+ greet = Word( alphas ) + "," + Word( alphas ) + "!"
+
+ hello = "Hello, World!"
+ print hello, "->", greet.parseString( hello )
+
+The program outputs the following::
+
+ Hello, World! -> ['Hello', ',', 'World', '!']
+
+The Python representation of the grammar is quite readable, owing to the self-explanatory
+class names, and the use of '+', '|' and '^' operators.
+
+The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
+object with named attributes.
+
+The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
+ - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
+ - quoted strings
+ - embedded comments
+"""
+
+__version__ = "1.5.7"
+__versionTime__ = "3 August 2012 05:00"
+__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
+
+import string
+from weakref import ref as wkref
+import copy
+import sys
+import warnings
+import re
+import sre_constants
+#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
+
+__all__ = [
+'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
+'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
+'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
+'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
+'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
+'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
+'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
+'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
+'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
+'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
+'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
+'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
+'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
+'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
+'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
+'indentedBlock', 'originalTextFor', 'ungroup',
+]
+
+"""
+Detect if we are running version 3.X and make appropriate changes
+Robert A. Clark
+"""
+_PY3K = sys.version_info[0] > 2
+if _PY3K:
+ _MAX_INT = sys.maxsize
+ basestring = str
+ unichr = chr
+ _ustr = str
+else:
+ _MAX_INT = sys.maxint
+ range = xrange
+ set = lambda s : dict( [(c,0) for c in s] )
+
+ def _ustr(obj):
+ """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
+ str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
+ then < returns the unicode object | encodes it with the default encoding | ... >.
+ """
+ if isinstance(obj,unicode):
+ return obj
+
+ try:
+ # If this works, then _ustr(obj) has the same behaviour as str(obj), so
+ # it won't break any existing code.
+ return str(obj)
+
+ except UnicodeEncodeError:
+ # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
+ # state that "The return value must be a string object". However, does a
+ # unicode object (being a subclass of basestring) count as a "string
+ # object"?
+ # If so, then return a unicode object:
+ return unicode(obj)
+ # Else encode it... but how? There are many choices... :)
+ # Replace unprintables with escape codes?
+ #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
+ # Replace unprintables with question marks?
+ #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
+ # ...
+
+# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
+singleArgBuiltins = []
+import __builtin__
+for fname in "sum len sorted reversed list tuple set any all min max".split():
+ try:
+ singleArgBuiltins.append(getattr(__builtin__,fname))
+ except AttributeError:
+ continue
+
+def _xml_escape(data):
+ """Escape &, <, >, ", ', etc. in a string of data."""
+
+ # ampersand must be replaced first
+ from_symbols = '&><"\''
+ to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
+ for from_,to_ in zip(from_symbols, to_symbols):
+ data = data.replace(from_, to_)
+ return data
+
+class _Constants(object):
+ pass
+
+alphas = string.ascii_lowercase + string.ascii_uppercase
+nums = "0123456789"
+hexnums = nums + "ABCDEFabcdef"
+alphanums = alphas + nums
+_bslash = chr(92)
+printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
+
+class ParseBaseException(Exception):
+ """base exception class for all parsing runtime exceptions"""
+ # Performance tuning: we construct a *lot* of these, so keep this
+ # constructor as small and fast as possible
+ def __init__( self, pstr, loc=0, msg=None, elem=None ):
+ self.loc = loc
+ if msg is None:
+ self.msg = pstr
+ self.pstr = ""
+ else:
+ self.msg = msg
+ self.pstr = pstr
+ self.parserElement = elem
+
+ def __getattr__( self, aname ):
+ """supported attributes by name are:
+ - lineno - returns the line number of the exception text
+ - col, column - returns the column number of the exception text
+ - line - returns the line containing the exception text
+ """
+ if( aname == "lineno" ):
+ return lineno( self.loc, self.pstr )
+ elif( aname in ("col", "column") ):
+ return col( self.loc, self.pstr )
+ elif( aname == "line" ):
+ return line( self.loc, self.pstr )
+ else:
+ raise AttributeError(aname)
+
+ def __str__( self ):
+ return "%s (at char %d), (line:%d, col:%d)" % \
+ ( self.msg, self.loc, self.lineno, self.column )
+ def __repr__( self ):
+ return _ustr(self)
+ def markInputline( self, markerString = ">!<" ):
+ """Extracts the exception line from the input string, and marks
+ the location of the exception with a special symbol.
+ """
+ line_str = self.line
+ line_column = self.column - 1
+ if markerString:
+ line_str = "".join( [line_str[:line_column],
+ markerString, line_str[line_column:]])
+ return line_str.strip()
+ def __dir__(self):
+ return "loc msg pstr parserElement lineno col line " \
+ "markInputline __str__ __repr__".split()
+
+class ParseException(ParseBaseException):
+ """exception thrown when parse expressions don't match class;
+ supported attributes by name are:
+ - lineno - returns the line number of the exception text
+ - col - returns the column number of the exception text
+ - line - returns the line containing the exception text
+ """
+ pass
+
+class ParseFatalException(ParseBaseException):
+ """user-throwable exception thrown when inconsistent parse content
+ is found; stops all parsing immediately"""
+ pass
+
+class ParseSyntaxException(ParseFatalException):
+ """just like C{L{ParseFatalException}}, but thrown internally when an
+ C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
+ an unbacktrackable syntax error has been found"""
+ def __init__(self, pe):
+ super(ParseSyntaxException, self).__init__(
+ pe.pstr, pe.loc, pe.msg, pe.parserElement)
+
+#~ class ReparseException(ParseBaseException):
+ #~ """Experimental class - parse actions can raise this exception to cause
+ #~ pyparsing to reparse the input string:
+ #~ - with a modified input string, and/or
+ #~ - with a modified start location
+ #~ Set the values of the ReparseException in the constructor, and raise the
+ #~ exception in a parse action to cause pyparsing to use the new string/location.
+ #~ Setting the values as None causes no change to be made.
+ #~ """
+ #~ def __init__( self, newstring, restartLoc ):
+ #~ self.newParseText = newstring
+ #~ self.reparseLoc = restartLoc
+
+class RecursiveGrammarException(Exception):
+ """exception thrown by C{validate()} if the grammar could be improperly recursive"""
+ def __init__( self, parseElementList ):
+ self.parseElementTrace = parseElementList
+
+ def __str__( self ):
+ return "RecursiveGrammarException: %s" % self.parseElementTrace
+
+class _ParseResultsWithOffset(object):
+ def __init__(self,p1,p2):
+ self.tup = (p1,p2)
+ def __getitem__(self,i):
+ return self.tup[i]
+ def __repr__(self):
+ return repr(self.tup)
+ def setOffset(self,i):
+ self.tup = (self.tup[0],i)
+
+class ParseResults(object):
+ """Structured parse results, to provide multiple means of access to the parsed data:
+ - as a list (C{len(results)})
+ - by list index (C{results[0], results[1]}, etc.)
+ - by attribute (C{results.<resultsName>})
+ """
+ #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
+ def __new__(cls, toklist, name=None, asList=True, modal=True ):
+ if isinstance(toklist, cls):
+ return toklist
+ retobj = object.__new__(cls)
+ retobj.__doinit = True
+ return retobj
+
+ # Performance tuning: we construct a *lot* of these, so keep this
+ # constructor as small and fast as possible
+ def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
+ if self.__doinit:
+ self.__doinit = False
+ self.__name = None
+ self.__parent = None
+ self.__accumNames = {}
+ if isinstance(toklist, list):
+ self.__toklist = toklist[:]
+ else:
+ self.__toklist = [toklist]
+ self.__tokdict = dict()
+
+ if name is not None and name:
+ if not modal:
+ self.__accumNames[name] = 0
+ if isinstance(name,int):
+ name = _ustr(name) # will always return a str, but use _ustr for consistency
+ self.__name = name
+ if not toklist in (None,'',[]):
+ if isinstance(toklist,basestring):
+ toklist = [ toklist ]
+ if asList:
+ if isinstance(toklist,ParseResults):
+ self[name] = _ParseResultsWithOffset(toklist.copy(),0)
+ else:
+ self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
+ self[name].__name = name
+ else:
+ try:
+ self[name] = toklist[0]
+ except (KeyError,TypeError,IndexError):
+ self[name] = toklist
+
+ def __getitem__( self, i ):
+ if isinstance( i, (int,slice) ):
+ return self.__toklist[i]
+ else:
+ if i not in self.__accumNames:
+ return self.__tokdict[i][-1][0]
+ else:
+ return ParseResults([ v[0] for v in self.__tokdict[i] ])
+
+ def __setitem__( self, k, v, isinstance=isinstance ):
+ if isinstance(v,_ParseResultsWithOffset):
+ self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
+ sub = v[0]
+ elif isinstance(k,int):
+ self.__toklist[k] = v
+ sub = v
+ else:
+ self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
+ sub = v
+ if isinstance(sub,ParseResults):
+ sub.__parent = wkref(self)
+
+ def __delitem__( self, i ):
+ if isinstance(i,(int,slice)):
+ mylen = len( self.__toklist )
+ del self.__toklist[i]
+
+ # convert int to slice
+ if isinstance(i, int):
+ if i < 0:
+ i += mylen
+ i = slice(i, i+1)
+ # get removed indices
+ removed = list(range(*i.indices(mylen)))
+ removed.reverse()
+ # fixup indices in token dictionary
+ for name in self.__tokdict:
+ occurrences = self.__tokdict[name]
+ for j in removed:
+ for k, (value, position) in enumerate(occurrences):
+ occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
+ else:
+ del self.__tokdict[i]
+
+ def __contains__( self, k ):
+ return k in self.__tokdict
+
+ def __len__( self ): return len( self.__toklist )
+ def __bool__(self): return len( self.__toklist ) > 0
+ __nonzero__ = __bool__
+ def __iter__( self ): return iter( self.__toklist )
+ def __reversed__( self ): return iter( self.__toklist[::-1] )
+ def keys( self ):
+ """Returns all named result keys."""
+ return self.__tokdict.keys()
+
+ def pop( self, index=-1 ):
+ """Removes and returns item at specified index (default=last).
+ Will work with either numeric indices or dict-key indices."""
+ ret = self[index]
+ del self[index]
+ return ret
+
+ def get(self, key, defaultValue=None):
+ """Returns named result matching the given key, or if there is no
+ such name, then returns the given C{defaultValue} or C{None} if no
+ C{defaultValue} is specified."""
+ if key in self:
+ return self[key]
+ else:
+ return defaultValue
+
+ def insert( self, index, insStr ):
+ """Inserts new element at location index in the list of parsed tokens."""
+ self.__toklist.insert(index, insStr)
+ # fixup indices in token dictionary
+ for name in self.__tokdict:
+ occurrences = self.__tokdict[name]
+ for k, (value, position) in enumerate(occurrences):
+ occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
+
+ def items( self ):
+ """Returns all named result keys and values as a list of tuples."""
+ return [(k,self[k]) for k in self.__tokdict]
+
+ def values( self ):
+ """Returns all named result values."""
+ return [ v[-1][0] for v in self.__tokdict.values() ]
+
+ def __getattr__( self, name ):
+ if True: #name not in self.__slots__:
+ if name in self.__tokdict:
+ if name not in self.__accumNames:
+ return self.__tokdict[name][-1][0]
+ else:
+ return ParseResults([ v[0] for v in self.__tokdict[name] ])
+ else:
+ return ""
+ return None
+
+ def __add__( self, other ):
+ ret = self.copy()
+ ret += other
+ return ret
+
+ def __iadd__( self, other ):
+ if other.__tokdict:
+ offset = len(self.__toklist)
+ addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
+ otheritems = other.__tokdict.items()
+ otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
+ for (k,vlist) in otheritems for v in vlist]
+ for k,v in otherdictitems:
+ self[k] = v
+ if isinstance(v[0],ParseResults):
+ v[0].__parent = wkref(self)
+
+ self.__toklist += other.__toklist
+ self.__accumNames.update( other.__accumNames )
+ return self
+
+ def __radd__(self, other):
+ if isinstance(other,int) and other == 0:
+ return self.copy()
+
+ def __repr__( self ):
+ return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
+
+ def __str__( self ):
+ out = []
+ for i in self.__toklist:
+ if isinstance(i, ParseResults):
+ out.append(_ustr(i))
+ else:
+ out.append(repr(i))
+ return '[' + ', '.join(out) + ']'
+
+ def _asStringList( self, sep='' ):
+ out = []
+ for item in self.__toklist:
+ if out and sep:
+ out.append(sep)
+ if isinstance( item, ParseResults ):
+ out += item._asStringList()
+ else:
+ out.append( _ustr(item) )
+ return out
+
+ def asList( self ):
+ """Returns the parse results as a nested list of matching tokens; nested C{ParseResults} become lists, other tokens are returned unchanged."""
+ out = []
+ for res in self.__toklist:
+ if isinstance(res,ParseResults):
+ out.append( res.asList() )
+ else:
+ out.append( res )
+ return out
+
+ def asDict( self ):
+ """Returns the named parse results as dictionary."""
+ return dict( self.items() )
+
+ def copy( self ):
+ """Returns a new copy of a C{ParseResults} object."""
+ ret = ParseResults( self.__toklist )
+ ret.__tokdict = self.__tokdict.copy()
+ ret.__parent = self.__parent
+ ret.__accumNames.update( self.__accumNames )
+ ret.__name = self.__name
+ return ret
+
+ def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
+ """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
+ nl = "\n"
+ out = []
+ namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
+ for v in vlist ] )
+ nextLevelIndent = indent + " "
+
+ # collapse out indents if formatting is not desired
+ if not formatted:
+ indent = ""
+ nextLevelIndent = ""
+ nl = ""
+
+ selfTag = None
+ if doctag is not None:
+ selfTag = doctag
+ else:
+ if self.__name:
+ selfTag = self.__name
+
+ if not selfTag:
+ if namedItemsOnly:
+ return ""
+ else:
+ selfTag = "ITEM"
+
+ out += [ nl, indent, "<", selfTag, ">" ]
+
+ worklist = self.__toklist
+ for i,res in enumerate(worklist):
+ if isinstance(res,ParseResults):
+ if i in namedItems:
+ out += [ res.asXML(namedItems[i],
+ namedItemsOnly and doctag is None,
+ nextLevelIndent,
+ formatted)]
+ else:
+ out += [ res.asXML(None,
+ namedItemsOnly and doctag is None,
+ nextLevelIndent,
+ formatted)]
+ else:
+ # individual token, see if there is a name for it
+ resTag = None
+ if i in namedItems:
+ resTag = namedItems[i]
+ if not resTag:
+ if namedItemsOnly:
+ continue
+ else:
+ resTag = "ITEM"
+ xmlBodyText = _xml_escape(_ustr(res))
+ out += [ nl, nextLevelIndent, "<", resTag, ">",
+ xmlBodyText,
+ "</", resTag, ">" ]
+
+ out += [ nl, indent, "</", selfTag, ">" ]
+ return "".join(out)
+
+ def __lookup(self,sub):
+ for k,vlist in self.__tokdict.items():
+ for v,loc in vlist:
+ if sub is v:
+ return k
+ return None
+
+ def getName(self):
+ """Returns the results name for this token expression."""
+ if self.__name:
+ return self.__name
+ elif self.__parent:
+ par = self.__parent()
+ if par:
+ return par.__lookup(self)
+ else:
+ return None
+ elif (len(self) == 1 and
+ len(self.__tokdict) == 1 and
+ self.__tokdict.values()[0][0][1] in (0,-1)):
+ return self.__tokdict.keys()[0]
+ else:
+ return None
+
+ def dump(self,indent='',depth=0):
+ """Diagnostic method for listing out the contents of a C{ParseResults}.
+ Accepts an optional C{indent} argument so that this string can be embedded
+ in a nested display of other data."""
+ out = []
+ out.append( indent+_ustr(self.asList()) )
+ keys = self.items()
+ keys.sort()
+ for k,v in keys:
+ if out:
+ out.append('\n')
+ out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
+ if isinstance(v,ParseResults):
+ if v.keys():
+ out.append( v.dump(indent,depth+1) )
+ else:
+ out.append(_ustr(v))
+ else:
+ out.append(_ustr(v))
+ return "".join(out)
+
+ # add support for pickle protocol
+ def __getstate__(self):
+ return ( self.__toklist,
+ ( self.__tokdict.copy(),
+ self.__parent is not None and self.__parent() or None,
+ self.__accumNames,
+ self.__name ) )
+
+ def __setstate__(self,state):
+ self.__toklist = state[0]
+ (self.__tokdict,
+ par,
+ inAccumNames,
+ self.__name) = state[1]
+ self.__accumNames = {}
+ self.__accumNames.update(inAccumNames)
+ if par is not None:
+ self.__parent = wkref(par)
+ else:
+ self.__parent = None
+
+ def __dir__(self):
+ return dir(super(ParseResults,self)) + list(self.keys())
+
+def col (loc,strg):
+ """Returns current column within a string, counting newlines as line separators.
+ The first column is number 1.
+
+ Note: the default parsing behavior is to expand tabs in the input string
+ before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+ on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+ consistent view of the parsed string, the parse location, and line and column
+ positions within the parsed string.
+ """
+ return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
+
+def lineno(loc,strg):
+ """Returns current line number within a string, counting newlines as line separators.
+ The first line is number 1.
+
+ Note: the default parsing behavior is to expand tabs in the input string
+ before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+ on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+ consistent view of the parsed string, the parse location, and line and column
+ positions within the parsed string.
+ """
+ return strg.count("\n",0,loc) + 1
+
+def line( loc, strg ):
+ """Returns the line of text containing loc within a string, counting newlines as line separators.
+ """
+ lastCR = strg.rfind("\n", 0, loc)
+ nextCR = strg.find("\n", loc)
+ if nextCR >= 0:
+ return strg[lastCR+1:nextCR]
+ else:
+ return strg[lastCR+1:]
+
+def _defaultStartDebugAction( instring, loc, expr ):
+ print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+
+def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
+ print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
+
+def _defaultExceptionDebugAction( instring, loc, expr, exc ):
+ print ("Exception raised:" + _ustr(exc))
+
+def nullDebugAction(*args):
+ """'Do-nothing' debug action, to suppress debugging output during parsing."""
+ pass
+
+'decorator to trim function calls to match the arity of the target'
+def _trim_arity(func, maxargs=2):
+ if func in singleArgBuiltins:
+ return lambda s,l,t: func(t)
+ limit = [0]
+ def wrapper(*args):
+ while 1:
+ try:
+ return func(*args[limit[0]:])
+ except TypeError:
+ if limit[0] <= maxargs:
+ limit[0] += 1
+ continue
+ raise
+ return wrapper
+
+class ParserElement(object):
+ """Abstract base level parser element class."""
+ DEFAULT_WHITE_CHARS = " \n\t\r"
+ verbose_stacktrace = False
+
+ def setDefaultWhitespaceChars( chars ):
+ """Overrides the default whitespace chars
+ """
+ ParserElement.DEFAULT_WHITE_CHARS = chars
+ setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
+
+ def inlineLiteralsUsing(cls):
+ """
+ Set class to be used for inclusion of string literals into a parser.
+ """
+ ParserElement.literalStringClass = cls
+ inlineLiteralsUsing = staticmethod(inlineLiteralsUsing)
+
+ def __init__( self, savelist=False ):
+ self.parseAction = list()
+ self.failAction = None
+ #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
+ self.strRepr = None
+ self.resultsName = None
+ self.saveAsList = savelist
+ self.skipWhitespace = True
+ self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+ self.copyDefaultWhiteChars = True
+ self.mayReturnEmpty = False # used when checking for left-recursion
+ self.keepTabs = False
+ self.ignoreExprs = list()
+ self.debug = False
+ self.streamlined = False
+ self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
+ self.errmsg = ""
+ self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
+ self.debugActions = ( None, None, None ) #custom debug actions
+ self.re = None
+ self.callPreparse = True # used to avoid redundant calls to preParse
+ self.callDuringTry = False
+
+ def copy( self ):
+ """Make a copy of this C{ParserElement}. Useful for defining different parse actions
+ for the same parsing pattern, using copies of the original parse element."""
+ cpy = copy.copy( self )
+ cpy.parseAction = self.parseAction[:]
+ cpy.ignoreExprs = self.ignoreExprs[:]
+ if self.copyDefaultWhiteChars:
+ cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+ return cpy
+
+ def setName( self, name ):
+ """Define name for this expression, for use in debugging."""
+ self.name = name
+ self.errmsg = "Expected " + self.name
+ if hasattr(self,"exception"):
+ self.exception.msg = self.errmsg
+ return self
+
+ def setResultsName( self, name, listAllMatches=False ):
+ """Define name for referencing matching tokens as a nested attribute
+ of the returned parse results.
+ NOTE: this returns a *copy* of the original C{ParserElement} object;
+ this is so that the client can define a basic element, such as an
+ integer, and reference it in multiple places with different names.
+
+ You can also set results names using the abbreviated syntax,
+ C{expr("name")} in place of C{expr.setResultsName("name")} -
+ see L{I{__call__}<__call__>}.
+ """
+ newself = self.copy()
+ if name.endswith("*"):
+ name = name[:-1]
+ listAllMatches=True
+ newself.resultsName = name
+ newself.modalResults = not listAllMatches
+ return newself
+
+ def setBreak(self,breakFlag = True):
+ """Method to invoke the Python pdb debugger when this element is
+ about to be parsed. Set C{breakFlag} to True to enable, False to
+ disable.
+ """
+ if breakFlag:
+ _parseMethod = self._parse
+ def breaker(instring, loc, doActions=True, callPreParse=True):
+ import pdb
+ pdb.set_trace()
+ return _parseMethod( instring, loc, doActions, callPreParse )
+ breaker._originalParseMethod = _parseMethod
+ self._parse = breaker
+ else:
+ if hasattr(self._parse,"_originalParseMethod"):
+ self._parse = self._parse._originalParseMethod
+ return self
+
+ def setParseAction( self, *fns, **kwargs ):
+ """Define action to perform when successfully matching parse element definition.
+ Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
+ C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
+ - s = the original string being parsed (see note below)
+ - loc = the location of the matching substring
+ - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
+ If the functions in fns modify the tokens, they can return them as the return
+ value from fn, and the modified list of tokens will replace the original.
+ Otherwise, fn does not need to return any value.
+
+ Note: the default parsing behavior is to expand tabs in the input string
+ before starting the parsing process. See L{I{parseString}<parseString>} for more information
+ on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+ consistent view of the parsed string, the parse location, and line and column
+ positions within the parsed string.
+ """
+ self.parseAction = list(map(_trim_arity, list(fns)))
+ self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
+ return self
+
+ def addParseAction( self, *fns, **kwargs ):
+ """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
+ self.parseAction += list(map(_trim_arity, list(fns)))
+ self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
+ return self
+
+ def setFailAction( self, fn ):
+ """Define action to perform if parsing fails at this expression.
+ Fail action fn is a callable function that takes the arguments
+ C{fn(s,loc,expr,err)} where:
+ - s = string being parsed
+ - loc = location where expression match was attempted and failed
+ - expr = the parse expression that failed
+ - err = the exception thrown
+ The function returns no value. It may throw C{L{ParseFatalException}}
+ if it is desired to stop parsing immediately."""
+ self.failAction = fn
+ return self
+
+ def _skipIgnorables( self, instring, loc ):
+ exprsFound = True
+ while exprsFound:
+ exprsFound = False
+ for e in self.ignoreExprs:
+ try:
+ while 1:
+ loc,dummy = e._parse( instring, loc )
+ exprsFound = True
+ except ParseException:
+ pass
+ return loc
+
+ def preParse( self, instring, loc ):
+ if self.ignoreExprs:
+ loc = self._skipIgnorables( instring, loc )
+
+ if self.skipWhitespace:
+ wt = self.whiteChars
+ instrlen = len(instring)
+ while loc < instrlen and instring[loc] in wt:
+ loc += 1
+
+ return loc
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ return loc, []
+
+ def postParse( self, instring, loc, tokenlist ):
+ return tokenlist
+
+ #~ @profile
+ def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
+ debugging = ( self.debug ) #and doActions )
+
+ if debugging or self.failAction:
+ #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+ if (self.debugActions[0] ):
+ self.debugActions[0]( instring, loc, self )
+ if callPreParse and self.callPreparse:
+ preloc = self.preParse( instring, loc )
+ else:
+ preloc = loc
+ tokensStart = preloc
+ try:
+ try:
+ loc,tokens = self.parseImpl( instring, preloc, doActions )
+ except IndexError:
+ raise ParseException( instring, len(instring), self.errmsg, self )
+ except ParseBaseException:
+ #~ print ("Exception raised:", err)
+ err = None
+ if self.debugActions[2]:
+ err = sys.exc_info()[1]
+ self.debugActions[2]( instring, tokensStart, self, err )
+ if self.failAction:
+ if err is None:
+ err = sys.exc_info()[1]
+ self.failAction( instring, tokensStart, self, err )
+ raise
+ else:
+ if callPreParse and self.callPreparse:
+ preloc = self.preParse( instring, loc )
+ else:
+ preloc = loc
+ tokensStart = preloc
+ if self.mayIndexError or loc >= len(instring):
+ try:
+ loc,tokens = self.parseImpl( instring, preloc, doActions )
+ except IndexError:
+ raise ParseException( instring, len(instring), self.errmsg, self )
+ else:
+ loc,tokens = self.parseImpl( instring, preloc, doActions )
+
+ tokens = self.postParse( instring, loc, tokens )
+
+ retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
+ if self.parseAction and (doActions or self.callDuringTry):
+ if debugging:
+ try:
+ for fn in self.parseAction:
+ tokens = fn( instring, tokensStart, retTokens )
+ if tokens is not None:
+ retTokens = ParseResults( tokens,
+ self.resultsName,
+ asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+ modal=self.modalResults )
+ except ParseBaseException:
+ #~ print "Exception raised in user parse action:", err
+ if (self.debugActions[2] ):
+ err = sys.exc_info()[1]
+ self.debugActions[2]( instring, tokensStart, self, err )
+ raise
+ else:
+ for fn in self.parseAction:
+ tokens = fn( instring, tokensStart, retTokens )
+ if tokens is not None:
+ retTokens = ParseResults( tokens,
+ self.resultsName,
+ asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+ modal=self.modalResults )
+
+ if debugging:
+ #~ print ("Matched",self,"->",retTokens.asList())
+ if (self.debugActions[1] ):
+ self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
+
+ return loc, retTokens
+
+ def tryParse( self, instring, loc ):
+ try:
+ return self._parse( instring, loc, doActions=False )[0]
+ except ParseFatalException:
+ raise ParseException( instring, loc, self.errmsg, self)
+
# Memoizing front end for _parseNoCache: backtracking calls the same
# expression repeatedly with identical arguments, so the outcome of each call
# (tokens or the raised parse exception) is kept in ParserElement._exprArgCache.
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    key = (self, instring, loc, callPreParse, doActions)
    if key not in ParserElement._exprArgCache:
        try:
            fresh = self._parseNoCache(instring, loc, doActions, callPreParse)
            # store a copy of the tokens; the caller receives the original
            ParserElement._exprArgCache[key] = (fresh[0], fresh[1].copy())
            return fresh
        except ParseBaseException:
            # remember the failure so the next identical attempt fails at once
            failure = sys.exc_info()[1]
            ParserElement._exprArgCache[key] = failure
            raise

    cached = ParserElement._exprArgCache[key]
    if isinstance(cached, Exception):
        raise cached
    # hand back a copy so the cached tokens are not shared with the caller
    return (cached[0], cached[1].copy())
+
# uncached parsing is the default; enablePackrat() rebinds _parse to _parseCache
_parse = _parseNoCache

# memo table used by _parseCache, keyed on
# (expression, instring, loc, callPreParse, doActions)
_exprArgCache = {}
def resetCache():
    """Empty the shared C{_exprArgCache} dictionary in place."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)
+
# set to True by the first call to enablePackrat()
_packratEnabled = False
def enablePackrat():
    """Turn on "packrat" parsing, i.e. memoization of parse attempts.
       When the same expression is tried again at the same string location
       (common in complex grammars), the cached outcome is returned instead
       of re-running the parsing/validating code. Both successful results
       and parsing exceptions are memoized.

       Because this speedup may break programs whose parse actions have
       side-effects, packrat parsing is off when pyparsing is first imported.
       To activate it, call the static method
       C{ParserElement.enablePackrat()}. If your program uses C{psyco} to
       "compile as you go", you must call C{enablePackrat} before calling
       C{psyco.full()}; if you do not, Python will crash. For best results,
       call C{enablePackrat()} immediately after importing pyparsing.

       Calling this method more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
+
def parseString(self, instring, parseAll=False):
    """Execute the parse expression with the given string.
       This is the main interface to the client code, once the complete
       expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set C{parseAll} to True (equivalent to ending
       the grammar with C{L{StringEnd()}}).

       Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
       in order to report proper column numbers in parse actions.
       If the input string contains tabs and
       the grammar uses parse actions that use the C{loc} argument to index into the
       string being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling C{parseWithTabs} on your grammar before calling C{parseString}
          (see L{I{parseWithTabs}<parseWithTabs>})
        - define your parse action using the full C{(s,loc,toks)} signature, and
          reference the input string using the parse action's C{s} argument
        - explicitly expand the tabs in your input string before calling
          C{parseString}
    """
    # every top-level parse starts with an empty memo table
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            # skip trailing ignorables, then insist on end of input
            loc = self.preParse(instring, loc)
            endCheck = Empty() + StringEnd()
            endCheck._parse(instring, loc)
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise the exception from this frame, which clears out the
        # pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
    return tokens
+
def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
    """Generator that scans the input string for matches of this expression,
       yielding a C{(tokens, start, end)} tuple for each match found. The
       optional C{maxMatches} argument stops the scan after 'n' matches. If
       C{overlap} is specified, then overlapping matches will be reported.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on parsing
       strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    totalLen = len(instring)
    loc = 0
    skipIgnorables = self.preParse
    matchAt = self._parse
    ParserElement.resetCache()
    found = 0
    try:
        while loc <= totalLen and found < maxMatches:
            try:
                preloc = skipIgnorables(instring, loc)
                nextLoc, tokens = matchAt(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here - retry one character further on
                loc = preloc + 1
                continue

            if nextLoc <= loc:
                # match did not advance past loc - step on
                loc = preloc + 1
                continue

            found += 1
            yield tokens, preloc, nextLoc
            if not overlap:
                # resume scanning after the end of this match
                loc = nextLoc
            elif skipIgnorables(instring, loc) > loc:
                loc = nextLoc
            else:
                loc += 1
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise the exception from this frame, which clears out the
        # pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
+
def transformString(self, instring):
    """Extension to C{L{scanString}} that rewrites matched text using the
       tokens returned for each match. To use C{transformString}, define a
       grammar and attach a parse action to it that modifies the returned
       token list. C{transformString()} then scans the target string for
       matches, substitutes the (flattened, stringified) tokens for each
       matched region, and returns the resulting string. Text between
       matches is copied through unchanged.

       Note: this method sets C{keepTabs} to True on this expression."""
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            # unmatched text between the previous match and this one
            pieces.append(instring[prevEnd:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces += toks.asList()
                elif isinstance(toks, list):
                    pieces += toks
                else:
                    pieces.append(toks)
            prevEnd = end
        pieces.append(instring[prevEnd:])
        pieces = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(pieces)))
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise the exception from this frame, which clears out the
        # pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
+
def searchString(self, instring, maxMatches=_MAX_INT):
    """Another extension to C{L{scanString}}: collects just the tokens of
       every match found by C{scanString} into a single C{ParseResults}.
       May be called with optional C{maxMatches} argument, to clip searching
       after 'n' matches are found.
    """
    try:
        matchedTokens = [toks for toks, start, end in self.scanString(instring, maxMatches)]
        return ParseResults(matchedTokens)
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise the exception from this frame, which clears out the
        # pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
+
def __add__(self, other):
    """Implementation of + operator - returns C{L{And}}.
       A string operand is first wrapped in C{literalStringClass}; for any
       other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
+
def __radd__(self, other):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}.
       A string operand is first wrapped in C{literalStringClass}; for any
       other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
+
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop.
       A string operand is first wrapped in C{literalStringClass}; for any
       other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
+
def __rsub__(self, other):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}.
       A string operand is first wrapped in C{literalStringClass}; for any
       other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
+
+ def __mul__(self,other):
+ """Implementation of * operator, allows use of C{expr * 3} in place of
+ C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer
+ tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
+ may also include C{None} as in:
+ - C{expr*(n,None)} or C{expr*(n,)} is equivalent
+ to C{expr*n + L{ZeroOrMore}(expr)}
+ (read as "at least n instances of C{expr}")
+ - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
+ (read as "0 to n instances of C{expr}")
+ - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
+ - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
+
+ Note that C{expr*(None,n)} does not raise an exception if
+ more than n exprs exist in the input stream; that is,
+ C{expr*(None,n)} does not enforce a maximum number of expr
+ occurrences. If this behavior is desired, then write
+ C{expr*(None,n) + ~expr}
+
+ """
+ if isinstance(other,int):
+ minElements, optElements = other,0
+ elif isinstance(other,tuple):
+ other = (other + (None, None))[:2]
+ if other[0] is None:
+ other = (0, other[1])
+ if isinstance(other[0],int) and other[1] is None:
+ if other[0] == 0:
+ return ZeroOrMore(self)
+ if other[0] == 1:
+ return OneOrMore(self)
+ else:
+ return self*other[0] + ZeroOrMore(self)
+ elif isinstance(other[0],int) and isinstance(other[1],int):
+ minElements, optElements = other
+ optElements -= minElements
+ else:
+ raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
+ else:
+ raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
+
+ if minElements < 0:
+ raise ValueError("cannot multiply ParserElement by negative value")
+ if optElements < 0:
+ raise ValueError("second tuple value must be greater or equal to first tuple value")
+ if minElements == optElements == 0:
+ raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
+
+ if (optElements):
+ def makeOptionalList(n):
+ if n>1:
+ return Optional(self + makeOptionalList(n-1))
+ else:
+ return Optional(self)
+ if minElements:
+ if minElements == 1:
+ ret = self + makeOptionalList(optElements)
+ else:
+ ret = And([self]*minElements) + makeOptionalList(optElements)
+ else:
+ ret = makeOptionalList(optElements)
+ else:
+ if minElements == 1:
+ ret = self
+ else:
+ ret = And([self]*minElements)
+ return ret
+
+ def __rmul__(self, other):
+ return self.__mul__(other)
+
def __or__(self, other):
    """Implementation of | operator - returns C{L{MatchFirst}}.
       A string operand is first wrapped in C{literalStringClass}; for any
       other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
+
def __ror__(self, other):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}.
       A string operand is first wrapped in C{literalStringClass}; for any
       other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
+
def __xor__(self, other):
    """Implementation of ^ operator - returns C{L{Or}}.
       A string operand is first wrapped in C{literalStringClass}; for any
       other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
+
def __rxor__(self, other):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
       A string operand is first wrapped in C{literalStringClass}; for any
       other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
+
def __and__(self, other):
    """Implementation of & operator - returns C{L{Each}}.
       A string operand is first wrapped in C{literalStringClass}; for any
       other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
+
def __rand__(self, other):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}.
       A string operand is first wrapped in C{literalStringClass}; for any
       other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
+
def __invert__(self):
    """Implementation of ~ operator - wraps this expression in a C{L{NotAny}}"""
    negated = NotAny(self)
    return negated
+
+ def __call__(self, name):
+ """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
+ userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
+ could be written as::
+ userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
+
+ If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
+ passed as C{True}.
+ """
+ return self.setResultsName(name)
+
def suppress(self):
    """Returns this C{ParserElement} wrapped in a C{Suppress}, so that its
       output is suppressed; useful to keep punctuation from cluttering up
       returned output.
    """
    wrapped = Suppress(self)
    return wrapped
+
def leaveWhitespace(self):
    """Turns off the skipping of whitespace before matching the characters
       in the C{ParserElement}'s defined pattern, and returns C{self}.  This
       is normally only used internally by the pyparsing module, but may be
       needed in some whitespace-sensitive grammars.
    """
    setattr(self, "skipWhitespace", False)
    return self
+
def setWhitespaceChars(self, chars):
    """Overrides the default whitespace chars with C{chars}, re-enables
       whitespace skipping, and returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    # this element now carries its own whitespace definition
    self.copyDefaultWhiteChars = False
    return self
+
def parseWithTabs(self):
    """Overrides the default behavior of expanding C{<TAB>}s to spaces before
       parsing the input string, and returns C{self}.  Must be called before
       C{parseString} when the input grammar contains elements that match
       C{<TAB>} characters."""
    setattr(self, "keepTabs", True)
    return self
+
def ignore(self, other):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.  Returns C{self}.

       A copy of C{other} is stored.  An expression that is already a
       C{Suppress} is added only if it is not in the ignore list yet; any
       other expression is wrapped in a C{Suppress} and always added.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other.copy())
    return self
+
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Enable display of debugging messages while doing pattern matching,
       using the given callbacks.  Any callback passed as a false value
       (e.g. C{None}) is replaced by the corresponding default debug action.
       Returns C{self}."""
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction
    self.debugActions = (startAction, successAction, exceptionAction)
    self.debug = True
    return self
+
def setDebug(self, flag=True):
    """Enable display of debugging messages while doing pattern matching.
       Set C{flag} to True to enable (installing the default debug actions),
       False to disable.  Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction,
                         _defaultSuccessDebugAction,
                         _defaultExceptionDebugAction)
    return self
+
+ def __str__( self ):
+ return self.name
+
def __repr__(self):
    """Return the same text as the string conversion done by C{_ustr}."""
    text = _ustr(self)
    return text
+
def streamline(self):
    """Mark this element as streamlined, discard the cached string
       representation (C{strRepr}), and return C{self}."""
    self.strRepr = None
    self.streamlined = True
    return self
+
def checkRecursion(self, parseElementList):
    """Recursion check hook; this implementation does nothing."""
    return None
+
def validate(self, validateTrace=[]):
    """Check defined expressions for valid structure, check for infinite recursive definitions.
       This implementation runs C{checkRecursion} with a fresh, empty list;
       the C{validateTrace} argument is not used here."""
    startingTrace = []
    self.checkRecursion(startingTrace)
+
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       C{parseAll} is passed through to C{parseString}, whose result is
       returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method, so treat the argument as a filename
        f = open(file_or_filename, "r")
        try:
            file_contents = f.read()
        finally:
            # close the file even if reading fails
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
+
def getException(self):
    """Build a C{ParseException} with an empty input string, location 0,
       and this element's C{errmsg}."""
    template = ParseException("", 0, self.errmsg, self)
    return template
+
+ def __getattr__(self,aname):
+ if aname == "myException":
+ self.myException = ret = self.getException();
+ return ret;
+ else:
+ raise AttributeError("no such attribute " + aname)
+
def __eq__(self, other):
    """Equality test.
        - against another C{ParserElement}: true if it is the same object or
          both have equal C{__dict__}s
        - against a string: true if this expression parses the whole string
          (C{parseAll=True}) without raising a C{ParseBaseException}
        - otherwise: the result of comparing the C{super} proxy with C{other}
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement, self) == other
+
+ def __ne__(self,other):
+ return not (self == other)
+
+ def __hash__(self):
+ return hash(id(self))
+
+ def __req__(self,other):
+ return self == other
+
+ def __rne__(self,other):
+ return not (self == other)
+
+
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
    def __init__(self):
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the name via the base class, then rebuild C{errmsg} as
           "Expected <name>".  Returns whatever the base class returned."""
        renamed = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
+
+
class Empty(Token):
    """An empty token, will always match."""
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
+
+
class NoMatch(Token):
    """A token that will never match."""
    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise this element's C{myException}, updated with
           the current location and input string."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
+
+
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # empty match string: warn, and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(end location, match string)} if the input at C{loc}
           starts with the match string; otherwise raise C{myException}."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
# strings combined with ParserElements are converted using this class
ParserElement.literalStringClass = Literal
+
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to C{Keyword.DEFAULT_KEYWORD_CHARS} (initially all alphanumerics
       + "_" and "$") as it is set at the time the keyword is created; C{caseless}
       allows case-insensitive matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            # resolve the default at call time, so that a prior call to
            # setDefaultKeywordChars() is honoured
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(end location, match string)} if the keyword is found at
           C{loc} and is neither preceded nor followed by one of
           C{identChars}; otherwise raise C{myException}."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        c = super(Keyword,self).copy()
        # NOTE(review): this replaces the copy's identChars with the current
        # class default, dropping any custom identChars given to the
        # original - confirm this is intended
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
+
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__(self, matchString):
        # the base class stores the upper-cased string as self.match
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(end location, defining literal)} if the upper-cased
           input at C{loc} equals the upper-cased match string; otherwise
           raise C{myException}."""
        candidate = instring[loc:loc + self.matchLen]
        if candidate.upper() == self.match:
            return loc + self.matchLen, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
+
class CaselessKeyword(Keyword):
    """Caseless version of C{L{Keyword}}: matches the keyword string ignoring
       the case of letters, and only where it is neither preceded nor
       followed by one of C{identChars}.  C{identChars} defaults to
       C{Keyword.DEFAULT_KEYWORD_CHARS} as set at the time of construction.
    """
    def __init__( self, matchString, identChars=None ):
        if identChars is None:
            # resolve the default at call time, so that a prior call to
            # Keyword.setDefaultKeywordChars() is honoured
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(end location, match string)} on a caseless keyword match
           at C{loc}; otherwise raise C{myException}."""
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             # same leading-boundary test as Keyword's caseless branch: the
             # keyword must not be the tail of a longer identifier
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
+
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction. An optional
       C{excludeChars} parameter can list characters that are to be removed
       from the C{initChars} and C{bodyChars} strings; useful to define a word
       of all printables except for one or two characters, for instance.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join([c for c in initChars if c not in excludeChars])
            if bodyChars:
                bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both min and max
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # with no length restrictions and no space among the characters, the
        # word is matched with a compiled regular expression instead of the
        # character-by-character loop in parseImpl
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a single initial character can be matched literally; this
                # must test initCharsOrig, since escaping a multi-character
                # initChars would match it as one literal string
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character loop if the pattern is unusable
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(end location, matched word)} for a word starting at
           C{loc}; raise C{myException} if there is none or if the length or
           keyword constraints are not met."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that continues past it is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(Word,self).__str__()
        except Exception:
            # the base __str__ failed; fall through and build a
            # representation from the character sets
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first 4 characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
+
+
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

           C{pattern} may also be an already compiled RE object, which is
           then used directly.  Any other type raises C{ValueError}; an
           invalid pattern string re-raises the C{re} compile error after
           issuing a C{SyntaxWarning}."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the pattern's source text, not the str() of the
            # compiled object
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(end location, ParseResults)} for a regex match at
           C{loc}; named groups of the match are stored as named results.
           Raises C{myException} if the pattern does not match."""
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except Exception:
            # the base __str__ failed; fall through and build a
            # representation from the pattern
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
+
+
+class QuotedString(Token):
+ """Token for matching strings that are delimited by quoting characters.
+ """
+ def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
+ """
+ Defined with the following parameters:
+ - quoteChar - string of one or more characters defining the quote delimiting string
+ - escChar - character to escape quotes, typically backslash (default=None)
+ - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
+ - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
+ - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
+ - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
+ """
+ super(QuotedString,self).__init__()
+
+ # remove white space from quote chars - wont work anyway
+ quoteChar = quoteChar.strip()
+ if len(quoteChar) == 0:
+ warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+ raise SyntaxError()
+
+ if endQuoteChar is None:
+ endQuoteChar = quoteChar
+ else:
+ endQuoteChar = endQuoteChar.strip()
+ if len(endQuoteChar) == 0:
+ warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+ raise SyntaxError()
+
+ self.quoteChar = quoteChar
+ self.quoteCharLen = len(quoteChar)
+ self.firstQuoteChar = quoteChar[0]
+ self.endQuoteChar = endQuoteChar
+ self.endQuoteCharLen = len(endQuoteChar)
+ self.escChar = escChar
+ self.escQuote = escQuote
+ self.unquoteResults = unquoteResults
+
+ if multiline:
+ self.flags = re.MULTILINE | re.DOTALL
+ self.pattern = r'%s(?:[^%s%s]' % \
+ ( re.escape(self.quoteChar),
+ _escapeRegexRangeChars(self.endQuoteChar[0]),
+ (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+ else:
+ self.flags = 0
+ self.pattern = r'%s(?:[^%s\n\r%s]' % \
+ ( re.escape(self.quoteChar),
+ _escapeRegexRangeChars(self.endQuoteChar[0]),
+ (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+ if len(self.endQuoteChar) > 1:
+ self.pattern += (
+ '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
+ _escapeRegexRangeChars(self.endQuoteChar[i]))
+ for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
+ )
+ if escQuote:
+ self.pattern += (r'|(?:%s)' % re.escape(escQuote))
+ if escChar:
+ self.pattern += (r'|(?:%s.)' % re.escape(escChar))
+ charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
+ self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)
+ self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
+
+ try:
+ self.re = re.compile(self.pattern, self.flags)
+ self.reString = self.pattern
+ except sre_constants.error:
+ warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
+ SyntaxWarning, stacklevel=2)
+ raise
+
+ self.name = _ustr(self)
+ self.errmsg = "Expected " + self.name
+ self.mayIndexError = False
+ self.mayReturnEmpty = True
+
+ def parseImpl( self, instring, loc, doActions=True ):  # Match a quoted string at loc; returns (end location, matched text).
+ result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None  # cheap first-character test before running the compiled regex
+ if not result:
+ exc = self.myException  # reuse the instance's exception object, refreshed with this location and input
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+
+ loc = result.end()
+ ret = result.group()
+
+ if self.unquoteResults:  # when set, only the text between the delimiters is returned
+
+ # strip off quotes
+ ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
+
+ if isinstance(ret,basestring):
+ # replace escaped characters
+ if self.escChar:
+ ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)  # keeps the captured character and drops the escChar in front of it
+
+ # replace escaped quotes
+ if self.escQuote:
+ ret = ret.replace(self.escQuote, self.endQuoteChar)  # each escQuote sequence becomes the closing quote string
+
+ return loc, ret
+
+ def __str__( self ):  # Return the inherited string form when it succeeds, otherwise a cached description of the delimiters.
+ try:
+ return super(QuotedString,self).__str__()
+ except:  # any failure in the inherited __str__ falls through to the description built below
+ pass
+
+ if self.strRepr is None:  # build the description once and reuse it afterwards
+ self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
+
+ return self.strRepr
+
+
+class CharsNotIn(Token):
+ """Token for matching words composed of characters *not* in a given set.
+ Defined with string containing all disallowed characters, and an optional
+ minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
+ minimum value < 1 is not valid); the default values for C{max} and C{exact}
+ are 0, meaning no maximum or exact length restriction.
+ """
+ def __init__( self, notChars, min=1, max=0, exact=0 ):  # raises ValueError when min < 1
+ super(CharsNotIn,self).__init__()
+ self.skipWhitespace = False
+ self.notChars = notChars
+
+ if min < 1:
+ raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
+
+ self.minLen = min
+
+ if max > 0:
+ self.maxLen = max
+ else:
+ self.maxLen = _MAX_INT  # max <= 0 means "no upper bound"
+
+ if exact > 0:  # a positive exact overrides both min and max
+ self.maxLen = exact
+ self.minLen = exact
+
+ self.name = _ustr(self)
+ self.errmsg = "Expected " + self.name
+ self.mayReturnEmpty = ( self.minLen == 0 )  # minLen is at least 1 by this point, so this evaluates to False
+ self.mayIndexError = False
+
+ def parseImpl( self, instring, loc, doActions=True ):  # returns (end location, matched substring)
+ if instring[loc] in self.notChars:  # the first character must itself be allowed
+ #~ raise ParseException( instring, loc, self.errmsg )
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+
+ start = loc
+ loc += 1
+ notchars = self.notChars
+ maxlen = min( start+self.maxLen, len(instring) )  # despite the name, this is the end index the scan may reach
+ while loc < maxlen and \
+ (instring[loc] not in notchars):
+ loc += 1
+
+ if loc - start < self.minLen:  # too few characters consumed
+ #~ raise ParseException( instring, loc, self.errmsg )
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+
+ return loc, instring[start:loc]
+
+ def __str__( self ):  # inherited string form if available, else "!W:(...)" showing at most four of the excluded characters
+ try:
+ return super(CharsNotIn, self).__str__()
+ except:
+ pass
+
+ if self.strRepr is None:
+ if len(self.notChars) > 4:
+ self.strRepr = "!W:(%s...)" % self.notChars[:4]
+ else:
+ self.strRepr = "!W:(%s)" % self.notChars
+
+ return self.strRepr
+
+class White(Token):
+ """Special matching class for matching whitespace. Normally, whitespace is ignored
+ by pyparsing grammars. This class is included when some whitespace structures
+ are significant. Define with a string containing the whitespace characters to be
+ matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
+ as defined for the C{L{Word}} class."""
+ whiteStrs = {  # display names used to build self.name in __init__
+ " " : "<SPC>",
+ "\t": "<TAB>",
+ "\n": "<LF>",
+ "\r": "<CR>",
+ "\f": "<FF>",
+ }
+ def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
+ super(White,self).__init__()
+ self.matchWhite = ws
+ self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) )  # remove the characters being matched from this element's own whitespace set
+ #~ self.leaveWhitespace()
+ self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))  # a character of ws that is missing from whiteStrs raises KeyError here
+ self.mayReturnEmpty = True
+ self.errmsg = "Expected " + self.name
+
+ self.minLen = min
+
+ if max > 0:
+ self.maxLen = max
+ else:
+ self.maxLen = _MAX_INT  # max <= 0 means "no upper bound"
+
+ if exact > 0:  # a positive exact overrides both min and max
+ self.maxLen = exact
+ self.minLen = exact
+
+ def parseImpl( self, instring, loc, doActions=True ):  # returns (end location, matched run of whitespace)
+ if not(instring[ loc ] in self.matchWhite):  # the first character must be one of the requested whitespace characters
+ #~ raise ParseException( instring, loc, self.errmsg )
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+ start = loc
+ loc += 1
+ maxloc = start + self.maxLen
+ maxloc = min( maxloc, len(instring) )  # never scan past the end of the input
+ while loc < maxloc and instring[loc] in self.matchWhite:
+ loc += 1
+
+ if loc - start < self.minLen:  # run was shorter than the required minimum
+ #~ raise ParseException( instring, loc, self.errmsg )
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+
+ return loc, instring[start:loc]
+
+
+class _PositionToken(Token):  # Shared base for the position-testing tokens defined after it (GoToColumn, LineStart, LineEnd, ...).
+ def __init__( self ):
+ super(_PositionToken,self).__init__()
+ self.name=self.__class__.__name__  # the concrete subclass name doubles as the element name
+ self.mayReturnEmpty = True
+ self.mayIndexError = False
+
+class GoToColumn(_PositionToken):
+ """Token to advance to a specific column of input text; useful for tabular report scraping."""
+ def __init__( self, colno ):
+ super(GoToColumn,self).__init__()
+ self.col = colno  # target column, compared against the result of col()
+
+ def preParse( self, instring, loc ):  # advance over ignorables and whitespace until the target column or a non-space character
+ if col(loc,instring) != self.col:  # col() is defined elsewhere in this file
+ instrlen = len(instring)
+ if self.ignoreExprs:
+ loc = self._skipIgnorables( instring, loc )
+ while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
+ loc += 1
+ return loc
+
+ def parseImpl( self, instring, loc, doActions=True ):  # returns (new location, text skipped to reach the column)
+ thiscol = col( loc, instring )
+ if thiscol > self.col:  # already past the target column: cannot move backwards
+ raise ParseException( instring, loc, "Text not in expected column", self )
+ newloc = loc + self.col - thiscol
+ ret = instring[ loc: newloc ]
+ return newloc, ret
+
+class LineStart(_PositionToken):
+ """Matches if current position is at the beginning of a line within the parse string"""
+ def __init__( self ):
+ super(LineStart,self).__init__()
+ self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )  # newline is not treated as skippable whitespace by this element
+ self.errmsg = "Expected start of line"
+
+ def preParse( self, instring, loc ):  # returns loc + 1 when the inherited preParse lands on a newline, else loc unchanged
+ preloc = super(LineStart,self).preParse(instring,loc)
+ if instring[preloc] == "\n":
+ loc += 1  # note: advances the original loc, not preloc
+ return loc
+
+ def parseImpl( self, instring, loc, doActions=True ):  # succeeds with an empty token list; consumes nothing
+ if not( loc==0 or
+ (loc == self.preParse( instring, 0 )) or
+ (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
+ #~ raise ParseException( instring, loc, "Expected start of line" )
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+ return loc, []
+
+class LineEnd(_PositionToken):
+ """Matches if current position is at the end of a line within the parse string"""
+ def __init__( self ):
+ super(LineEnd,self).__init__()
+ self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )  # newline is not treated as skippable whitespace by this element
+ self.errmsg = "Expected end of line"
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ if loc<len(instring):
+ if instring[loc] == "\n":
+ return loc+1, "\n"  # consume the newline and return it as the token
+ else:
+ #~ raise ParseException( instring, loc, "Expected end of line" )
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+ elif loc == len(instring):
+ return loc+1, []  # end of input counts as end of line; the returned location is one past the end
+ else:  # loc is already beyond the end of the input
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+
+class StringStart(_PositionToken):
+ """Matches if current position is at the beginning of the parse string"""
+ def __init__( self ):
+ super(StringStart,self).__init__()
+ self.errmsg = "Expected start of text"
+
+ def parseImpl( self, instring, loc, doActions=True ):  # succeeds with an empty token list; consumes nothing
+ if loc != 0:
+ # see if entire string up to here is just whitespace and ignoreables
+ if loc != self.preParse( instring, 0 ):  # compare against where pre-parsing from position 0 ends up
+ #~ raise ParseException( instring, loc, "Expected start of text" )
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+ return loc, []
+
+class StringEnd(_PositionToken):
+ """Matches if current position is at the end of the parse string"""
+ def __init__( self ):
+ super(StringEnd,self).__init__()
+ self.errmsg = "Expected end of text"
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ if loc < len(instring):  # input remains, so this is not the end of the text
+ #~ raise ParseException( instring, loc, "Expected end of text" )
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+ elif loc == len(instring):
+ return loc+1, []  # exactly at the end: the returned location is one past the end
+ elif loc > len(instring):
+ return loc, []  # already past the end: location is left unchanged
+ else:  # not reachable for integer loc: the three comparisons above are exhaustive
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+
+class WordStart(_PositionToken):
+ """Matches if the current position is at the beginning of a Word, and
+ is not preceded by any character in a given set of C{wordChars}
+ (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+ use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
+ the string being parsed, or at the beginning of a line.
+ """
+ def __init__(self, wordChars = printables):
+ super(WordStart,self).__init__()
+ self.wordChars = set(wordChars)  # set gives constant-time membership tests in parseImpl
+ self.errmsg = "Not at the start of a word"
+
+ def parseImpl(self, instring, loc, doActions=True ):  # succeeds with an empty token list; consumes nothing
+ if loc != 0:  # position 0 is accepted without inspecting any character
+ if (instring[loc-1] in self.wordChars or
+ instring[loc] not in self.wordChars):  # fail if the previous character is a word character or the current one is not
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+ return loc, []
+
+class WordEnd(_PositionToken):
+ """Matches if the current position is at the end of a Word, and
+ is not followed by any character in a given set of C{wordChars}
+ (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+ use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
+ the string being parsed, or at the end of a line.
+ """
+ def __init__(self, wordChars = printables):
+ super(WordEnd,self).__init__()
+ self.wordChars = set(wordChars)  # set gives constant-time membership tests in parseImpl
+ self.skipWhitespace = False
+ self.errmsg = "Not at the end of a word"
+
+ def parseImpl(self, instring, loc, doActions=True ):  # succeeds with an empty token list; consumes nothing
+ instrlen = len(instring)
+ if instrlen>0 and loc<instrlen:  # at or past the end of the input the match succeeds without any character test
+ if (instring[loc] in self.wordChars or
+ instring[loc-1] not in self.wordChars):  # NOTE(review): when loc is 0, instring[loc-1] reads the LAST character of the input - confirm this is intended
+ #~ raise ParseException( instring, loc, "Expected end of word" )
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+ return loc, []
+
+
+class ParseExpression(ParserElement):
+ """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
+ def __init__( self, exprs, savelist = False ):  # normalizes exprs into the list self.exprs
+ super(ParseExpression,self).__init__(savelist)
+ if isinstance( exprs, list ):
+ self.exprs = exprs  # a list argument is stored as-is, not copied
+ elif isinstance( exprs, basestring ):
+ self.exprs = [ Literal( exprs ) ]  # a bare string becomes a single Literal
+ else:
+ try:
+ self.exprs = list( exprs )  # any other iterable is materialized
+ except TypeError:
+ self.exprs = [ exprs ]  # a non-iterable is wrapped as the only element
+ self.callPreparse = False
+
+ def __getitem__( self, i ):  # index into the contained expressions
+ return self.exprs[i]
+
+ def append( self, other ):  # add an expression in place; returns self
+ self.exprs.append( other )
+ self.strRepr = None  # cached string form is stale once the contents change
+ return self
+
+ def leaveWhitespace( self ):
+ """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
+ all contained expressions."""
+ self.skipWhitespace = False
+ self.exprs = [ e.copy() for e in self.exprs ]  # operate on copies so the originally supplied expressions are not modified
+ for e in self.exprs:
+ e.leaveWhitespace()
+ return self
+
+ def ignore( self, other ):  # register an ignorable expression here and on every contained expression; returns self
+ if isinstance( other, Suppress ):
+ if other not in self.ignoreExprs:  # a Suppress that is already registered is not added again
+ super( ParseExpression, self).ignore( other )
+ for e in self.exprs:
+ e.ignore( self.ignoreExprs[-1] )  # pass on the entry the base class just appended
+ else:
+ super( ParseExpression, self).ignore( other )
+ for e in self.exprs:
+ e.ignore( self.ignoreExprs[-1] )
+ return self
+
+ def __str__( self ):  # inherited string form if available, else "ClassName:(exprs)" cached in strRepr
+ try:
+ return super(ParseExpression,self).__str__()
+ except:
+ pass
+
+ if self.strRepr is None:
+ self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
+ return self.strRepr
+
+ def streamline( self ):  # streamline children, then flatten a same-class first or last child when there are exactly two; returns self
+ super(ParseExpression,self).streamline()
+
+ for e in self.exprs:
+ e.streamline()
+
+ # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
+ # but only if there are no parse actions or resultsNames on the nested And's
+ # (likewise for Or's and MatchFirst's)
+ if ( len(self.exprs) == 2 ):
+ other = self.exprs[0]
+ if ( isinstance( other, self.__class__ ) and
+ not(other.parseAction) and
+ other.resultsName is None and
+ not other.debug ):
+ self.exprs = other.exprs[:] + [ self.exprs[1] ]  # splice the first child's expressions in front of the second
+ self.strRepr = None
+ self.mayReturnEmpty |= other.mayReturnEmpty
+ self.mayIndexError |= other.mayIndexError
+
+ other = self.exprs[-1]
+ if ( isinstance( other, self.__class__ ) and
+ not(other.parseAction) and
+ other.resultsName is None and
+ not other.debug ):
+ self.exprs = self.exprs[:-1] + other.exprs[:]  # splice the last child's expressions onto the end
+ self.strRepr = None
+ self.mayReturnEmpty |= other.mayReturnEmpty
+ self.mayIndexError |= other.mayIndexError
+
+ return self
+
+ def setResultsName( self, name, listAllMatches=False ):  # pure pass-through to the base class implementation
+ ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
+ return ret
+
+ def validate( self, validateTrace=[] ):  # the default list is never mutated; a copy is extended below
+ tmp = validateTrace[:]+[self]
+ for e in self.exprs:
+ e.validate(tmp)
+ self.checkRecursion( [] )
+
+ def copy(self):  # copy of this element whose contained expressions are themselves copies
+ ret = super(ParseExpression,self).copy()
+ ret.exprs = [e.copy() for e in self.exprs]
+ return ret
+
+class And(ParseExpression):
+ """Requires all given C{ParseExpression}s to be found in the given order.
+ Expressions may be separated by whitespace.
+ May be constructed using the C{'+'} operator.
+ """
+
+ class _ErrorStop(Empty):
+ def __init__(self, *args, **kwargs):
+ super(And._ErrorStop,self).__init__(*args, **kwargs)
+ self.leaveWhitespace()
+
+ def __init__( self, exprs, savelist = True ):
+ super(And,self).__init__(exprs, savelist)
+ self.mayReturnEmpty = True
+ for e in self.exprs:
+ if not e.mayReturnEmpty:
+ self.mayReturnEmpty = False
+ break
+ self.setWhitespaceChars( exprs[0].whiteChars )
+ self.skipWhitespace = exprs[0].skipWhitespace
+ self.callPreparse = True
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ # pass False as last arg to _parse for first element, since we already
+ # pre-parsed the string as part of our And pre-parsing
+ loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
+ errorStop = False
+ for e in self.exprs[1:]:
+ if isinstance(e, And._ErrorStop):
+ errorStop = True
+ continue
+ if errorStop:
+ try:
+ loc, exprtokens = e._parse( instring, loc, doActions )
+ except ParseSyntaxException:
+ raise
+ except ParseBaseException:
+ pe = sys.exc_info()[1]
+ raise ParseSyntaxException(pe)
+ except IndexError:
+ raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
+ else:
+ loc, exprtokens = e._parse( instring, loc, doActions )
+ if exprtokens or exprtokens.keys():
+ resultlist += exprtokens
+ return loc, resultlist
+
+ def __iadd__(self, other ):
+ if isinstance( other, basestring ):
+ other = Literal( other )
+ return self.append( other ) #And( [ self, other ] )
+
+ def checkRecursion( self, parseElementList ):
+ subRecCheckList = parseElementList[:] + [ self ]
+ for e in self.exprs:
+ e.checkRecursion( subRecCheckList )
+ if not e.mayReturnEmpty:
+ break
+
+ def __str__( self ):
+ if hasattr(self,"name"):
+ return self.name
+
+ if self.strRepr is None:
+ self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
+
+ return self.strRepr
+
+
+class Or(ParseExpression):
+ """Requires that at least one C{ParseExpression} is found.
+ If two expressions match, the expression that matches the longest string will be used.
+ May be constructed using the C{'^'} operator.
+ """
+ def __init__( self, exprs, savelist = False ):
+ super(Or,self).__init__(exprs, savelist)
+ self.mayReturnEmpty = False  # becomes True as soon as any alternative may return empty
+ for e in self.exprs:
+ if e.mayReturnEmpty:
+ self.mayReturnEmpty = True
+ break
+
+ def parseImpl( self, instring, loc, doActions=True ):  # try every alternative, then re-parse with the one that reached furthest
+ maxExcLoc = -1
+ maxMatchLoc = -1
+ maxException = None
+ for e in self.exprs:
+ try:
+ loc2 = e.tryParse( instring, loc )  # trial parse: only the resulting location is used
+ except ParseException:
+ err = sys.exc_info()[1]
+ if err.loc > maxExcLoc:  # remember the failure that got furthest into the input
+ maxException = err
+ maxExcLoc = err.loc
+ except IndexError:
+ if len(instring) > maxExcLoc:  # running off the end is recorded as a failure at end of input
+ maxException = ParseException(instring,len(instring),e.errmsg,self)
+ maxExcLoc = len(instring)
+ else:
+ if loc2 > maxMatchLoc:  # strict comparison: on a tie the earlier alternative is kept
+ maxMatchLoc = loc2
+ maxMatchExp = e
+
+ if maxMatchLoc < 0:  # no alternative matched
+ if maxException is not None:
+ raise maxException
+ else:
+ raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+ return maxMatchExp._parse( instring, loc, doActions )  # the real parse, using the doActions value passed in
+
+ def __ixor__(self, other ):  # implements "or_expr ^= other"; appends in place and returns self
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ return self.append( other ) #Or( [ self, other ] )
+
+ def __str__( self ):  # the assigned name if there is one, else "{a ^ b}" cached in strRepr
+ if hasattr(self,"name"):
+ return self.name
+
+ if self.strRepr is None:
+ self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
+
+ return self.strRepr
+
+ def checkRecursion( self, parseElementList ):  # every alternative is checked
+ subRecCheckList = parseElementList[:] + [ self ]
+ for e in self.exprs:
+ e.checkRecursion( subRecCheckList )
+
+
+class MatchFirst(ParseExpression):
+ """Requires that at least one C{ParseExpression} is found.
+ If two expressions match, the first one listed is the one that will match.
+ May be constructed using the C{'|'} operator.
+ """
+ def __init__( self, exprs, savelist = False ):
+ super(MatchFirst,self).__init__(exprs, savelist)
+ if exprs:
+ self.mayReturnEmpty = False
+ for e in self.exprs:
+ if e.mayReturnEmpty:
+ self.mayReturnEmpty = True
+ break
+ else:
+ self.mayReturnEmpty = True
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ maxExcLoc = -1
+ maxException = None
+ for e in self.exprs:
+ try:
+ ret = e._parse( instring, loc, doActions )
+ return ret
+ except ParseException, err:
+ if err.loc > maxExcLoc:
+ maxException = err
+ maxExcLoc = err.loc
+ except IndexError:
+ if len(instring) > maxExcLoc:
+ maxException = ParseException(instring,len(instring),e.errmsg,self)
+ maxExcLoc = len(instring)
+
+ # only got here if no expression matched, raise exception for match that made it the furthest
+ else:
+ if maxException is not None:
+ raise maxException
+ else:
+ raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+ def __ior__(self, other ):
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ return self.append( other ) #MatchFirst( [ self, other ] )
+
+ def __str__( self ):
+ if hasattr(self,"name"):
+ return self.name
+
+ if self.strRepr is None:
+ self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
+
+ return self.strRepr
+
+ def checkRecursion( self, parseElementList ):
+ subRecCheckList = parseElementList[:] + [ self ]
+ for e in self.exprs:
+ e.checkRecursion( subRecCheckList )
+
+
+class Each(ParseExpression):
+ """Requires all given C{ParseExpression}s to be found, but in any order.
+ Expressions may be separated by whitespace.
+ May be constructed using the C{'&'} operator.
+ """
+ def __init__( self, exprs, savelist = True ):
+ super(Each,self).__init__(exprs, savelist)
+ self.mayReturnEmpty = True  # stays True only if every contained expression may return empty
+ for e in self.exprs:
+ if not e.mayReturnEmpty:
+ self.mayReturnEmpty = False
+ break
+ self.skipWhitespace = True
+ self.initExprGroups = True  # the expression groups are built lazily on the first parseImpl call
+
+ def parseImpl( self, instring, loc, doActions=True ):  # trial-parse to find a matching order, then parse for real in that order
+ if self.initExprGroups:
+ opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]  # inner expressions of explicit Optional wrappers
+ opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ]  # anything else that may match empty
+ self.optionals = opt1 + opt2
+ self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
+ self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
+ self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
+ self.required += self.multirequired  # a OneOrMore must match at least once
+ self.initExprGroups = False
+ tmpLoc = loc
+ tmpReqd = self.required[:]  # working copies, consumed as expressions match
+ tmpOpt = self.optionals[:]
+ matchOrder = []
+
+ keepMatching = True
+ while keepMatching:  # repeat passes until a pass in which nothing matches
+ tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
+ failed = []
+ for e in tmpExprs:
+ try:
+ tmpLoc = e.tryParse( instring, tmpLoc )
+ except ParseException:
+ failed.append(e)
+ else:
+ matchOrder.append(e)
+ if e in tmpReqd:
+ tmpReqd.remove(e)
+ elif e in tmpOpt:
+ tmpOpt.remove(e)
+ if len(failed) == len(tmpExprs):
+ keepMatching = False
+
+ if tmpReqd:  # some required expression never matched
+ missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
+ raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
+
+ # add any unmatched Optionals, in case they have default values defined
+ matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]
+
+ resultlist = []
+ for e in matchOrder:  # the real parse, from the original loc, in the discovered order
+ loc,results = e._parse(instring,loc,doActions)
+ resultlist.append(results)
+
+ finalResults = ParseResults([])
+ for r in resultlist:  # merge results; a name seen more than once accumulates its values
+ dups = {}
+ for k in r.keys():
+ if k in finalResults.keys():
+ tmp = ParseResults(finalResults[k])
+ tmp += ParseResults(r[k])
+ dups[k] = tmp
+ finalResults += ParseResults(r)
+ for k,v in dups.items():
+ finalResults[k] = v  # restore the accumulated value after the += above
+ return loc, finalResults
+
+ def __str__( self ):  # the assigned name if there is one, else "{a & b}" cached in strRepr
+ if hasattr(self,"name"):
+ return self.name
+
+ if self.strRepr is None:
+ self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
+
+ return self.strRepr
+
+ def checkRecursion( self, parseElementList ):  # every contained expression is checked
+ subRecCheckList = parseElementList[:] + [ self ]
+ for e in self.exprs:
+ e.checkRecursion( subRecCheckList )
+
+
+class ParseElementEnhance(ParserElement):
+ """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
+ def __init__( self, expr, savelist=False ):
+ super(ParseElementEnhance,self).__init__(savelist)
+ if isinstance( expr, basestring ):
+ expr = Literal(expr)
+ self.expr = expr
+ self.strRepr = None
+ if expr is not None:
+ self.mayIndexError = expr.mayIndexError
+ self.mayReturnEmpty = expr.mayReturnEmpty
+ self.setWhitespaceChars( expr.whiteChars )
+ self.skipWhitespace = expr.skipWhitespace
+ self.saveAsList = expr.saveAsList
+ self.callPreparse = expr.callPreparse
+ self.ignoreExprs.extend(expr.ignoreExprs)
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ if self.expr is not None:
+ return self.expr._parse( instring, loc, doActions, callPreParse=False )
+ else:
+ raise ParseException("",loc,self.errmsg,self)
+
+ def leaveWhitespace( self ):
+ self.skipWhitespace = False
+ self.expr = self.expr.copy()
+ if self.expr is not None:
+ self.expr.leaveWhitespace()
+ return self
+
+ def ignore( self, other ):
+ if isinstance( other, Suppress ):
+ if other not in self.ignoreExprs:
+ super( ParseElementEnhance, self).ignore( other )
+ if self.expr is not None:
+ self.expr.ignore( self.ignoreExprs[-1] )
+ else:
+ super( ParseElementEnhance, self).ignore( other )
+ if self.expr is not None:
+ self.expr.ignore( self.ignoreExprs[-1] )
+ return self
+
+ def streamline( self ):
+ super(ParseElementEnhance,self).streamline()
+ if self.expr is not None:
+ self.expr.streamline()
+ return self
+
+ def checkRecursion( self, parseElementList ):
+ if self in parseElementList:
+ raise RecursiveGrammarException( parseElementList+[self] )
+ subRecCheckList = parseElementList[:] + [ self ]
+ if self.expr is not None:
+ self.expr.checkRecursion( subRecCheckList )
+
+ def validate( self, validateTrace=[] ):
+ tmp = validateTrace[:]+[self]
+ if self.expr is not None:
+ self.expr.validate(tmp)
+ self.checkRecursion( [] )
+
+ def __str__( self ):
+ try:
+ return super(ParseElementEnhance,self).__str__()
+ except:
+ pass
+
+ if self.strRepr is None and self.expr is not None:
+ self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
+ return self.strRepr
+
+
+class FollowedBy(ParseElementEnhance):
+ """Lookahead matching of the given parse expression. C{FollowedBy}
+ does *not* advance the parsing position within the input string, it only
+ verifies that the specified parse expression matches at the current
+ position. C{FollowedBy} always returns a null token list."""
+ def __init__( self, expr ):
+ super(FollowedBy,self).__init__(expr)
+ self.mayReturnEmpty = True
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ self.expr.tryParse( instring, loc )  # result is discarded; an exception raised here propagates as the failure
+ return loc, []  # location unchanged, no tokens
+
+
+class NotAny(ParseElementEnhance):
+ """Lookahead to disallow matching with the given parse expression. C{NotAny}
+ does *not* advance the parsing position within the input string, it only
+ verifies that the specified parse expression does *not* match at the current
+ position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
+ always returns a null token list. May be constructed using the '~' operator."""
+ def __init__( self, expr ):
+ super(NotAny,self).__init__(expr)
+ #~ self.leaveWhitespace()
+ self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
+ self.mayReturnEmpty = True
+ self.errmsg = "Found unwanted token, "+_ustr(self.expr)
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ try:
+ self.expr.tryParse( instring, loc )
+ except (ParseException,IndexError):  # the wrapped expression failed, which is the success case here
+ pass
+ else:  # the wrapped expression matched, so this element fails
+ #~ raise ParseException(instring, loc, self.errmsg )
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+ return loc, []  # location unchanged, no tokens
+
+ def __str__( self ):  # the assigned name if there is one, else "~{expr}" cached in strRepr
+ if hasattr(self,"name"):
+ return self.name
+
+ if self.strRepr is None:
+ self.strRepr = "~{" + _ustr(self.expr) + "}"
+
+ return self.strRepr
+
+
+class ZeroOrMore(ParseElementEnhance):
+ """Optional repetition of zero or more of the given expression."""
+ def __init__( self, expr ):
+ super(ZeroOrMore,self).__init__(expr)
+ self.mayReturnEmpty = True
+
+ def parseImpl( self, instring, loc, doActions=True ):  # returns (end location, accumulated tokens); never raises for a non-match
+ tokens = []  # result when even the first attempt fails
+ try:
+ loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+ hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
+ while 1:  # left only through the exception raised by a failed repetition
+ if hasIgnoreExprs:
+ preloc = self._skipIgnorables( instring, loc )
+ else:
+ preloc = loc
+ loc, tmptokens = self.expr._parse( instring, preloc, doActions )
+ if tmptokens or tmptokens.keys():  # keep results that carry tokens or named entries
+ tokens += tmptokens
+ except (ParseException,IndexError):
+ pass
+
+ return loc, tokens
+
+ def __str__( self ):  # the assigned name if there is one, else "[expr]..." cached in strRepr
+ if hasattr(self,"name"):
+ return self.name
+
+ if self.strRepr is None:
+ self.strRepr = "[" + _ustr(self.expr) + "]..."
+
+ return self.strRepr
+
+ def setResultsName( self, name, listAllMatches=False ):
+ ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
+ ret.saveAsList = True  # set on the object returned by the base class call
+ return ret
+
+
+class OneOrMore(ParseElementEnhance):
+ """Repetition of one or more of the given expression."""
+ def parseImpl( self, instring, loc, doActions=True ):  # returns (end location, accumulated tokens)
+ # must be at least one
+ loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )  # outside the try: a failure here propagates to the caller
+ try:
+ hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
+ while 1:  # left only through the exception raised by a failed repetition
+ if hasIgnoreExprs:
+ preloc = self._skipIgnorables( instring, loc )
+ else:
+ preloc = loc
+ loc, tmptokens = self.expr._parse( instring, preloc, doActions )
+ if tmptokens or tmptokens.keys():  # keep results that carry tokens or named entries
+ tokens += tmptokens
+ except (ParseException,IndexError):
+ pass
+
+ return loc, tokens
+
+ def __str__( self ):  # the assigned name if there is one, else "{expr}..." cached in strRepr
+ if hasattr(self,"name"):
+ return self.name
+
+ if self.strRepr is None:
+ self.strRepr = "{" + _ustr(self.expr) + "}..."
+
+ return self.strRepr
+
+ def setResultsName( self, name, listAllMatches=False ):
+ ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
+ ret.saveAsList = True  # set on the object returned by the base class call
+ return ret
+
+class _NullToken(object):  # Placeholder object that is falsy and renders as the empty string.
+ def __bool__(self):
+ return False
+ __nonzero__ = __bool__  # same method under the Python 2 truth-test name
+ def __str__(self):
+ return ""
+
+_optionalNotMatched = _NullToken()  # sentinel used as the "no default supplied" value for Optional's default argument
+class Optional(ParseElementEnhance):
+ """Optional matching of the given expression.
+ A default return string can also be specified, if the optional expression
+ is not found.
+ """
+ def __init__( self, exprs, default=_optionalNotMatched ):
+ super(Optional,self).__init__( exprs, savelist=False )
+ self.defaultValue = default
+ self.mayReturnEmpty = True
+
+ def parseImpl( self, instring, loc, doActions=True ):  # never raises for a non-match; loc is returned unchanged in that case
+ try:
+ loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+ except (ParseException,IndexError):
+ if self.defaultValue is not _optionalNotMatched:  # identity test, so any caller-supplied default (even None) counts
+ if self.expr.resultsName:
+ tokens = ParseResults([ self.defaultValue ])
+ tokens[self.expr.resultsName] = self.defaultValue  # expose the default under the wrapped expression's results name
+ else:
+ tokens = [ self.defaultValue ]
+ else:
+ tokens = []
+ return loc, tokens
+
+ def __str__( self ):  # the assigned name if there is one, else "[expr]" cached in strRepr
+ if hasattr(self,"name"):
+ return self.name
+
+ if self.strRepr is None:
+ self.strRepr = "[" + _ustr(self.expr) + "]"
+
+ return self.strRepr
+
+
+class SkipTo(ParseElementEnhance):
+ """Token for skipping over all undefined text until the matched expression is found.
+ If C{include} is set to true, the matched expression is also parsed (the skipped text
+ and matched expression are returned as a 2-element list). The C{ignore}
+ argument is used to define grammars (typically quoted strings and comments) that
+ might contain false matches. If C{failOn} is given (an expression, or a string
+ which is converted to a C{Literal}), the skip fails as soon as C{failOn} matches.
+ """
+ def __init__( self, other, include=False, ignore=None, failOn=None ):
+ super( SkipTo, self ).__init__( other )
+ self.ignoreExpr = ignore
+ self.mayReturnEmpty = True
+ self.mayIndexError = False
+ self.includeMatch = include
+ self.asList = False
+ if failOn is not None and isinstance(failOn, basestring):
+ self.failOn = Literal(failOn)  # a bare string becomes a Literal
+ else:
+ self.failOn = failOn
+ self.errmsg = "No match found for "+_ustr(self.expr)
+
+ def parseImpl( self, instring, loc, doActions=True ):  # scan forward one character at a time until the target expression matches
+ startLoc = loc
+ instrlen = len(instring)
+ expr = self.expr
+ failParse = False
+ while loc <= instrlen:  # loc == instrlen is still tried, so a target that matches at end of input is found
+ try:
+ if self.failOn:
+ try:
+ self.failOn.tryParse(instring, loc)
+ except ParseBaseException:
+ pass
+ else:  # failOn matched here: abort the whole skip
+ failParse = True
+ raise ParseException(instring, loc, "Found expression " + str(self.failOn))
+ failParse = False
+ if self.ignoreExpr is not None:
+ while 1:  # jump over consecutive matches of the ignore expression
+ try:
+ loc = self.ignoreExpr.tryParse(instring,loc)
+ # print "found ignoreExpr, advance to", loc
+ except ParseBaseException:
+ break
+ expr._parse( instring, loc, doActions=False, callPreParse=False )  # probe only: result discarded, parse actions off
+ skipText = instring[startLoc:loc]
+ if self.includeMatch:
+ loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)  # parse the target for real and advance past it
+ if mat:
+ skipRes = ParseResults( skipText )
+ skipRes += mat
+ return loc, [ skipRes ]
+ else:
+ return loc, [ skipText ]
+ else:
+ return loc, [ skipText ]  # loc is left at the start of the target
+ except (ParseException,IndexError):
+ if failParse:  # the failure came from failOn, not from the probe
+ raise
+ else:
+ loc += 1
+ exc = self.myException  # reached when the scan runs out of input without a match
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+
+class Forward(ParseElementEnhance):
+ """Forward declaration of an expression to be defined later -
+ used for recursive grammars, such as algebraic infix notation.
+ When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
+
+ Note: take care when assigning to C{Forward} not to overlook precedence of operators.
+ Specifically, '|' has a lower precedence than '<<', so that::
+ fwdExpr << a | b | c
+ will actually be evaluated as::
+ (fwdExpr << a) | b | c
+ thereby leaving b and c out as parseable alternatives. It is recommended that you
+ explicitly group the values inserted into the C{Forward}::
+ fwdExpr << (a | b | c)
+ Converting to use the '<<=' operator instead will avoid this problem.
+ """
+ def __init__( self, other=None ):
+ super(Forward,self).__init__( other, savelist=False )
+
+ def __lshift__( self, other ):  # bind the deferred expression and copy its parsing attributes onto this element
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass(other)
+ self.expr = other
+ self.mayReturnEmpty = other.mayReturnEmpty
+ self.strRepr = None  # cached string form is stale once the expression changes
+ self.mayIndexError = self.expr.mayIndexError
+ self.mayReturnEmpty = self.expr.mayReturnEmpty
+ self.setWhitespaceChars( self.expr.whiteChars )
+ self.skipWhitespace = self.expr.skipWhitespace
+ self.saveAsList = self.expr.saveAsList
+ self.ignoreExprs.extend(self.expr.ignoreExprs)
+ return None  # "fwd << expr" therefore has no usable value and cannot be chained
+ __ilshift__ = __lshift__  # "<<=" shares this implementation
+
+ def leaveWhitespace( self ):  # only this element is changed; the contained expression is left alone
+ self.skipWhitespace = False
+ return self
+
+ def streamline( self ):
+ if not self.streamlined:  # flag is set before descending, so a second visit does nothing
+ self.streamlined = True
+ if self.expr is not None:
+ self.expr.streamline()
+ return self
+
+ def validate( self, validateTrace=[] ):  # the default list is never mutated; a copy is extended below
+ if self not in validateTrace:  # do not descend again if this element is already on the trace
+ tmp = validateTrace[:]+[self]
+ if self.expr is not None:
+ self.expr.validate(tmp)
+ self.checkRecursion([])
+
+ def __str__( self ):  # the assigned name if there is one, else "Forward: " plus the contained expression
+ if hasattr(self,"name"):
+ return self.name
+
+ self._revertClass = self.__class__
+ self.__class__ = _ForwardNoRecurse  # while swapped, a nested str() of this object yields "..."
+ try:
+ if self.expr is not None:
+ retString = _ustr(self.expr)
+ else:
+ retString = "None"
+ finally:
+ self.__class__ = self._revertClass  # always restore the real class
+ return self.__class__.__name__ + ": " + retString
+
+ def copy(self):
+ if self.expr is not None:
+ return super(Forward,self).copy()
+ else:  # nothing bound yet: return a new Forward whose expression is this Forward
+ ret = Forward()
+ ret << self
+ return ret
+
+class _ForwardNoRecurse(Forward):
+ def __str__( self ):
+ return "..."
+
+class TokenConverter(ParseElementEnhance):
+ """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
+ def __init__( self, expr, savelist=False ):
+ super(TokenConverter,self).__init__( expr )#, savelist )
+ self.saveAsList = False
+
+class Upcase(TokenConverter):
+ """Converter to upper case all matching tokens."""
+ def __init__(self, *args):
+ super(Upcase,self).__init__(*args)
+ warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
+ DeprecationWarning,stacklevel=2)
+
+ def postParse( self, instring, loc, tokenlist ):
+ return list(map( string.upper, tokenlist ))
+
+
+class Combine(TokenConverter):
+ """Converter to concatenate all matching tokens to a single string.
+ By default, the matching patterns must also be contiguous in the input string;
+ this can be disabled by specifying C{'adjacent=False'} in the constructor.
+ """
+ def __init__( self, expr, joinString="", adjacent=True ):
+ super(Combine,self).__init__( expr )
+ # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
+ if adjacent:
+ self.leaveWhitespace()
+ self.adjacent = adjacent
+ self.skipWhitespace = True
+ self.joinString = joinString
+ self.callPreparse = True
+
+ def ignore( self, other ):
+ if self.adjacent:
+ ParserElement.ignore(self, other)
+ else:
+ super( Combine, self).ignore( other )
+ return self
+
+ def postParse( self, instring, loc, tokenlist ):
+ retToks = tokenlist.copy()
+ del retToks[:]
+ retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
+
+ if self.resultsName and len(retToks.keys())>0:
+ return [ retToks ]
+ else:
+ return retToks
+
+class Group(TokenConverter):
+ """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
+ def __init__( self, expr ):
+ super(Group,self).__init__( expr )
+ self.saveAsList = True
+
+ def postParse( self, instring, loc, tokenlist ):
+ return [ tokenlist ]
+
+class Dict(TokenConverter):
+ """Converter to return a repetitive expression as a list, but also as a dictionary.
+ Each element can also be referenced using the first token in the expression as its key.
+ Useful for tabular report scraping when the first column can be used as an item key.
+ """
+ def __init__( self, exprs ):
+ super(Dict,self).__init__( exprs )
+ self.saveAsList = True
+
+ def postParse( self, instring, loc, tokenlist ):
+ for i,tok in enumerate(tokenlist):
+ if len(tok) == 0:
+ continue
+ ikey = tok[0]
+ if isinstance(ikey,int):
+ ikey = _ustr(tok[0]).strip()
+ if len(tok)==1:
+ tokenlist[ikey] = _ParseResultsWithOffset("",i)
+ elif len(tok)==2 and not isinstance(tok[1],ParseResults):
+ tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
+ else:
+ dictvalue = tok.copy() #ParseResults(i)
+ del dictvalue[0]
+ if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
+ tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
+ else:
+ tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
+
+ if self.resultsName:
+ return [ tokenlist ]
+ else:
+ return tokenlist
+
+
+class Suppress(TokenConverter):
+ """Converter for ignoring the results of a parsed expression."""
+ def postParse( self, instring, loc, tokenlist ):
+ return []
+
+ def suppress( self ):
+ return self
+
+
+class OnlyOnce(object):
+ """Wrapper for parse actions, to ensure they are only called once."""
+ def __init__(self, methodCall):
+ self.callable = _trim_arity(methodCall)
+ self.called = False
+ def __call__(self,s,l,t):
+ if not self.called:
+ results = self.callable(s,l,t)
+ self.called = True
+ return results
+ raise ParseException(s,l,"")
+ def reset(self):
+ self.called = False
+
+def traceParseAction(f):
+ """Decorator for debugging parse actions."""
+ f = _trim_arity(f)
+ def z(*paArgs):
+ thisFunc = f.func_name
+ s,l,t = paArgs[-3:]
+ if len(paArgs)>3:
+ thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
+ sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
+ try:
+ ret = f(*paArgs)
+ except Exception:
+ exc = sys.exc_info()[1]
+ sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
+ raise
+ sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
+ return ret
+ try:
+ z.__name__ = f.__name__
+ except AttributeError:
+ pass
+ return z
+
+#
+# global helpers
+#
+def delimitedList( expr, delim=",", combine=False ):
+ """Helper to define a delimited list of expressions - the delimiter defaults to ','.
+ By default, the list elements and delimiters can have intervening whitespace, and
+ comments, but this can be overridden by passing C{combine=True} as an argument.
+ If C{combine} is set to C{True}, the matching tokens are returned as a single token
+ string, with the delimiters included; otherwise, the matching tokens are returned
+ as a list of tokens, with the delimiters suppressed.
+ """
+ dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
+ if combine:
+ return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
+ else:
+ return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
+
+def countedArray( expr, intExpr=None ):
+ """Helper to define a counted list of expressions.
+ This helper defines a pattern of the form::
+ integer expr expr expr...
+ where the leading integer tells how many expr expressions follow.
+ The matched tokens are returned as a list of the expr tokens - the leading count token is suppressed.
+ """
+ arrayExpr = Forward()
+ def countFieldParseAction(s,l,t):
+ n = t[0]
+ arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
+ return []
+ if intExpr is None:
+ intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
+ else:
+ intExpr = intExpr.copy()
+ intExpr.setName("arrayLen")
+ intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
+ return ( intExpr + arrayExpr )
+
+def _flatten(L):
+ ret = []
+ for i in L:
+ if isinstance(i,list):
+ ret.extend(_flatten(i))
+ else:
+ ret.append(i)
+ return ret
+
+def matchPreviousLiteral(expr):
+ """Helper to define an expression that is indirectly defined from
+ the tokens matched in a previous expression, that is, it looks
+ for a 'repeat' of a previous expression. For example::
+ first = Word(nums)
+ second = matchPreviousLiteral(first)
+ matchExpr = first + ":" + second
+ will match C{"1:1"}, but not C{"1:2"}. Because this matches a
+ previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
+ If this is not desired, use C{matchPreviousExpr}.
+ Do *not* use with packrat parsing enabled.
+ """
+ rep = Forward()
+ def copyTokenToRepeater(s,l,t):
+ if t:
+ if len(t) == 1:
+ rep << t[0]
+ else:
+ # flatten t tokens
+ tflat = _flatten(t.asList())
+ rep << And( [ Literal(tt) for tt in tflat ] )
+ else:
+ rep << Empty()
+ expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
+ return rep
+
+def matchPreviousExpr(expr):
+ """Helper to define an expression that is indirectly defined from
+ the tokens matched in a previous expression, that is, it looks
+ for a 'repeat' of a previous expression. For example::
+ first = Word(nums)
+ second = matchPreviousExpr(first)
+ matchExpr = first + ":" + second
+ will match C{"1:1"}, but not C{"1:2"}. Because this matches by
+ expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
+ the expressions are evaluated first, and then compared, so
+ C{"1"} is compared with C{"10"}.
+ Do *not* use with packrat parsing enabled.
+ """
+ rep = Forward()
+ e2 = expr.copy()
+ rep << e2
+ def copyTokenToRepeater(s,l,t):
+ matchTokens = _flatten(t.asList())
+ def mustMatchTheseTokens(s,l,t):
+ theseTokens = _flatten(t.asList())
+ if theseTokens != matchTokens:
+ raise ParseException("",0,"")
+ rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
+ expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
+ return rep
+
+def _escapeRegexRangeChars(s):
+ #~ escape these chars: ^-]
+ for c in r"\^-]":
+ s = s.replace(c,_bslash+c)
+ s = s.replace("\n",r"\n")
+ s = s.replace("\t",r"\t")
+ return _ustr(s)
+
+def oneOf( strs, caseless=False, useRegex=True ):
+ """Helper to quickly define a set of alternative Literals, and makes sure to do
+ longest-first testing when there is a conflict, regardless of the input order,
+ but returns a C{L{MatchFirst}} for best performance.
+
+ Parameters:
+ - strs - a string of space-delimited literals, or a list of string literals
+ - caseless - (default=False) - treat all literals as caseless
+ - useRegex - (default=True) - as an optimization, will generate a Regex
+ object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
+ if creating a C{Regex} raises an exception)
+ """
+ if caseless:
+ isequal = ( lambda a,b: a.upper() == b.upper() )
+ masks = ( lambda a,b: b.upper().startswith(a.upper()) )
+ parseElementClass = CaselessLiteral
+ else:
+ isequal = ( lambda a,b: a == b )
+ masks = ( lambda a,b: b.startswith(a) )
+ parseElementClass = Literal
+
+ if isinstance(strs,(list,tuple)):
+ symbols = list(strs[:])
+ elif isinstance(strs,basestring):
+ symbols = strs.split()
+ else:
+ warnings.warn("Invalid argument to oneOf, expected string or list",
+ SyntaxWarning, stacklevel=2)
+
+ i = 0
+ while i < len(symbols)-1:
+ cur = symbols[i]
+ for j,other in enumerate(symbols[i+1:]):
+ if ( isequal(other, cur) ):
+ del symbols[i+j+1]
+ break
+ elif ( masks(cur, other) ):
+ del symbols[i+j+1]
+ symbols.insert(i,other)
+ cur = other
+ break
+ else:
+ i += 1
+
+ if not caseless and useRegex:
+ #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
+ try:
+ if len(symbols)==len("".join(symbols)):
+ return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
+ else:
+ return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
+ except:
+ warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
+ SyntaxWarning, stacklevel=2)
+
+
+ # last resort, just use MatchFirst
+ return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
+
+def dictOf( key, value ):
+ """Helper to easily and clearly define a dictionary by specifying the respective patterns
+ for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
+ in the proper order. The key pattern can include delimiting markers or punctuation,
+ as long as they are suppressed, thereby leaving the significant key text. The value
+ pattern can include named results, so that the C{Dict} results can include named token
+ fields.
+ """
+ return Dict( ZeroOrMore( Group ( key + value ) ) )
+
+def originalTextFor(expr, asString=True):
+ """Helper to return the original, untokenized text for a given expression. Useful to
+ restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+ revert separate tokens with intervening whitespace back to the original matching
+ input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
+ require the inspect module to chase up the call stack. By default, returns a
+ string containing the original parsed text.
+
+ If the optional C{asString} argument is passed as C{False}, then the return value is a
+ C{L{ParseResults}} containing any results names that were originally matched, and a
+ single token containing the original matched text from the input string. So if
+ the expression passed to C{L{originalTextFor}} contains expressions with defined
+ results names, you must set C{asString} to C{False} if you want to preserve those
+ results name values."""
+ locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+ endlocMarker = locMarker.copy()
+ endlocMarker.callPreparse = False
+ matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
+ if asString:
+ extractText = lambda s,l,t: s[t._original_start:t._original_end]
+ else:
+ def extractText(s,l,t):
+ del t[:]
+ t.insert(0, s[t._original_start:t._original_end])
+ del t["_original_start"]
+ del t["_original_end"]
+ matchExpr.setParseAction(extractText)
+ return matchExpr
+
+def ungroup(expr):
+ """Helper to undo pyparsing's default grouping of And expressions, even
+ if all but one are non-empty."""
+ return TokenConverter(expr).setParseAction(lambda t:t[0])
+
+# convenience constants for positional expressions
+empty = Empty().setName("empty")
+lineStart = LineStart().setName("lineStart")
+lineEnd = LineEnd().setName("lineEnd")
+stringStart = StringStart().setName("stringStart")
+stringEnd = StringEnd().setName("stringEnd")
+
+_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
+_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
+_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
+_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
+_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
+_charRange = Group(_singleChar + Suppress("-") + _singleChar)
+_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
+
+_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
+
+def srange(s):
+ r"""Helper to easily define string ranges for use in Word construction. Borrows
+ syntax from regexp '[]' string range definitions::
+ srange("[0-9]") -> "0123456789"
+ srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
+ srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
+ The input string must be enclosed in []'s, and the returned string is the expanded
+ character set joined into a single string.
+ The values enclosed in the []'s may be::
+ a single character
+ an escaped character with a leading backslash (such as \- or \])
+ an escaped hex character with a leading '\x' (\x21, which is a '!' character)
+ (\0x## is also supported for backwards compatibility)
+ an escaped octal character with a leading '\0' (\041, which is a '!' character)
+ a range of any of the above, separated by a dash ('a-z', etc.)
+ any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
+ """
+ try:
+ return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
+ except:
+ return ""
+
+def matchOnlyAtCol(n):
+ """Helper method for defining parse actions that require matching at a specific
+ column in the input text.
+ """
+ def verifyCol(strg,locn,toks):
+ if col(locn,strg) != n:
+ raise ParseException(strg,locn,"matched token not at column %d" % n)
+ return verifyCol
+
+def replaceWith(replStr):
+ """Helper method for common parse actions that simply return a literal value. Especially
+ useful when used with C{L{transformString<ParserElement.transformString>}()}.
+ """
+ def _replFunc(*args):
+ return [replStr]
+ return _replFunc
+
+def removeQuotes(s,l,t):
+ """Helper parse action for removing quotation marks from parsed quoted strings.
+ To use, add this parse action to quoted string using::
+ quotedString.setParseAction( removeQuotes )
+ """
+ return t[0][1:-1]
+
+def upcaseTokens(s,l,t):
+ """Helper parse action to convert tokens to upper case."""
+ return [ tt.upper() for tt in map(_ustr,t) ]
+
+def downcaseTokens(s,l,t):
+ """Helper parse action to convert tokens to lower case."""
+ return [ tt.lower() for tt in map(_ustr,t) ]
+
+def keepOriginalText(s,startLoc,t):
+ """DEPRECATED - use new helper method C{L{originalTextFor}}.
+ Helper parse action to preserve original parsed text,
+ overriding any nested parse actions."""
+ try:
+ endloc = getTokensEndLoc()
+ except ParseException:
+ raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
+ del t[:]
+ t += ParseResults(s[startLoc:endloc])
+ return t
+
+def getTokensEndLoc():
+ """Method to be called from within a parse action to determine the end
+ location of the parsed tokens."""
+ import inspect
+ fstack = inspect.stack()
+ try:
+ # search up the stack (through intervening argument normalizers) for correct calling routine
+ for f in fstack[2:]:
+ if f[3] == "_parseNoCache":
+ endloc = f[0].f_locals["loc"]
+ return endloc
+ else:
+ raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
+ finally:
+ del fstack
+
+def _makeTags(tagStr, xml):
+ """Internal helper to construct opening and closing tag expressions, given a tag name"""
+ if isinstance(tagStr,basestring):
+ resname = tagStr
+ tagStr = Keyword(tagStr, caseless=not xml)
+ else:
+ resname = tagStr.name
+
+ tagAttrName = Word(alphas,alphanums+"_-:")
+ if (xml):
+ tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
+ openTag = Suppress("<") + tagStr("tag") + \
+ Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
+ Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
+ else:
+ printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
+ tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
+ openTag = Suppress("<") + tagStr("tag") + \
+ Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
+ Optional( Suppress("=") + tagAttrValue ) ))) + \
+ Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
+ closeTag = Combine(_L("</") + tagStr + ">")
+
+ openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
+ closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr)
+ openTag.tag = resname
+ closeTag.tag = resname
+ return openTag, closeTag
+
+def makeHTMLTags(tagStr):
+ """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
+ return _makeTags( tagStr, False )
+
+def makeXMLTags(tagStr):
+ """Helper to construct opening and closing tag expressions for XML, given a tag name"""
+ return _makeTags( tagStr, True )
+
+def withAttribute(*args,**attrDict):
+ """Helper to create a validating parse action to be used with start tags created
+ with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
+ with a required attribute value, to avoid false matches on common tags such as
+ C{<TD>} or C{<DIV>}.
+
+ Call C{withAttribute} with a series of attribute names and values. Specify the list
+ of filter attribute names and values as:
+ - keyword arguments, as in C{(align="right")}, or
+ - as an explicit dict with C{**} operator, when an attribute name is also a Python
+ reserved word, as in C{**{"class":"Customer", "align":"right"}}
+ - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
+ For attribute names with a namespace prefix, you must use the second or third form. Attribute
+ names are matched insensitive to upper/lower case.
+
+ To verify that the attribute exists, but without specifying a value, pass
+ C{withAttribute.ANY_VALUE} as the value.
+ """
+ if args:
+ attrs = args[:]
+ else:
+ attrs = attrDict.items()
+ attrs = [(k,v) for k,v in attrs]
+ def pa(s,l,tokens):
+ for attrName,attrValue in attrs:
+ if attrName not in tokens:
+ raise ParseException(s,l,"no matching attribute " + attrName)
+ if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
+ raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
+ (attrName, tokens[attrName], attrValue))
+ return pa
+withAttribute.ANY_VALUE = object()
+
+opAssoc = _Constants()
+opAssoc.LEFT = object()
+opAssoc.RIGHT = object()
+
+def operatorPrecedence( baseExpr, opList ):
+ """Helper method for constructing grammars of expressions made up of
+ operators working in a precedence hierarchy. Operators may be unary or
+ binary, left- or right-associative. Parse actions can also be attached
+ to operator expressions.
+
+ Parameters:
+ - baseExpr - expression representing the most basic element for the nested expression grammar
+ - opList - list of tuples, one for each operator precedence level in the
+ expression grammar; each tuple is of the form
+ (opExpr, numTerms, rightLeftAssoc, parseAction), where:
+ - opExpr is the pyparsing expression for the operator;
+ may also be a string, which will be converted to a Literal;
+ if numTerms is 3, opExpr is a tuple of two expressions, for the
+ two operators separating the 3 terms
+ - numTerms is the number of terms for this operator (must
+ be 1, 2, or 3)
+ - rightLeftAssoc is the indicator whether the operator is
+ right or left associative, using the pyparsing-defined
+ constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
+ - parseAction is the parse action to be associated with
+ expressions matching this operator expression (the
+ parse action tuple member may be omitted)
+ """
+ ret = Forward()
+ lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
+ for i,operDef in enumerate(opList):
+ opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
+ if arity == 3:
+ if opExpr is None or len(opExpr) != 2:
+ raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
+ opExpr1, opExpr2 = opExpr
+ thisExpr = Forward()#.setName("expr%d" % i)
+ if rightLeftAssoc == opAssoc.LEFT:
+ if arity == 1:
+ matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
+ elif arity == 2:
+ if opExpr is not None:
+ matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
+ else:
+ matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
+ elif arity == 3:
+ matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
+ Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
+ else:
+ raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+ elif rightLeftAssoc == opAssoc.RIGHT:
+ if arity == 1:
+ # try to avoid LR with this extra test
+ if not isinstance(opExpr, Optional):
+ opExpr = Optional(opExpr)
+ matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
+ elif arity == 2:
+ if opExpr is not None:
+ matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
+ else:
+ matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
+ elif arity == 3:
+ matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
+ Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
+ else:
+ raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+ else:
+ raise ValueError("operator must indicate right or left associativity")
+ if pa:
+ matchExpr.setParseAction( pa )
+ thisExpr << ( matchExpr | lastExpr )
+ lastExpr = thisExpr
+ ret << lastExpr
+ return ret
+
+dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
+sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
+quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
+unicodeString = Combine(_L('u') + quotedString.copy())
+
+def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
+ """Helper method for defining nested lists enclosed in opening and closing
+ delimiters ("(" and ")" are the default).
+
+ Parameters:
+ - opener - opening character for a nested list (default="("); can also be a pyparsing expression
+ - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
+ - content - expression for items within the nested lists (default=None)
+ - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
+
+ If an expression is not provided for the content argument, the nested
+ expression will capture all whitespace-delimited content between delimiters
+ as a list of separate values.
+
+ Use the C{ignoreExpr} argument to define expressions that may contain
+ opening or closing characters that should not be treated as opening
+ or closing characters for nesting, such as quotedString or a comment
+ expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
+ The default is L{quotedString}, but if no expressions are to be ignored,
+ then pass C{None} for this argument.
+ """
+ if opener == closer:
+ raise ValueError("opening and closing strings cannot be the same")
+ if content is None:
+ if isinstance(opener,basestring) and isinstance(closer,basestring):
+ if len(opener) == 1 and len(closer)==1:
+ if ignoreExpr is not None:
+ content = (Combine(OneOrMore(~ignoreExpr +
+ CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+ ).setParseAction(lambda t:t[0].strip()))
+ else:
+ content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
+ ).setParseAction(lambda t:t[0].strip()))
+ else:
+ if ignoreExpr is not None:
+ content = (Combine(OneOrMore(~ignoreExpr +
+ ~Literal(opener) + ~Literal(closer) +
+ CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+ ).setParseAction(lambda t:t[0].strip()))
+ else:
+ content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
+ CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+ ).setParseAction(lambda t:t[0].strip()))
+ else:
+ raise ValueError("opening and closing arguments must be strings if no content expression is given")
+ ret = Forward()
+ if ignoreExpr is not None:
+ ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
+ else:
+ ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
+ return ret
+
+def indentedBlock(blockStatementExpr, indentStack, indent=True):
+ """Helper method for defining space-delimited indentation blocks, such as
+ those used to define block statements in Python source code.
+
+ Parameters:
+ - blockStatementExpr - expression defining syntax of statement that
+ is repeated within the indented block
+ - indentStack - list created by caller to manage indentation stack
+ (multiple statementWithIndentedBlock expressions within a single grammar
+ should share a common indentStack)
+ - indent - boolean indicating whether block must be indented beyond the
+ current level; set to False for block of left-most statements
+ (default=True)
+
+ A valid block must contain at least one C{blockStatement}.
+ """
+ def checkPeerIndent(s,l,t):
+ if l >= len(s): return
+ curCol = col(l,s)
+ if curCol != indentStack[-1]:
+ if curCol > indentStack[-1]:
+ raise ParseFatalException(s,l,"illegal nesting")
+ raise ParseException(s,l,"not a peer entry")
+
+ def checkSubIndent(s,l,t):
+ curCol = col(l,s)
+ if curCol > indentStack[-1]:
+ indentStack.append( curCol )
+ else:
+ raise ParseException(s,l,"not a subentry")
+
+ def checkUnindent(s,l,t):
+ if l >= len(s): return
+ curCol = col(l,s)
+ if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
+ raise ParseException(s,l,"not an unindent")
+ indentStack.pop()
+
+ NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
+ INDENT = Empty() + Empty().setParseAction(checkSubIndent)
+ PEER = Empty().setParseAction(checkPeerIndent)
+ UNDENT = Empty().setParseAction(checkUnindent)
+ if indent:
+ smExpr = Group( Optional(NL) +
+ #~ FollowedBy(blockStatementExpr) +
+ INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
+ else:
+ smExpr = Group( Optional(NL) +
+ (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
+ blockStatementExpr.ignore(_bslash + LineEnd())
+ return smExpr
+
+alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
+punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
+
+anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
+commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
+_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
+replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
+
+# it's easy to get these comment structures wrong - they're very common, so may as well make them available
+cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")
+
+htmlComment = Regex(r"<!--[\s\S]*?-->")
+restOfLine = Regex(r".*").leaveWhitespace()
+dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
+cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")
+
+javaStyleComment = cppStyleComment
+pythonStyleComment = Regex(r"#.*").setName("Python style comment")
+_noncomma = "".join( [ c for c in printables if c != "," ] )
+_commasepitem = Combine(OneOrMore(Word(_noncomma) +
+ Optional( Word(" \t") +
+ ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
+commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
+
+
+if __name__ == "__main__":
+
+ def test( teststring ):
+ try:
+ tokens = simpleSQL.parseString( teststring )
+ tokenlist = tokens.asList()
+ print (teststring + "->" + str(tokenlist))
+ print ("tokens = " + str(tokens))
+ print ("tokens.columns = " + str(tokens.columns))
+ print ("tokens.tables = " + str(tokens.tables))
+ print (tokens.asXML("SQL",True))
+ except ParseBaseException:
+ err = sys.exc_info()[1]
+ print (teststring + "->")
+ print (err.line)
+ print (" "*(err.column-1) + "^")
+ print (err)
+ print()
+
+ selectToken = CaselessLiteral( "select" )
+ fromToken = CaselessLiteral( "from" )
+
+ ident = Word( alphas, alphanums + "_$" )
+ columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
+ columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
+ tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
+ tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
+ simpleSQL = ( selectToken + \
+ ( '*' | columnNameList ).setResultsName( "columns" ) + \
+ fromToken + \
+ tableNameList.setResultsName( "tables" ) )
+
+ test( "SELECT * from XYZZY, ABC" )
+ test( "select * from SYS.XYZZY" )
+ test( "Select A from Sys.dual" )
+ test( "Select AA,BB,CC from Sys.dual" )
+ test( "Select A, B, C from Sys.dual" )
+ test( "Select A, B, C from Sys.dual" )
+ test( "Xelect A, B, C from Sys.dual" )
+ test( "Select A, B, C frox Sys.dual" )
+ test( "Select" )
+ test( "Select ^^^ frox Sys.dual" )
+ test( "Select A, B, C from Sys.dual, Table2 " )
diff --git a/src/pyparsing_py3.py b/src/pyparsing_py3.py
new file mode 100644
index 0000000..d427ca7
--- /dev/null
+++ b/src/pyparsing_py3.py
@@ -0,0 +1,3595 @@
+# module pyparsing.py
+#
+# Copyright (c) 2003-2011 Paul T. McGuire
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#from __future__ import generators
+
+__doc__ = \
+"""
+pyparsing module - Classes and methods to define and execute parsing grammars
+
+The pyparsing module is an alternative approach to creating and executing simple grammars,
+vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
+don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
+provides a library of classes that you use to construct the grammar directly in Python.
+
+Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
+
+ from pyparsing import Word, alphas
+
+ # define grammar of a greeting
+ greet = Word( alphas ) + "," + Word( alphas ) + "!"
+
+ hello = "Hello, World!"
+ print (hello, "->", greet.parseString( hello ))
+
+The program outputs the following::
+
+ Hello, World! -> ['Hello', ',', 'World', '!']
+
+The Python representation of the grammar is quite readable, owing to the self-explanatory
+class names, and the use of '+', '|' and '^' operators.
+
+The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
+object with named attributes.
+
+The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
+ - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
+ - quoted strings
+ - embedded comments
+"""
+
+# Release metadata for this copy of the module.
+__version__ = "1.5.7"
+__versionTime__ = "3 August 2012 05:00"
+__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
+
+import string
+from weakref import ref as wkref
+import copy
+import sys
+import warnings
+import re
+import sre_constants
+import collections
+#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
+
+# Names exported by "from <module> import *": classes first, then helper
+# functions and predefined expressions.
+__all__ = [
+'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
+'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
+'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
+'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
+'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
+'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
+'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
+'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
+'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
+'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
+'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
+'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
+'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
+'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
+'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
+'indentedBlock', 'originalTextFor', 'ungroup',
+]
+
+# Python 3 bindings for names the rest of this file refers to:
+# basestring, unichr and _ustr are simply aliases of str / chr / str here.
+_MAX_INT = sys.maxsize
+basestring = str
+unichr = chr
+_ustr = str
+
+# build list of single arg builtins, that can be used as parse actions
+# (_trim_arity wraps any of these so that only the token list is passed to it)
+singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
+
+def _xml_escape(data):
+    """Return C{data} with the characters &, >, <, " and ' replaced by XML entity references."""
+
+    # "&" has to come first, otherwise the ampersands introduced by the
+    # later replacements would themselves be escaped
+    replacements = (
+        ('&', '&amp;'),
+        ('>', '&gt;'),
+        ('<', '&lt;'),
+        ('"', '&quot;'),
+        ("'", '&apos;'),
+    )
+    escaped = data
+    for symbol, entity in replacements:
+        escaped = escaped.replace(symbol, entity)
+    return escaped
+
+# Empty placeholder class; how it is used is not visible in this part of the file.
+class _Constants(object):
+ pass
+
+# Character-set strings exported for building expressions (all are listed in __all__
+# except _bslash).
+alphas = string.ascii_lowercase + string.ascii_uppercase
+nums = "0123456789"
+hexnums = nums + "ABCDEFabcdef"
+alphanums = alphas + nums
+# chr(92) is the backslash character
+_bslash = chr(92)
+# every character of string.printable that is not whitespace
+printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
+
+class ParseBaseException(Exception):
+    """Common base class for all exceptions raised at parse time.
+
+    Stores C{loc}, C{msg}, C{pstr} and C{parserElement}; the attributes
+    C{lineno}, C{col}/C{column} and C{line} are computed on demand from
+    C{loc} and C{pstr}.
+    """
+    # Performance tuning: we construct a *lot* of these, so keep this
+    # constructor as small and fast as possible
+    def __init__( self, pstr, loc=0, msg=None, elem=None ):
+        self.loc = loc
+        if msg is not None:
+            self.msg, self.pstr = msg, pstr
+        else:
+            # single-string form: the string is the message and there is
+            # no parsed text to report against
+            self.msg, self.pstr = pstr, ""
+        self.parserElement = elem
+
+    def __getattr__( self, aname ):
+        """Compute the location attributes that are not stored on the instance:
+            - lineno - returns the line number of the exception text
+            - col - returns the column number of the exception text
+            - line - returns the line containing the exception text
+        C{column} is accepted as a synonym for C{col}; any other name raises
+        C{AttributeError}.
+        """
+        if aname == "lineno":
+            return lineno( self.loc, self.pstr )
+        if aname in ("col", "column"):
+            return col( self.loc, self.pstr )
+        if aname == "line":
+            return line( self.loc, self.pstr )
+        raise AttributeError(aname)
+
+    def __str__( self ):
+        details = ( self.msg, self.loc, self.lineno, self.column )
+        return "%s (at char %d), (line:%d, col:%d)" % details
+
+    def __repr__( self ):
+        return _ustr(self)
+
+    def markInputline( self, markerString = ">!<" ):
+        """Return the stripped input line of the exception, with C{markerString}
+           inserted at the exception column (no insertion if the marker is empty).
+        """
+        text = self.line
+        split_at = self.column - 1
+        if markerString:
+            text = "".join( [text[:split_at], markerString, text[split_at:]] )
+        return text.strip()
+
+    def __dir__(self):
+        return [
+            "loc", "msg", "pstr", "parserElement", "lineno", "col", "line",
+            "markInputline", "__str__", "__repr__",
+        ]
+
+class ParseException(ParseBaseException):
+    """Exception thrown when a parse expression does not match.
+
+    The location attributes computed by the base class are available by name:
+     - lineno - returns the line number of the exception text
+     - col - returns the column number of the exception text
+     - line - returns the line containing the exception text
+    """
+
+class ParseFatalException(ParseBaseException):
+    """User-throwable exception for inconsistent parse content;
+       stops all parsing immediately."""
+
+class ParseSyntaxException(ParseFatalException):
+    """Same as C{L{ParseFatalException}}, but thrown internally when an
+       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing must stop
+       immediately because an unbacktrackable syntax error has been found."""
+    def __init__(self, pe):
+        # copy string, location, message and element from the exception being wrapped
+        super().__init__(pe.pstr, pe.loc, pe.msg, pe.parserElement)
+
+#~ class ReparseException(ParseBaseException):
+ #~ """Experimental class - parse actions can raise this exception to cause
+ #~ pyparsing to reparse the input string:
+ #~ - with a modified input string, and/or
+ #~ - with a modified start location
+ #~ Set the values of the ReparseException in the constructor, and raise the
+ #~ exception in a parse action to cause pyparsing to use the new string/location.
+ #~ Setting the values as None causes no change to be made.
+ #~ """
+ #~ def __init_( self, newstring, restartLoc ):
+ #~ self.newParseText = newstring
+ #~ self.reparseLoc = restartLoc
+
+class RecursiveGrammarException(Exception):
+    """Exception thrown by C{validate()} if the grammar could be improperly recursive."""
+    def __init__( self, parseElementList ):
+        # kept as passed in; only used when the exception is rendered as text
+        self.parseElementTrace = parseElementList
+
+    def __str__( self ):
+        trace = self.parseElementTrace
+        return "RecursiveGrammarException: %s" % trace
+
+class _ParseResultsWithOffset(object):
+    """Pair of (value, offset) that indexes and unpacks like a 2-tuple but lets the offset be replaced."""
+    def __init__(self,p1,p2):
+        self.tup = (p1,p2)
+
+    def __getitem__(self,i):
+        pair = self.tup
+        return pair[i]
+
+    def __repr__(self):
+        return repr(self.tup)
+
+    def setOffset(self,i):
+        # tuples are immutable, so build a new pair around the existing value
+        value = self.tup[0]
+        self.tup = (value, i)
+
+class ParseResults(object):
+    """Structured parse results, to provide multiple means of access to the parsed data:
+       - as a list (C{len(results)})
+       - by list index (C{results[0], results[1]}, etc.)
+       - by attribute (C{results.<resultsName>})
+
+    Tokens are held in order in an internal list.  Named results are held in an
+    internal dict mapping each name to a list of (value, position) entries.
+    """
+    #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
+    def __new__(cls, toklist, name=None, asList=True, modal=True ):
+        # wrapping an existing instance returns that same instance
+        if isinstance(toklist, cls):
+            return toklist
+        retobj = object.__new__(cls)
+        # tells __init__ that the internal containers still have to be created
+        retobj.__doinit = True
+        return retobj
+
+    # Performance tuning: we construct a *lot* of these, so keep this
+    # constructor as small and fast as possible
+    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
+        if self.__doinit:
+            self.__doinit = False
+            self.__name = None
+            self.__parent = None
+            self.__accumNames = {}
+            if isinstance(toklist, list):
+                self.__toklist = toklist[:]
+            else:
+                self.__toklist = [toklist]
+            self.__tokdict = dict()
+
+        if name is not None and name:
+            # non-modal names accumulate every match instead of keeping only the last
+            if not modal:
+                self.__accumNames[name] = 0
+            if isinstance(name,int):
+                name = _ustr(name) # will always return a str, but use _ustr for consistency
+            self.__name = name
+            if not toklist in (None,'',[]):
+                if isinstance(toklist,basestring):
+                    toklist = [ toklist ]
+                if asList:
+                    if isinstance(toklist,ParseResults):
+                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
+                    else:
+                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
+                    self[name].__name = name
+                else:
+                    try:
+                        self[name] = toklist[0]
+                    except (KeyError,TypeError,IndexError):
+                        self[name] = toklist
+
+    def __getitem__( self, i ):
+        # ints and slices index the token list, anything else is a results name
+        if isinstance( i, (int,slice) ):
+            return self.__toklist[i]
+        else:
+            if i not in self.__accumNames:
+                return self.__tokdict[i][-1][0]
+            else:
+                return ParseResults([ v[0] for v in self.__tokdict[i] ])
+
+    def __setitem__( self, k, v, isinstance=isinstance ):
+        if isinstance(v,_ParseResultsWithOffset):
+            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
+            sub = v[0]
+        elif isinstance(k,int):
+            self.__toklist[k] = v
+            sub = v
+        else:
+            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
+            sub = v
+        # nested results keep a weak reference back to their container
+        if isinstance(sub,ParseResults):
+            sub.__parent = wkref(self)
+
+    def __delitem__( self, i ):
+        if isinstance(i,(int,slice)):
+            mylen = len( self.__toklist )
+            del self.__toklist[i]
+
+            # convert int to slice
+            if isinstance(i, int):
+                if i < 0:
+                    i += mylen
+                i = slice(i, i+1)
+            # get removed indices
+            removed = list(range(*i.indices(mylen)))
+            removed.reverse()
+            # fixup indices in token dictionary
+            for name in self.__tokdict:
+                occurrences = self.__tokdict[name]
+                for j in removed:
+                    for k, (value, position) in enumerate(occurrences):
+                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
+        else:
+            del self.__tokdict[i]
+
+    def __contains__( self, k ):
+        return k in self.__tokdict
+
+    def __len__( self ): return len( self.__toklist )
+    def __bool__(self): return len( self.__toklist ) > 0
+    __nonzero__ = __bool__
+    def __iter__( self ): return iter( self.__toklist )
+    def __reversed__( self ): return iter( self.__toklist[::-1] )
+    def keys( self ):
+        """Returns all named result keys."""
+        return self.__tokdict.keys()
+
+    def pop( self, index=-1 ):
+        """Removes and returns item at specified index (default=last).
+           Will work with either numeric indices or dict-key indices."""
+        ret = self[index]
+        del self[index]
+        return ret
+
+    def get(self, key, defaultValue=None):
+        """Returns named result matching the given key, or if there is no
+           such name, then returns the given C{defaultValue} or C{None} if no
+           C{defaultValue} is specified."""
+        if key in self:
+            return self[key]
+        else:
+            return defaultValue
+
+    def insert( self, index, insStr ):
+        """Inserts new element at location index in the list of parsed tokens."""
+        self.__toklist.insert(index, insStr)
+        # fixup indices in token dictionary
+        for name in self.__tokdict:
+            occurrences = self.__tokdict[name]
+            for k, (value, position) in enumerate(occurrences):
+                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
+
+    def items( self ):
+        """Returns all named result keys and values as a list of tuples."""
+        return [(k,self[k]) for k in self.__tokdict]
+
+    def values( self ):
+        """Returns all named result values."""
+        return [ v[-1][0] for v in self.__tokdict.values() ]
+
+    def __getattr__( self, name ):
+        # Only called for attributes not found normally: a known results name
+        # yields its value, any other name yields "" instead of raising.
+        if name in self.__tokdict:
+            if name not in self.__accumNames:
+                return self.__tokdict[name][-1][0]
+            else:
+                return ParseResults([ v[0] for v in self.__tokdict[name] ])
+        else:
+            return ""
+
+    def __add__( self, other ):
+        ret = self.copy()
+        ret += other
+        return ret
+
+    def __iadd__( self, other ):
+        if other.__tokdict:
+            # positions stored with the other results' names are shifted by
+            # the number of tokens already held here
+            offset = len(self.__toklist)
+            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
+            otheritems = other.__tokdict.items()
+            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
+                                for (k,vlist) in otheritems for v in vlist]
+            for k,v in otherdictitems:
+                self[k] = v
+                if isinstance(v[0],ParseResults):
+                    v[0].__parent = wkref(self)
+
+        self.__toklist += other.__toklist
+        self.__accumNames.update( other.__accumNames )
+        return self
+
+    def __radd__(self, other):
+        # supports 0 + results (the start value used by sum()); for any other
+        # left operand nothing is returned, i.e. the result is None
+        if isinstance(other,int) and other == 0:
+            return self.copy()
+
+    def __repr__( self ):
+        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
+
+    def __str__( self ):
+        out = []
+        for i in self.__toklist:
+            if isinstance(i, ParseResults):
+                out.append(_ustr(i))
+            else:
+                out.append(repr(i))
+        return '[' + ', '.join(out) + ']'
+
+    def _asStringList( self, sep='' ):
+        # flattens nested results into one list of strings, placing sep
+        # between the top-level items when sep is non-empty
+        out = []
+        for item in self.__toklist:
+            if out and sep:
+                out.append(sep)
+            if isinstance( item, ParseResults ):
+                out += item._asStringList()
+            else:
+                out.append( _ustr(item) )
+        return out
+
+    def asList( self ):
+        """Returns the parse results as a nested list of matching tokens.
+           Nested C{ParseResults} are converted recursively; all other tokens
+           are returned unchanged."""
+        out = []
+        for res in self.__toklist:
+            if isinstance(res,ParseResults):
+                out.append( res.asList() )
+            else:
+                out.append( res )
+        return out
+
+    def asDict( self ):
+        """Returns the named parse results as dictionary."""
+        return dict( self.items() )
+
+    def copy( self ):
+        """Returns a new copy of a C{ParseResults} object."""
+        ret = ParseResults( self.__toklist )
+        ret.__tokdict = self.__tokdict.copy()
+        ret.__parent = self.__parent
+        ret.__accumNames.update( self.__accumNames )
+        ret.__name = self.__name
+        return ret
+
+    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
+        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
+        nl = "\n"
+        out = []
+        # map token position -> results name
+        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
+                                                        for v in vlist ] )
+        nextLevelIndent = indent + " "
+
+        # collapse out indents if formatting is not desired
+        if not formatted:
+            indent = ""
+            nextLevelIndent = ""
+            nl = ""
+
+        selfTag = None
+        if doctag is not None:
+            selfTag = doctag
+        else:
+            if self.__name:
+                selfTag = self.__name
+
+        if not selfTag:
+            if namedItemsOnly:
+                return ""
+            else:
+                selfTag = "ITEM"
+
+        out += [ nl, indent, "<", selfTag, ">" ]
+
+        worklist = self.__toklist
+        for i,res in enumerate(worklist):
+            if isinstance(res,ParseResults):
+                if i in namedItems:
+                    out += [ res.asXML(namedItems[i],
+                                        namedItemsOnly and doctag is None,
+                                        nextLevelIndent,
+                                        formatted)]
+                else:
+                    out += [ res.asXML(None,
+                                        namedItemsOnly and doctag is None,
+                                        nextLevelIndent,
+                                        formatted)]
+            else:
+                # individual token, see if there is a name for it
+                resTag = None
+                if i in namedItems:
+                    resTag = namedItems[i]
+                if not resTag:
+                    if namedItemsOnly:
+                        continue
+                    else:
+                        resTag = "ITEM"
+                xmlBodyText = _xml_escape(_ustr(res))
+                out += [ nl, nextLevelIndent, "<", resTag, ">",
+                                                xmlBodyText,
+                                                "</", resTag, ">" ]
+
+        out += [ nl, indent, "</", selfTag, ">" ]
+        return "".join(out)
+
+    def __lookup(self,sub):
+        # find the results name under which the object sub is stored, if any
+        for k,vlist in self.__tokdict.items():
+            for v,loc in vlist:
+                if sub is v:
+                    return k
+        return None
+
+    def getName(self):
+        """Returns the results name for this token expression."""
+        if self.__name:
+            return self.__name
+        elif self.__parent:
+            par = self.__parent()
+            if par:
+                return par.__lookup(self)
+            else:
+                return None
+        # dict views cannot be subscripted on Python 3, so take the single
+        # entry through an iterator
+        elif (len(self) == 1 and
+               len(self.__tokdict) == 1 and
+               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
+            return next(iter(self.__tokdict.keys()))
+        else:
+            return None
+
+    def dump(self,indent='',depth=0):
+        """Diagnostic method for listing out the contents of a C{ParseResults}.
+           Accepts an optional C{indent} argument so that this string can be embedded
+           in a nested display of other data."""
+        out = []
+        out.append( indent+_ustr(self.asList()) )
+        for k,v in sorted(self.items()):
+            if out:
+                out.append('\n')
+            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
+            if isinstance(v,ParseResults):
+                # only results that carry names of their own are expanded further
+                if v.keys():
+                    out.append( v.dump(indent,depth+1) )
+                else:
+                    out.append(_ustr(v))
+            else:
+                out.append(_ustr(v))
+        return "".join(out)
+
+    # add support for pickle protocol
+    def __getstate__(self):
+        # the parent is stored as the object itself (or None), not as a weak reference
+        return ( self.__toklist,
+                 ( self.__tokdict.copy(),
+                   self.__parent is not None and self.__parent() or None,
+                   self.__accumNames,
+                   self.__name ) )
+
+    def __setstate__(self,state):
+        self.__toklist = state[0]
+        (self.__tokdict,
+         par,
+         inAccumNames,
+         self.__name) = state[1]
+        self.__accumNames = {}
+        self.__accumNames.update(inAccumNames)
+        if par is not None:
+            self.__parent = wkref(par)
+        else:
+            self.__parent = None
+
+    def __dir__(self):
+        return dir(super(ParseResults,self)) + list(self.keys())
+
+# Register ParseResults as a virtual subclass of MutableMapping.  The abstract
+# base classes live in collections.abc since Python 3.3 and the aliases in the
+# collections namespace were removed in Python 3.10, so use the new location
+# and fall back to the old one only on interpreters that lack it.
+try:
+    from collections.abc import MutableMapping as _MutableMapping
+except ImportError:
+    from collections import MutableMapping as _MutableMapping
+_MutableMapping.register(ParseResults)
+
+def col (loc,strg):
+    """Return the 1-based column of position C{loc} within C{strg}, with
+    newlines acting as line separators.
+
+    Note: the default parsing behavior is to expand tabs in the input string
+    before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+    consistent view of the parsed string, the parse location, and line and column
+    positions within the parsed string.
+    """
+    # a location that sits on a newline character is reported as column 1
+    if loc < len(strg) and strg[loc] == '\n':
+        return 1
+    # otherwise count from the last newline before loc (rfind gives -1 if none)
+    return loc - strg.rfind("\n", 0, loc)
+
+def lineno(loc,strg):
+    """Return the 1-based line number of position C{loc} within C{strg}, with
+    newlines acting as line separators.
+
+    Note: the default parsing behavior is to expand tabs in the input string
+    before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+    consistent view of the parsed string, the parse location, and line and column
+    positions within the parsed string.
+    """
+    newlines_before = strg.count("\n", 0, loc)
+    return newlines_before + 1
+
+def line( loc, strg ):
+    """Return the text of the line that contains position C{loc} within C{strg},
+       without its terminating newline.
+       """
+    # rfind gives -1 when no newline precedes loc, which makes start 0
+    start = strg.rfind("\n", 0, loc) + 1
+    end = strg.find("\n", loc)
+    if end < 0:
+        return strg[start:]
+    return strg[start:end]
+
+def _defaultStartDebugAction( instring, loc, expr ):
+    # default debug hook for the start of a match attempt: prints the
+    # expression, the location and its (line,column)
+    message = "".join( [
+        "Match ", _ustr(expr), " at loc ", _ustr(loc),
+        "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ),
+    ] )
+    print (message)
+
+def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
+    # default debug hook for a successful match: prints the expression
+    # and the matched tokens as a list
+    matched = str(toks.asList())
+    print ("".join( ["Matched ", _ustr(expr), " -> ", matched] ))
+
+def _defaultExceptionDebugAction( instring, loc, expr, exc ):
+    # default debug hook for a failed match: prints the exception text
+    message = "Exception raised:" + _ustr(exc)
+    print (message)
+
+def nullDebugAction(*args):
+    """'Do-nothing' debug action, to suppress debugging output during parsing.
+
+    Accepts any positional arguments and returns C{None}.
+    """
+
+'decorator to trim function calls to match the arity of the target'
+def _trim_arity(func, maxargs=2):
+ # Returns a wrapper that can always be called with the full argument list and
+ # forwards only a trailing part of it to func.
+ # Builtins listed in singleArgBuiltins are always called with the third argument only.
+ if func in singleArgBuiltins:
+ return lambda s,l,t: func(t)
+ # limit is the number of leading arguments currently dropped.  It is shared by
+ # all calls of the wrapper, so the value reached on one call is reused on the next.
+ limit = maxargs
+ def wrapper(*args):
+ nonlocal limit
+ while 1:
+ try:
+ return func(*args[limit:])
+ except TypeError:
+ # drop one argument fewer and retry; once nothing is being dropped the
+ # TypeError is re-raised
+ # NOTE(review): a TypeError raised inside func itself is caught here too and
+ # causes a retry with more arguments - confirm this is acceptable.
+ # NOTE(review): with maxargs=2 and three arguments passed, func always
+ # receives at least one argument, so a zero-argument callable is never
+ # matched - confirm against the setParseAction documentation.
+ if limit:
+ limit -= 1
+ continue
+ raise
+ return wrapper
+
+class ParserElement(object):
+ """Abstract base level parser element class."""
+ DEFAULT_WHITE_CHARS = " \n\t\r"
+ verbose_stacktrace = False
+
+ def setDefaultWhitespaceChars( chars ):
+ """Overrides the default whitespace chars
+ """
+ # Stored on the class; __init__ and copy() read this attribute when they
+ # assign whiteChars, so elements that already exist keep their current value
+ # until they are copied.
+ ParserElement.DEFAULT_WHITE_CHARS = chars
+ setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
+
+ def inlineLiteralsUsing(cls):
+ """
+ Set class to be used for inclusion of string literals into a parser.
+ """
+ # Stored as the class attribute ParserElement.literalStringClass; the code that
+ # reads it is not visible in this part of the file.
+ ParserElement.literalStringClass = cls
+ inlineLiteralsUsing = staticmethod(inlineLiteralsUsing)
+
+ def __init__( self, savelist=False ):
+ # Sets the default state shared by all parser elements; savelist is stored
+ # as saveAsList and later passed to ParseResults as its asList argument.
+ self.parseAction = list()
+ self.failAction = None
+ #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
+ self.strRepr = None
+ self.resultsName = None
+ self.saveAsList = savelist
+ self.skipWhitespace = True
+ self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+ self.copyDefaultWhiteChars = True
+ self.mayReturnEmpty = False # used when checking for left-recursion
+ self.keepTabs = False
+ self.ignoreExprs = list()
+ self.debug = False
+ self.streamlined = False
+ self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
+ self.errmsg = ""
+ self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
+ self.debugActions = ( None, None, None ) #custom debug actions
+ self.re = None
+ self.callPreparse = True # used to avoid redundant calls to preParse
+ self.callDuringTry = False
+
+ def copy( self ):
+ """Make a copy of this C{ParserElement}. Useful for defining different parse actions
+ for the same parsing pattern, using copies of the original parse element."""
+ # shallow copy, then give the copy its own parseAction and ignoreExprs lists so
+ # changes to one element's lists do not affect the other
+ cpy = copy.copy( self )
+ cpy.parseAction = self.parseAction[:]
+ cpy.ignoreExprs = self.ignoreExprs[:]
+ # elements flagged copyDefaultWhiteChars pick up the current class-level default
+ if self.copyDefaultWhiteChars:
+ cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+ return cpy
+
+ def setName( self, name ):
+     """Give this expression a name for use in debugging, and derive its
+     error message ("Expected <name>") from it.  Returns C{self}."""
+     self.name = name
+     self.errmsg = "Expected " + self.name
+     try:
+         pending = self.exception
+     except AttributeError:
+         # no exception object attached to this element, nothing to update
+         pass
+     else:
+         pending.msg = self.errmsg
+     return self
+
+ def setResultsName( self, name, listAllMatches=False ):
+ """Define name for referencing matching tokens as a nested attribute
+ of the returned parse results.
+ NOTE: this returns a *copy* of the original C{ParserElement} object;
+ this is so that the client can define a basic element, such as an
+ integer, and reference it in multiple places with different names.
+
+ You can also set results names using the abbreviated syntax,
+ C{expr("name")} in place of C{expr.setResultsName("name")} -
+ see L{I{__call__}<__call__>}.
+
+ A name ending in C{"*"} has the asterisk removed and is treated as if
+ C{listAllMatches=True} had been passed.
+ """
+ newself = self.copy()
+ if name.endswith("*"):
+ name = name[:-1]
+ listAllMatches=True
+ newself.resultsName = name
+ # modalResults is the inverse of listAllMatches
+ newself.modalResults = not listAllMatches
+ return newself
+
+ def setBreak(self,breakFlag = True):
+ """Method to invoke the Python pdb debugger when this element is
+ about to be parsed. Set C{breakFlag} to True to enable, False to
+ disable.
+ """
+ if breakFlag:
+ # replace this instance's _parse with a wrapper that enters pdb and then
+ # calls the previous _parse
+ _parseMethod = self._parse
+ def breaker(instring, loc, doActions=True, callPreParse=True):
+ import pdb
+ pdb.set_trace()
+ return _parseMethod( instring, loc, doActions, callPreParse )
+ # remember the wrapped method so that it can be restored later
+ breaker._originalParseMethod = _parseMethod
+ self._parse = breaker
+ else:
+ # restore the wrapped method, if a wrapper is currently installed
+ if hasattr(self._parse,"_originalParseMethod"):
+ self._parse = self._parse._originalParseMethod
+ return self
+
+ def setParseAction( self, *fns, **kwargs ):
+     """Define action to perform when successfully matching parse element definition.
+     Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
+     C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
+      - s = the original string being parsed (see note below)
+      - loc = the location of the matching substring
+      - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
+     If the functions in fns modify the tokens, they can return them as the return
+     value from fn, and the modified list of tokens will replace the original.
+     Otherwise, fn does not need to return any value.
+
+     Any parse actions set previously are replaced.  The keyword argument
+     C{callDuringTry} is stored on the element (False if not given).
+
+     Note: the default parsing behavior is to expand tabs in the input string
+     before starting the parsing process. See L{I{parseString}<parseString>} for more information
+     on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+     consistent view of the parsed string, the parse location, and line and column
+     positions within the parsed string.
+     """
+     # each callable is wrapped so that it can be invoked as fn(s,loc,toks)
+     self.parseAction = [_trim_arity(fn) for fn in fns]
+     self.callDuringTry = kwargs.get("callDuringTry", False)
+     return self
+
+ def addParseAction( self, *fns, **kwargs ):
+     """Append parse actions to this expression's existing list of parse actions.
+     See L{I{setParseAction}<setParseAction>}.  C{callDuringTry} stays set once it
+     has been enabled."""
+     self.parseAction += [_trim_arity(fn) for fn in fns]
+     self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
+     return self
+
+ def setFailAction( self, fn ):
+ """Define action to perform if parsing fails at this expression.
+ Fail action fn is a callable function that takes the arguments
+ C{fn(s,loc,expr,err)} where:
+ - s = string being parsed
+ - loc = location where expression match was attempted and failed
+ - expr = the parse expression that failed
+ - err = the exception thrown
+ The function returns no value. It may throw C{L{ParseFatalException}}
+ if it is desired to stop parsing immediately.
+ Returns C{self}."""
+ self.failAction = fn
+ return self
+
+ def _skipIgnorables( self, instring, loc ):
+ # Advance loc past any text matched by the expressions in self.ignoreExprs and
+ # return the new location.  Each expression is applied repeatedly until it
+ # raises ParseException; the outer loop repeats while exprsFound was set.
+ exprsFound = True
+ while exprsFound:
+ exprsFound = False
+ for e in self.ignoreExprs:
+ try:
+ while 1:
+ loc,dummy = e._parse( instring, loc )
+ exprsFound = True
+ except ParseException:
+ pass
+ return loc
+
+ def preParse( self, instring, loc ):
+     """Return C{loc} advanced past ignorable expressions (if any are defined)
+     and, when whitespace skipping is on, past leading whitespace characters."""
+     if self.ignoreExprs:
+         loc = self._skipIgnorables( instring, loc )
+
+     if not self.skipWhitespace:
+         return loc
+
+     skippable = self.whiteChars
+     end = len(instring)
+     while loc < end and instring[loc] in skippable:
+         loc += 1
+     return loc
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ # Base implementation: consumes nothing and returns the unchanged location
+ # with an empty token list.
+ return loc, []
+
+ def postParse( self, instring, loc, tokenlist ):
+ # Base implementation: returns the token list unchanged.  _parseNoCache calls
+ # this on the tokens returned by parseImpl.
+ return tokenlist
+
+ #~ @profile
+ def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
+ # Core parse step for one element: optional preParse, parseImpl, postParse,
+ # packaging of the tokens as ParseResults, then the parse actions.
+ # Returns (new location, ParseResults).
+ debugging = ( self.debug ) #and doActions )
+
+ # slower path, used only when debugging or when a fail action is installed
+ if debugging or self.failAction:
+ #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+ if (self.debugActions[0] ):
+ self.debugActions[0]( instring, loc, self )
+ if callPreParse and self.callPreparse:
+ preloc = self.preParse( instring, loc )
+ else:
+ preloc = loc
+ tokensStart = preloc
+ try:
+ try:
+ loc,tokens = self.parseImpl( instring, preloc, doActions )
+ except IndexError:
+ # an IndexError from parseImpl is reported as a ParseException located at the end of the input
+ raise ParseException( instring, len(instring), self.errmsg, self )
+ except ParseBaseException as err:
+ #~ print ("Exception raised:", err)
+ if self.debugActions[2]:
+ self.debugActions[2]( instring, tokensStart, self, err )
+ if self.failAction:
+ self.failAction( instring, tokensStart, self, err )
+ raise
+ else:
+ if callPreParse and self.callPreparse:
+ preloc = self.preParse( instring, loc )
+ else:
+ preloc = loc
+ tokensStart = preloc
+ # the IndexError guard is only set up when the element is flagged mayIndexError
+ # or the location is already at or past the end of the input
+ if self.mayIndexError or loc >= len(instring):
+ try:
+ loc,tokens = self.parseImpl( instring, preloc, doActions )
+ except IndexError:
+ raise ParseException( instring, len(instring), self.errmsg, self )
+ else:
+ loc,tokens = self.parseImpl( instring, preloc, doActions )
+
+ tokens = self.postParse( instring, loc, tokens )
+
+ retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
+ if self.parseAction and (doActions or self.callDuringTry):
+ if debugging:
+ try:
+ for fn in self.parseAction:
+ tokens = fn( instring, tokensStart, retTokens )
+ # a parse action that returns something other than None replaces the results
+ if tokens is not None:
+ retTokens = ParseResults( tokens,
+ self.resultsName,
+ asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+ modal=self.modalResults )
+ except ParseBaseException as err:
+ #~ print "Exception raised in user parse action:", err
+ if (self.debugActions[2] ):
+ self.debugActions[2]( instring, tokensStart, self, err )
+ raise
+ else:
+ for fn in self.parseAction:
+ tokens = fn( instring, tokensStart, retTokens )
+ if tokens is not None:
+ retTokens = ParseResults( tokens,
+ self.resultsName,
+ asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+ modal=self.modalResults )
+
+ if debugging:
+ #~ print ("Matched",self,"->",retTokens.asList())
+ if (self.debugActions[1] ):
+ self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
+
+ return loc, retTokens
+
+ def tryParse( self, instring, loc ):
+ # Attempt a match at loc with doActions=False and return only the end location.
+ # A ParseFatalException is converted into an ordinary ParseException here.
+ try:
+ return self._parse( instring, loc, doActions=False )[0]
+ except ParseFatalException:
+ raise ParseException( instring, loc, self.errmsg, self)
+
+ # this method gets repeatedly called during backtracking with the same arguments -
+ # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
+ def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
+ # Memoizing variant of _parseNoCache: results and exceptions are stored in the
+ # class-level ParserElement._exprArgCache, keyed by element, input string,
+ # location and the two flags.
+ lookup = (self,instring,loc,callPreParse,doActions)
+ if lookup in ParserElement._exprArgCache:
+ value = ParserElement._exprArgCache[ lookup ]
+ # a cached failure is raised again; a cached success is returned with a copy of its results
+ if isinstance(value, Exception):
+ raise value
+ return (value[0],value[1].copy())
+ else:
+ try:
+ value = self._parseNoCache( instring, loc, doActions, callPreParse )
+ # a copy of the results is stored; the freshly parsed value is returned
+ ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
+ return value
+ except ParseBaseException as pe:
+ # the traceback is cleared before the exception object is stored in the cache
+ pe.__traceback__ = None
+ ParserElement._exprArgCache[ lookup ] = pe
+ raise
+
+ # default parse method; enablePackrat() rebinds this to _parseCache
+ _parse = _parseNoCache
+
+ # argument cache for optimizing repeated calls when backtracking through recursive expressions
+ _exprArgCache = {}
+ def resetCache():
+ # empties the class-level cache in place; parseString calls this before it starts parsing
+ ParserElement._exprArgCache.clear()
+ resetCache = staticmethod(resetCache)
+
+ # class-level flag, set to True the first time enablePackrat() is called
+ _packratEnabled = False
+ def enablePackrat():
+ """Enables "packrat" parsing, which adds memoizing to the parsing logic.
+ Repeated parse attempts at the same string location (which happens
+ often in many complex grammars) can immediately return a cached value,
+ instead of re-executing parsing/validating code. Memoizing is done of
+ both valid results and parsing exceptions.
+
+ This speedup may break existing programs that use parse actions that
+ have side-effects. For this reason, packrat parsing is disabled when
+ you first import pyparsing. To activate the packrat feature, your
+ program must call the class method C{ParserElement.enablePackrat()}. If
+ your program uses C{psyco} to "compile as you go", you must call
+ C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
+ Python will crash. For best results, call C{enablePackrat()} immediately
+ after importing pyparsing.
+ """
+ # calling this again after packrat has been enabled does nothing
+ if not ParserElement._packratEnabled:
+ ParserElement._packratEnabled = True
+ ParserElement._parse = ParserElement._parseCache
+ enablePackrat = staticmethod(enablePackrat)
+
+ def parseString( self, instring, parseAll=False ):
+ """Execute the parse expression with the given string.
+ This is the main interface to the client code, once the complete
+ expression has been built.
+
+ If you want the grammar to require that the entire input string be
+ successfully parsed, then set C{parseAll} to True (equivalent to ending
+ the grammar with C{L{StringEnd()}}).
+
+ Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
+ in order to report proper column numbers in parse actions.
+ If the input string contains tabs and
+ the grammar uses parse actions that use the C{loc} argument to index into the
+ string being parsed, you can ensure you have a consistent view of the input
+ string by:
+ - calling C{parseWithTabs} on your grammar before calling C{parseString}
+ (see L{I{parseWithTabs}<parseWithTabs>})
+ - define your parse action using the full C{(s,loc,toks)} signature, and
+ reference the input string using the parse action's C{s} argument
+ - explictly expand the tabs in your input string before calling
+ C{parseString}
+ """
+ ParserElement.resetCache()
+ if not self.streamlined:
+ self.streamline()
+ #~ self.saveAsList = True
+ for e in self.ignoreExprs:
+ e.streamline()
+ if not self.keepTabs:
+ instring = instring.expandtabs()
+ try:
+ loc, tokens = self._parse( instring, 0 )
+ if parseAll:
+ loc = self.preParse( instring, loc )
+ se = Empty() + StringEnd()
+ se._parse( instring, loc )
+ except ParseBaseException as exc:
+ if ParserElement.verbose_stacktrace:
+ raise
+ else:
+ # catch and re-raise exception from here, clears out pyparsing internal stack trace
+ raise exc
+ else:
+ return tokens
+
+ def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
+ """Scan the input string for expression matches. Each match will return the
+ matching tokens, start location, and end location. May be called with optional
+ C{maxMatches} argument, to clip scanning after 'n' matches are found. If
+ C{overlap} is specified, then overlapping matches will be reported.
+
+ Note that the start and end locations are reported relative to the string
+ being parsed. See L{I{parseString}<parseString>} for more information on parsing
+ strings with embedded tabs."""
+ if not self.streamlined:
+ self.streamline()
+ for e in self.ignoreExprs:
+ e.streamline()
+
+ if not self.keepTabs:
+ instring = _ustr(instring).expandtabs()
+ instrlen = len(instring)
+ loc = 0
+ preparseFn = self.preParse
+ parseFn = self._parse
+ ParserElement.resetCache()
+ matches = 0
+ try:
+ while loc <= instrlen and matches < maxMatches:
+ try:
+ preloc = preparseFn( instring, loc )
+ nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
+ except ParseException:
+ loc = preloc+1
+ else:
+ if nextLoc > loc:
+ matches += 1
+ yield tokens, preloc, nextLoc
+ if overlap:
+ nextloc = preparseFn( instring, loc )
+ if nextloc > loc:
+ loc = nextLoc
+ else:
+ loc += 1
+ else:
+ loc = nextLoc
+ else:
+ loc = preloc+1
+ except ParseBaseException as exc:
+ if ParserElement.verbose_stacktrace:
+ raise
+ else:
+ # catch and re-raise exception from here, clears out pyparsing internal stack trace
+ raise exc
+
+ def transformString( self, instring ):
+ """Extension to C{L{scanString}}, to modify matching text with modified tokens that may
+ be returned from a parse action. To use C{transformString}, define a grammar and
+ attach a parse action to it that modifies the returned token list.
+ Invoking C{transformString()} on a target string will then scan for matches,
+ and replace the matched text patterns according to the logic in the parse
+ action. C{transformString()} returns the resulting transformed string."""
+ out = []
+ lastE = 0
+ # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
+ # keep string locs straight between transformString and scanString
+ self.keepTabs = True
+ try:
+ for t,s,e in self.scanString( instring ):
+ out.append( instring[lastE:s] )
+ if t:
+ if isinstance(t,ParseResults):
+ out += t.asList()
+ elif isinstance(t,list):
+ out += t
+ else:
+ out.append(t)
+ lastE = e
+ out.append(instring[lastE:])
+ out = [o for o in out if o]
+ return "".join(map(_ustr,_flatten(out)))
+ except ParseBaseException as exc:
+ if ParserElement.verbose_stacktrace:
+ raise
+ else:
+ # catch and re-raise exception from here, clears out pyparsing internal stack trace
+ raise exc
+
+ def searchString( self, instring, maxMatches=_MAX_INT ):
+ """Another extension to C{L{scanString}}, simplifying the access to the tokens found
+ to match the given parse expression. May be called with optional
+ C{maxMatches} argument, to clip searching after 'n' matches are found.
+ """
+ try:
+ return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
+ except ParseBaseException as exc:
+ if ParserElement.verbose_stacktrace:
+ raise
+ else:
+ # catch and re-raise exception from here, clears out pyparsing internal stack trace
+ raise exc
+
+ def __add__(self, other ):
+ """Implementation of + operator - returns C{L{And}}"""
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return And( [ self, other ] )
+
+ def __radd__(self, other ):
+ """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return other + self
+
+ def __sub__(self, other):
+ """Implementation of - operator, returns C{L{And}} with error stop"""
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return And( [ self, And._ErrorStop(), other ] )
+
+ def __rsub__(self, other ):
+ """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return other - self
+
+ def __mul__(self,other):
+ """Implementation of * operator, allows use of C{expr * 3} in place of
+ C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer
+ tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
+ may also include C{None} as in:
+ - C{expr*(n,None)} or C{expr*(n,)} is equivalent
+ to C{expr*n + L{ZeroOrMore}(expr)}
+ (read as "at least n instances of C{expr}")
+ - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
+ (read as "0 to n instances of C{expr}")
+ - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
+ - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
+
+ Note that C{expr*(None,n)} does not raise an exception if
+ more than n exprs exist in the input stream; that is,
+ C{expr*(None,n)} does not enforce a maximum number of expr
+ occurrences. If this behavior is desired, then write
+ C{expr*(None,n) + ~expr}
+
+ """
+ if isinstance(other,int):
+ minElements, optElements = other,0
+ elif isinstance(other,tuple):
+ other = (other + (None, None))[:2]
+ if other[0] is None:
+ other = (0, other[1])
+ if isinstance(other[0],int) and other[1] is None:
+ if other[0] == 0:
+ return ZeroOrMore(self)
+ if other[0] == 1:
+ return OneOrMore(self)
+ else:
+ return self*other[0] + ZeroOrMore(self)
+ elif isinstance(other[0],int) and isinstance(other[1],int):
+ minElements, optElements = other
+ optElements -= minElements
+ else:
+ raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
+ else:
+ raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
+
+ if minElements < 0:
+ raise ValueError("cannot multiply ParserElement by negative value")
+ if optElements < 0:
+ raise ValueError("second tuple value must be greater or equal to first tuple value")
+ if minElements == optElements == 0:
+ raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
+
+ if (optElements):
+ def makeOptionalList(n):
+ if n>1:
+ return Optional(self + makeOptionalList(n-1))
+ else:
+ return Optional(self)
+ if minElements:
+ if minElements == 1:
+ ret = self + makeOptionalList(optElements)
+ else:
+ ret = And([self]*minElements) + makeOptionalList(optElements)
+ else:
+ ret = makeOptionalList(optElements)
+ else:
+ if minElements == 1:
+ ret = self
+ else:
+ ret = And([self]*minElements)
+ return ret
+
+ def __rmul__(self, other):
+ return self.__mul__(other)
+
+ def __or__(self, other ):
+ """Implementation of | operator - returns C{L{MatchFirst}}"""
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return MatchFirst( [ self, other ] )
+
+ def __ror__(self, other ):
+ """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return other | self
+
+ def __xor__(self, other ):
+ """Implementation of ^ operator - returns C{L{Or}}"""
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return Or( [ self, other ] )
+
+ def __rxor__(self, other ):
+ """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return other ^ self
+
+ def __and__(self, other ):
+ """Implementation of & operator - returns C{L{Each}}"""
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return Each( [ self, other ] )
+
+ def __rand__(self, other ):
+ """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
+ if isinstance( other, basestring ):
+ other = ParserElement.literalStringClass( other )
+ if not isinstance( other, ParserElement ):
+ warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+ SyntaxWarning, stacklevel=2)
+ return None
+ return other & self
+
+ def __invert__( self ):
+ """Implementation of ~ operator - returns C{L{NotAny}}"""
+ return NotAny( self )
+
+ def __call__(self, name):
+ """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
+ userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
+ could be written as::
+ userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
+
+ If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
+ passed as C{True}.
+ """
+ return self.setResultsName(name)
+
+ def suppress( self ):
+ """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
+ cluttering up returned output.
+ """
+ return Suppress( self )
+
+ def leaveWhitespace( self ):
+ """Disables the skipping of whitespace before matching the characters in the
+ C{ParserElement}'s defined pattern. This is normally only used internally by
+ the pyparsing module, but may be needed in some whitespace-sensitive grammars.
+ """
+ self.skipWhitespace = False
+ return self
+
+ def setWhitespaceChars( self, chars ):
+ """Overrides the default whitespace chars
+ """
+ self.skipWhitespace = True
+ self.whiteChars = chars
+ self.copyDefaultWhiteChars = False
+ return self
+
+ def parseWithTabs( self ):
+ """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
+ Must be called before C{parseString} when the input grammar contains elements that
+ match C{<TAB>} characters."""
+ self.keepTabs = True
+ return self
+
+ def ignore( self, other ):
+ """Define expression to be ignored (e.g., comments) while doing pattern
+ matching; may be called repeatedly, to define multiple comment or other
+ ignorable patterns.
+ """
+ if isinstance( other, Suppress ):
+ if other not in self.ignoreExprs:
+ self.ignoreExprs.append( other.copy() )
+ else:
+ self.ignoreExprs.append( Suppress( other.copy() ) )
+ return self
+
+ def setDebugActions( self, startAction, successAction, exceptionAction ):
+ """Enable display of debugging messages while doing pattern matching."""
+ self.debugActions = (startAction or _defaultStartDebugAction,
+ successAction or _defaultSuccessDebugAction,
+ exceptionAction or _defaultExceptionDebugAction)
+ self.debug = True
+ return self
+
+ def setDebug( self, flag=True ):
+ """Enable display of debugging messages while doing pattern matching.
+ Set C{flag} to True to enable, False to disable."""
+ if flag:
+ self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
+ else:
+ self.debug = False
+ return self
+
+ def __str__( self ):
+ return self.name
+
+ def __repr__( self ):
+ return _ustr(self)
+
+ def streamline( self ):
+ self.streamlined = True
+ self.strRepr = None
+ return self
+
+ def checkRecursion( self, parseElementList ):
+ pass
+
+ def validate( self, validateTrace=[] ):
+ """Check defined expressions for valid structure, check for infinite recursive definitions."""
+ self.checkRecursion( [] )
+
+ def parseFile( self, file_or_filename, parseAll=False ):
+ """Execute the parse expression on the given file or filename.
+ If a filename is specified (instead of a file object),
+ the entire file is opened, read, and closed before parsing.
+ """
+ try:
+ file_contents = file_or_filename.read()
+ except AttributeError:
+ f = open(file_or_filename, "r")
+ file_contents = f.read()
+ f.close()
+ try:
+ return self.parseString(file_contents, parseAll)
+ except ParseBaseException as exc:
+ if ParserElement.verbose_stacktrace:
+ raise
+ else:
+ # catch and re-raise exception from here, clears out pyparsing internal stack trace
+ raise exc
+
+ def __eq__(self,other):
+ if isinstance(other, ParserElement):
+ return self is other or self.__dict__ == other.__dict__
+ elif isinstance(other, basestring):
+ try:
+ self.parseString(_ustr(other), parseAll=True)
+ return True
+ except ParseBaseException:
+ return False
+ else:
+ return super(ParserElement,self)==other
+
+ def __ne__(self,other):
+ return not (self == other)
+
+ def __hash__(self):
+ return hash(id(self))
+
+ def __req__(self,other):
+ return self == other
+
+ def __rne__(self,other):
+ return not (self == other)
+
+
+class Token(ParserElement):
+ """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
+ def __init__( self ):
+ super(Token,self).__init__( savelist=False )
+
+ def setName(self, name):
+ s = super(Token,self).setName(name)
+ self.errmsg = "Expected " + self.name
+ return s
+
+
+class Empty(Token):
+ """An empty token, will always match."""
+ def __init__( self ):
+ super(Empty,self).__init__()
+ self.name = "Empty"
+ self.mayReturnEmpty = True
+ self.mayIndexError = False
+
+
+class NoMatch(Token):
+ """A token that will never match."""
+ def __init__( self ):
+ super(NoMatch,self).__init__()
+ self.name = "NoMatch"
+ self.mayReturnEmpty = True
+ self.mayIndexError = False
+ self.errmsg = "Unmatchable token"
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ raise ParseException(instring, loc, self.errmsg, self)
+
+
+class Literal(Token):
+ """Token to exactly match a specified string."""
+ def __init__( self, matchString ):
+ super(Literal,self).__init__()
+ self.match = matchString
+ self.matchLen = len(matchString)
+ try:
+ self.firstMatchChar = matchString[0]
+ except IndexError:
+ warnings.warn("null string passed to Literal; use Empty() instead",
+ SyntaxWarning, stacklevel=2)
+ self.__class__ = Empty
+ self.name = '"%s"' % _ustr(self.match)
+ self.errmsg = "Expected " + self.name
+ self.mayReturnEmpty = False
+ self.mayIndexError = False
+
+ # Performance tuning: this routine gets called a *lot*
+ # if this is a single character match string and the first character matches,
+ # short-circuit as quickly as possible, and avoid calling startswith
+ #~ @profile
+ def parseImpl( self, instring, loc, doActions=True ):
+ if (instring[loc] == self.firstMatchChar and
+ (self.matchLen==1 or instring.startswith(self.match,loc)) ):
+ return loc+self.matchLen, self.match
+ raise ParseException(instring, loc, self.errmsg, self)
+# module-level short alias for the Literal class
+_L = Literal
+# class that the ParserElement operator methods call to turn a plain string
+# operand into an expression
+ParserElement.literalStringClass = Literal
+
+class Keyword(Token):
+ """Token to exactly match a specified string as a keyword, that is, it must be
+ immediately followed by a non-keyword character. Compare with C{L{Literal}}::
+ Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
+ Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
+ Accepts two optional constructor arguments in addition to the keyword string:
+ C{identChars} is a string of characters that would be valid identifier characters,
+ defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
+ matching, default is C{False}.
+ """
+ DEFAULT_KEYWORD_CHARS = alphanums+"_$"
+
+ def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
+ super(Keyword,self).__init__()
+ self.match = matchString
+ self.matchLen = len(matchString)
+ try:
+ self.firstMatchChar = matchString[0]
+ except IndexError:
+ warnings.warn("null string passed to Keyword; use Empty() instead",
+ SyntaxWarning, stacklevel=2)
+ self.name = '"%s"' % self.match
+ self.errmsg = "Expected " + self.name
+ self.mayReturnEmpty = False
+ self.mayIndexError = False
+ self.caseless = caseless
+ if caseless:
+ self.caselessmatch = matchString.upper()
+ identChars = identChars.upper()
+ self.identChars = set(identChars)
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ if self.caseless:
+ if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
+ (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
+ (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
+ return loc+self.matchLen, self.match
+ else:
+ if (instring[loc] == self.firstMatchChar and
+ (self.matchLen==1 or instring.startswith(self.match,loc)) and
+ (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
+ (loc == 0 or instring[loc-1] not in self.identChars) ):
+ return loc+self.matchLen, self.match
+ raise ParseException(instring, loc, self.errmsg, self)
+
+ def copy(self):
+ c = super(Keyword,self).copy()
+ c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
+ return c
+
+ def setDefaultKeywordChars( chars ):
+ """Overrides the default Keyword chars
+ """
+ Keyword.DEFAULT_KEYWORD_CHARS = chars
+ setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
+
+class CaselessLiteral(Literal):
+ """Token to match a specified string, ignoring case of letters.
+ Note: the matched results will always be in the case of the given
+ match string, NOT the case of the input text.
+ """
+ def __init__( self, matchString ):
+ super(CaselessLiteral,self).__init__( matchString.upper() )
+ # Preserve the defining literal.
+ self.returnString = matchString
+ self.name = "'%s'" % self.returnString
+ self.errmsg = "Expected " + self.name
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ if instring[ loc:loc+self.matchLen ].upper() == self.match:
+ return loc+self.matchLen, self.returnString
+ raise ParseException(instring, loc, self.errmsg, self)
+
+class CaselessKeyword(Keyword):
+ def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
+ super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
+ (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
+ return loc+self.matchLen, self.match
+ raise ParseException(instring, loc, self.errmsg, self)
+
+class Word(Token):
+ """Token for matching words composed of allowed character sets.
+ Defined with string containing all allowed initial characters,
+ an optional string containing allowed body characters (if omitted,
+ defaults to the initial character set), and an optional minimum,
+ maximum, and/or exact length. The default value for C{min} is 1 (a
+ minimum value < 1 is not valid); the default values for C{max} and C{exact}
+ are 0, meaning no maximum or exact length restriction. An optional
+ C{exclude} parameter can list characters that might be found in
+ the input C{bodyChars} string; useful to define a word of all printables
+ except for one or two characters, for instance.
+ """
+ def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
+ super(Word,self).__init__()
+ if excludeChars:
+ initChars = ''.join([c for c in initChars if c not in excludeChars])
+ if bodyChars:
+ bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
+ self.initCharsOrig = initChars
+ self.initChars = set(initChars)
+ if bodyChars :
+ self.bodyCharsOrig = bodyChars
+ self.bodyChars = set(bodyChars)
+ else:
+ self.bodyCharsOrig = initChars
+ self.bodyChars = set(initChars)
+
+ self.maxSpecified = max > 0
+
+ if min < 1:
+ raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
+
+ self.minLen = min
+
+ if max > 0:
+ self.maxLen = max
+ else:
+ self.maxLen = _MAX_INT
+
+ if exact > 0:
+ self.maxLen = exact
+ self.minLen = exact
+
+ self.name = _ustr(self)
+ self.errmsg = "Expected " + self.name
+ self.mayIndexError = False
+ self.asKeyword = asKeyword
+
+ if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
+ if self.bodyCharsOrig == self.initCharsOrig:
+ self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
+ elif len(self.bodyCharsOrig) == 1:
+ self.reString = "%s[%s]*" % \
+ (re.escape(self.initCharsOrig),
+ _escapeRegexRangeChars(self.bodyCharsOrig),)
+ else:
+ self.reString = "[%s][%s]*" % \
+ (_escapeRegexRangeChars(self.initCharsOrig),
+ _escapeRegexRangeChars(self.bodyCharsOrig),)
+ if self.asKeyword:
+ self.reString = r"\b"+self.reString+r"\b"
+ try:
+ self.re = re.compile( self.reString )
+ except:
+ self.re = None
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ if self.re:
+ result = self.re.match(instring,loc)
+ if not result:
+ raise ParseException(instring, loc, self.errmsg, self)
+
+ loc = result.end()
+ return loc, result.group()
+
+ if not(instring[ loc ] in self.initChars):
+ raise ParseException(instring, loc, self.errmsg, self)
+
+ start = loc
+ loc += 1
+ instrlen = len(instring)
+ bodychars = self.bodyChars
+ maxloc = start + self.maxLen
+ maxloc = min( maxloc, instrlen )
+ while loc < maxloc and instring[loc] in bodychars:
+ loc += 1
+
+ throwException = False
+ if loc - start < self.minLen:
+ throwException = True
+ if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
+ throwException = True
+ if self.asKeyword:
+ if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
+ throwException = True
+
+ if throwException:
+ raise ParseException(instring, loc, self.errmsg, self)
+
+ return loc, instring[start:loc]
+
+ def __str__( self ):
+ try:
+ return super(Word,self).__str__()
+ except:
+ pass
+
+
+ if self.strRepr is None:
+
+ def charsAsStr(s):
+ if len(s)>4:
+ return s[:4]+"..."
+ else:
+ return s
+
+ if ( self.initCharsOrig != self.bodyCharsOrig ):
+ self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
+ else:
+ self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
+
+ return self.strRepr
+
+
+class Regex(Token):
+ """Token for matching strings that match a given regular expression.
+ Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
+ """
+ compiledREtype = type(re.compile("[A-Z]"))
+ def __init__( self, pattern, flags=0):
+ """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
+ super(Regex,self).__init__()
+
+ if isinstance(pattern, basestring):
+ if len(pattern) == 0:
+ warnings.warn("null string passed to Regex; use Empty() instead",
+ SyntaxWarning, stacklevel=2)
+
+ self.pattern = pattern
+ self.flags = flags
+
+ try:
+ self.re = re.compile(self.pattern, self.flags)
+ self.reString = self.pattern
+ except sre_constants.error:
+ warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
+ SyntaxWarning, stacklevel=2)
+ raise
+
+ elif isinstance(pattern, Regex.compiledREtype):
+ self.re = pattern
+ self.pattern = \
+ self.reString = str(pattern)
+ self.flags = flags
+
+ else:
+ raise ValueError("Regex may only be constructed with a string or a compiled RE object")
+
+ self.name = _ustr(self)
+ self.errmsg = "Expected " + self.name
+ self.mayIndexError = False
+ self.mayReturnEmpty = True
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ result = self.re.match(instring,loc)
+ if not result:
+ raise ParseException(instring, loc, self.errmsg, self)
+
+ loc = result.end()
+ d = result.groupdict()
+ ret = ParseResults(result.group())
+ if d:
+ for k in d:
+ ret[k] = d[k]
+ return loc,ret
+
+ def __str__( self ):
+ try:
+ return super(Regex,self).__str__()
+ except:
+ pass
+
+ if self.strRepr is None:
+ self.strRepr = "Re:(%s)" % repr(self.pattern)
+
+ return self.strRepr
+
+
+class QuotedString(Token):
+ """Token for matching strings that are delimited by quoting characters.
+ """
+ def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
+ """
+ Defined with the following parameters:
+ - quoteChar - string of one or more characters defining the quote delimiting string
+ - escChar - character to escape quotes, typically backslash (default=None)
+ - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
+ - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
+ - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
+ - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
+ """
+ super(QuotedString,self).__init__()
+
+ # remove white space from quote chars - wont work anyway
+ quoteChar = quoteChar.strip()
+ if len(quoteChar) == 0:
+ warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+ raise SyntaxError()
+
+ if endQuoteChar is None:
+ endQuoteChar = quoteChar
+ else:
+ endQuoteChar = endQuoteChar.strip()
+ if len(endQuoteChar) == 0:
+ warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+ raise SyntaxError()
+
+ self.quoteChar = quoteChar
+ self.quoteCharLen = len(quoteChar)
+ self.firstQuoteChar = quoteChar[0]
+ self.endQuoteChar = endQuoteChar
+ self.endQuoteCharLen = len(endQuoteChar)
+ self.escChar = escChar
+ self.escQuote = escQuote
+ self.unquoteResults = unquoteResults
+
+ if multiline:
+ self.flags = re.MULTILINE | re.DOTALL
+ self.pattern = r'%s(?:[^%s%s]' % \
+ ( re.escape(self.quoteChar),
+ _escapeRegexRangeChars(self.endQuoteChar[0]),
+ (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+ else:
+ self.flags = 0
+ self.pattern = r'%s(?:[^%s\n\r%s]' % \
+ ( re.escape(self.quoteChar),
+ _escapeRegexRangeChars(self.endQuoteChar[0]),
+ (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+ if len(self.endQuoteChar) > 1:
+ self.pattern += (
+ '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
+ _escapeRegexRangeChars(self.endQuoteChar[i]))
+ for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
+ )
+ if escQuote:
+ self.pattern += (r'|(?:%s)' % re.escape(escQuote))
+ if escChar:
+ self.pattern += (r'|(?:%s.)' % re.escape(escChar))
+ charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
+ self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)
+ self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
+
+ try:
+ self.re = re.compile(self.pattern, self.flags)
+ self.reString = self.pattern
+ except sre_constants.error:
+ warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
+ SyntaxWarning, stacklevel=2)
+ raise
+
+ self.name = _ustr(self)
+ self.errmsg = "Expected " + self.name
+ self.mayIndexError = False
+ self.mayReturnEmpty = True
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
+ if not result:
+ raise ParseException(instring, loc, self.errmsg, self)
+
+ loc = result.end()
+ ret = result.group()
+
+ if self.unquoteResults:
+
+ # strip off quotes
+ ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
+
+ if isinstance(ret,basestring):
+ # replace escaped characters
+ if self.escChar:
+ ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)
+
+ # replace escaped quotes
+ if self.escQuote:
+ ret = ret.replace(self.escQuote, self.endQuoteChar)
+
+ return loc, ret
+
+ def __str__( self ):
+ try:
+ return super(QuotedString,self).__str__()
+ except:
+ pass
+
+ if self.strRepr is None:
+ self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
+
+ return self.strRepr
+
+
+class CharsNotIn(Token):
+    """Token for matching words composed of characters *not* in a given set.
+    Defined with string containing all disallowed characters, and an optional
+    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
+    minimum value < 1 is not valid); the default values for C{max} and C{exact}
+    are 0, meaning no maximum or exact length restriction.
+    """
+    def __init__( self, notChars, min=1, max=0, exact=0 ):
+        """Store the excluded characters and the length limits.
+
+        Raises C{ValueError} if C{min} is less than 1.  A positive C{exact}
+        overrides both C{min} and C{max}.
+        """
+        super(CharsNotIn,self).__init__()
+        self.skipWhitespace = False
+        self.notChars = notChars
+
+        if min < 1:
+            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = ( self.minLen == 0 )
+        self.mayIndexError = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Match a run of characters that are not in C{notChars}, starting at C{loc}.
+
+        Returns C{(newloc, matchedText)}.  Raises C{ParseException} if the
+        character at C{loc} is excluded or the run is shorter than C{minLen}.
+        """
+        if instring[loc] in self.notChars:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        start = loc
+        loc += 1
+        notchars = self.notChars
+        # never scan beyond maxLen characters or the end of the input
+        maxlen = min( start+self.maxLen, len(instring) )
+        while loc < maxlen and \
+              (instring[loc] not in notchars):
+            loc += 1
+
+        if loc - start < self.minLen:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+    def __str__( self ):
+        """Return the base-class string if available, else a cached C{!W:(...)} form
+        showing at most the first four excluded characters."""
+        try:
+            return super(CharsNotIn, self).__str__()
+        except Exception:
+            # narrowed from a bare except so KeyboardInterrupt/SystemExit still propagate
+            pass
+
+        if self.strRepr is None:
+            if len(self.notChars) > 4:
+                self.strRepr = "!W:(%s...)" % self.notChars[:4]
+            else:
+                self.strRepr = "!W:(%s)" % self.notChars
+
+        return self.strRepr
+
+class White(Token):
+    """Special matching class for matching whitespace.  Normally, whitespace is ignored
+    by pyparsing grammars.  This class is included when some whitespace structures
+    are significant.  Define with a string containing the whitespace characters to be
+    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
+    as defined for the C{L{Word}} class."""
+    # display labels used to build the expression name
+    whiteStrs = {
+        " " : "<SPC>",
+        "\t": "<TAB>",
+        "\n": "<LF>",
+        "\r": "<CR>",
+        "\f": "<FF>",
+        }
+    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
+        """Configure which whitespace characters are matched and the length limits.
+
+        The characters in C{ws} are removed from this element's skippable
+        whitespace so that they can be matched instead of skipped.  A positive
+        C{exact} overrides both C{min} and C{max}.
+        """
+        super(White,self).__init__()
+        self.matchWhite = ws
+        self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) )
+        #~ self.leaveWhitespace()
+        # characters without a label in whiteStrs fall back to their repr()
+        # instead of raising KeyError from the constructor
+        self.name = ("".join([White.whiteStrs.get(c, repr(c)) for c in self.matchWhite]))
+        self.mayReturnEmpty = True
+        self.errmsg = "Expected " + self.name
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Match a run of characters from C{matchWhite} starting at C{loc}.
+
+        Returns C{(newloc, matchedText)}.  Raises C{ParseException} if the
+        character at C{loc} is not matchable or the run is shorter than C{minLen}.
+        """
+        if not(instring[ loc ] in self.matchWhite):
+            raise ParseException(instring, loc, self.errmsg, self)
+        start = loc
+        loc += 1
+        # never scan beyond maxLen characters or the end of the input
+        maxloc = start + self.maxLen
+        maxloc = min( maxloc, len(instring) )
+        while loc < maxloc and instring[loc] in self.matchWhite:
+            loc += 1
+
+        if loc - start < self.minLen:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+
+class _PositionToken(Token):
+    """Common base for the position-testing tokens defined below it
+    (C{GoToColumn}, C{LineStart}, C{LineEnd}, C{StringStart}, ...)."""
+    def __init__( self ):
+        super(_PositionToken,self).__init__()
+        self.mayIndexError = False
+        self.mayReturnEmpty = True
+        # the expression name defaults to the concrete class name
+        self.name = self.__class__.__name__
+
+class GoToColumn(_PositionToken):
+    """Token to advance to a specific column of input text; useful for tabular report scraping."""
+    def __init__( self, colno ):
+        super(GoToColumn,self).__init__()
+        self.col = colno
+
+    def preParse( self, instring, loc ):
+        """Skip ignorable expressions and whitespace until the target column,
+        a non-space character, or the end of the input is reached."""
+        if col(loc, instring) == self.col:
+            return loc
+
+        if self.ignoreExprs:
+            loc = self._skipIgnorables( instring, loc )
+        end = len(instring)
+        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col:
+            loc += 1
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Return the text between C{loc} and the target column.
+
+        Raises C{ParseException} if C{loc} is already past the target column.
+        """
+        curCol = col( loc, instring )
+        if curCol > self.col:
+            raise ParseException( instring, loc, "Text not in expected column", self )
+        target = loc + self.col - curCol
+        return target, instring[ loc: target ]
+
+class LineStart(_PositionToken):
+    """Matches if current position is at the beginning of a line within the parse string"""
+    def __init__( self ):
+        super(LineStart,self).__init__()
+        # newlines are significant here, so they are removed from the skippable whitespace
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
+        self.errmsg = "Expected start of line"
+
+    def preParse( self, instring, loc ):
+        """Return C{loc}, advanced by one when the base-class pre-parse stops on a newline.
+
+        The bounds check prevents an C{IndexError} when the base-class
+        pre-parse position is at or beyond the end of C{instring}.
+        """
+        preloc = super(LineStart,self).preParse(instring,loc)
+        if preloc < len(instring) and instring[preloc] == "\n":
+            loc += 1
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Succeed with an empty token list when C{loc} is 0, equals the
+        pre-parsed start of the string, or directly follows a newline;
+        otherwise raise C{ParseException}."""
+        if not( loc==0 or
+            (loc == self.preParse( instring, 0 )) or
+            (instring[loc-1] == "\n") ):
+            raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+class LineEnd(_PositionToken):
+    """Matches if current position is at the end of a line within the parse string"""
+    def __init__( self ):
+        super(LineEnd,self).__init__()
+        # newlines are significant here, so they are removed from the skippable whitespace
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
+        self.errmsg = "Expected end of line"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Consume a newline at C{loc} (returning C{"\\n"}), or succeed with an
+        empty token list exactly at the end of the input; otherwise raise
+        C{ParseException}."""
+        end = len(instring)
+        if loc == end:
+            return loc+1, []
+        if loc < end and instring[loc] == "\n":
+            return loc+1, "\n"
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class StringStart(_PositionToken):
+    """Matches if current position is at the beginning of the parse string"""
+    def __init__( self ):
+        super(StringStart,self).__init__()
+        self.errmsg = "Expected start of text"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Succeed with an empty token list at position 0, or at the position
+        that pre-parsing from 0 reaches (only whitespace and ignorables so far)."""
+        if loc != 0 and loc != self.preParse( instring, 0 ):
+            raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+class StringEnd(_PositionToken):
+    """Matches if current position is at the end of the parse string"""
+    def __init__( self ):
+        super(StringEnd,self).__init__()
+        self.errmsg = "Expected end of text"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Succeed with an empty token list at or beyond the end of C{instring}.
+
+        Exactly at the end, the returned location is advanced by one; beyond
+        the end it is returned unchanged.  Raises C{ParseException} when input
+        remains.  (The former trailing C{else} branch could never execute and
+        has been removed.)
+        """
+        if loc < len(instring):
+            raise ParseException(instring, loc, self.errmsg, self)
+        elif loc == len(instring):
+            return loc+1, []
+        else:
+            return loc, []
+
+class WordStart(_PositionToken):
+    """Matches if the current position is at the beginning of a Word, and
+       is not preceded by any character in a given set of C{wordChars}
+       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
+       use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
+       the string being parsed, or at the beginning of a line.
+    """
+    def __init__(self, wordChars = printables):
+        super(WordStart,self).__init__()
+        self.wordChars = set(wordChars)
+        self.errmsg = "Not at the start of a word"
+
+    def parseImpl(self, instring, loc, doActions=True ):
+        """Succeed with an empty token list at position 0, or when the previous
+        character is not a word character and the current one is."""
+        if loc == 0:
+            return loc, []
+
+        chars = self.wordChars
+        if instring[loc-1] in chars or instring[loc] not in chars:
+            raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+class WordEnd(_PositionToken):
+    """Matches if the current position is at the end of a Word, and
+       is not followed by any character in a given set of C{wordChars}
+       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
+       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
+       the string being parsed, or at the end of a line.
+    """
+    def __init__(self, wordChars = printables):
+        super(WordEnd,self).__init__()
+        self.wordChars = set(wordChars)
+        self.skipWhitespace = False
+        self.errmsg = "Not at the end of a word"
+
+    def parseImpl(self, instring, loc, doActions=True ):
+        """Succeed with an empty token list at or past the end of the input, or
+        when the current character is not a word character and the previous one is."""
+        total = len(instring)
+        if total == 0 or loc >= total:
+            return loc, []
+
+        chars = self.wordChars
+        if instring[loc] in chars or instring[loc-1] not in chars:
+            raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+
+class ParseExpression(ParserElement):
+    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
+    def __init__( self, exprs, savelist = False ):
+        """Normalise C{exprs} into the list C{self.exprs}.
+
+        A list is stored as given; a string becomes a one-element list holding
+        a literal expression; any other iterable is converted with C{list()};
+        a non-iterable is wrapped in a one-element list.
+        """
+        super(ParseExpression,self).__init__(savelist)
+        if isinstance( exprs, list ):
+            self.exprs = exprs
+        elif isinstance( exprs, basestring ):
+            # use the configurable literal class, as Or/MatchFirst/Forward already do
+            self.exprs = [ ParserElement.literalStringClass( exprs ) ]
+        else:
+            try:
+                self.exprs = list( exprs )
+            except TypeError:
+                self.exprs = [ exprs ]
+        self.callPreparse = False
+
+    def __getitem__( self, i ):
+        return self.exprs[i]
+
+    def append( self, other ):
+        """Append C{other} to the contained expressions and return C{self}."""
+        self.exprs.append( other )
+        # cached string form is stale once the expression list changes
+        self.strRepr = None
+        return self
+
+    def leaveWhitespace( self ):
+        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
+           all contained expressions."""
+        self.skipWhitespace = False
+        # work on copies so the contained expressions used elsewhere are not altered
+        self.exprs = [ e.copy() for e in self.exprs ]
+        for e in self.exprs:
+            e.leaveWhitespace()
+        return self
+
+    def ignore( self, other ):
+        """Register C{other} as ignorable here and on every contained expression.
+
+        A C{Suppress} instance that is already registered is not added again.
+        """
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                super( ParseExpression, self).ignore( other )
+                for e in self.exprs:
+                    e.ignore( self.ignoreExprs[-1] )
+        else:
+            super( ParseExpression, self).ignore( other )
+            for e in self.exprs:
+                e.ignore( self.ignoreExprs[-1] )
+        return self
+
+    def __str__( self ):
+        try:
+            return super(ParseExpression,self).__str__()
+        except Exception:
+            # narrowed from a bare except so KeyboardInterrupt/SystemExit still propagate
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
+        return self.strRepr
+
+    def streamline( self ):
+        """Streamline contained expressions and flatten directly nested
+        expressions of the same class; returns C{self}."""
+        super(ParseExpression,self).streamline()
+
+        for e in self.exprs:
+            e.streamline()
+
+        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
+        # but only if there are no parse actions or resultsNames on the nested And's
+        # (likewise for Or's and MatchFirst's)
+        if ( len(self.exprs) == 2 ):
+            other = self.exprs[0]
+            if ( isinstance( other, self.__class__ ) and
+                  not(other.parseAction) and
+                  other.resultsName is None and
+                  not other.debug ):
+                self.exprs = other.exprs[:] + [ self.exprs[1] ]
+                self.strRepr = None
+                self.mayReturnEmpty |= other.mayReturnEmpty
+                self.mayIndexError  |= other.mayIndexError
+
+            other = self.exprs[-1]
+            if ( isinstance( other, self.__class__ ) and
+                  not(other.parseAction) and
+                  other.resultsName is None and
+                  not other.debug ):
+                self.exprs = self.exprs[:-1] + other.exprs[:]
+                self.strRepr = None
+                self.mayReturnEmpty |= other.mayReturnEmpty
+                self.mayIndexError  |= other.mayIndexError
+
+        return self
+
+    def setResultsName( self, name, listAllMatches=False ):
+        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
+        return ret
+
+    def validate( self, validateTrace=[] ):
+        """Validate every contained expression, then check this one for recursion."""
+        # the shared default list is never mutated: a new list is built here
+        tmp = validateTrace[:]+[self]
+        for e in self.exprs:
+            e.validate(tmp)
+        self.checkRecursion( [] )
+
+    def copy(self):
+        """Return a copy whose contained expressions are themselves copies."""
+        ret = super(ParseExpression,self).copy()
+        ret.exprs = [e.copy() for e in self.exprs]
+        return ret
+
+class And(ParseExpression):
+    """Requires all given C{ParseExpression}s to be found in the given order.
+       Expressions may be separated by whitespace.
+       May be constructed using the C{'+'} operator.
+    """
+
+    class _ErrorStop(Empty):
+        """Marker element: once it is passed, later failures in the same
+        C{And} are raised as C{ParseSyntaxException}."""
+        def __init__(self, *args, **kwargs):
+            super(And._ErrorStop,self).__init__(*args, **kwargs)
+            self.leaveWhitespace()
+
+    def __init__( self, exprs, savelist = True ):
+        """Build the sequence; whitespace handling is inherited from the first expression."""
+        super(And,self).__init__(exprs, savelist)
+        self.mayReturnEmpty = True
+        for e in self.exprs:
+            if not e.mayReturnEmpty:
+                self.mayReturnEmpty = False
+                break
+        # read the normalised list, not the raw argument: the raw value may be
+        # a string or a one-shot iterable that cannot be indexed meaningfully
+        first = self.exprs[0]
+        self.setWhitespaceChars( first.whiteChars )
+        self.skipWhitespace = first.skipWhitespace
+        self.callPreparse = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Parse every contained expression in order and return C{(loc, tokens)}."""
+        # pass False as last arg to _parse for first element, since we already
+        # pre-parsed the string as part of our And pre-parsing
+        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
+        errorStop = False
+        for e in self.exprs[1:]:
+            if isinstance(e, And._ErrorStop):
+                errorStop = True
+                continue
+            if errorStop:
+                try:
+                    loc, exprtokens = e._parse( instring, loc, doActions )
+                except ParseSyntaxException:
+                    raise
+                except ParseBaseException as pe:
+                    pe.__traceback__ = None
+                    raise ParseSyntaxException(pe)
+                except IndexError:
+                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
+            else:
+                loc, exprtokens = e._parse( instring, loc, doActions )
+            if exprtokens or exprtokens.keys():
+                resultlist += exprtokens
+        return loc, resultlist
+
+    def __iadd__(self, other ):
+        """Append C{other} in place; a string is converted to a literal expression."""
+        if isinstance( other, basestring ):
+            # use the configurable literal class, matching Or.__ixor__ and MatchFirst.__ior__
+            other = ParserElement.literalStringClass( other )
+        return self.append( other ) #And( [ self, other ] )
+
+    def checkRecursion( self, parseElementList ):
+        """Check contained expressions for recursion, stopping after the first
+        one that cannot match empty."""
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+            if not e.mayReturnEmpty:
+                break
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
+
+        return self.strRepr
+
+
+class Or(ParseExpression):
+    """Requires that at least one C{ParseExpression} is found.
+       If two expressions match, the expression that matches the longest string will be used.
+       May be constructed using the C{'^'} operator.
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(Or,self).__init__(exprs, savelist)
+        # the alternation can match empty as soon as any alternative can
+        self.mayReturnEmpty = any( e.mayReturnEmpty for e in self.exprs )
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Try every alternative and re-parse with the one that got furthest.
+
+        If none matches, re-raise the failure that occurred furthest into the
+        input, or a generic C{ParseException} if no failure was recorded.
+        """
+        bestFailLoc = -1
+        bestMatchLoc = -1
+        bestFailure = None
+        for alt in self.exprs:
+            try:
+                endLoc = alt.tryParse( instring, loc )
+            except ParseException as err:
+                err.__traceback__ = None
+                if err.loc > bestFailLoc:
+                    bestFailure = err
+                    bestFailLoc = err.loc
+            except IndexError:
+                if len(instring) > bestFailLoc:
+                    bestFailure = ParseException(instring,len(instring),alt.errmsg,self)
+                    bestFailLoc = len(instring)
+            else:
+                if endLoc > bestMatchLoc:
+                    bestMatchLoc = endLoc
+                    bestMatchExp = alt
+
+        if bestMatchLoc >= 0:
+            return bestMatchExp._parse( instring, loc, doActions )
+
+        if bestFailure is not None:
+            raise bestFailure
+        raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+    def __ixor__(self, other ):
+        """Append C{other} in place; a string is converted to a literal expression."""
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        return self.append( other ) #Or( [ self, other ] )
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            parts = [ _ustr(e) for e in self.exprs ]
+            self.strRepr = "{" + " ^ ".join( parts ) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        """Check every alternative for recursion."""
+        trail = parseElementList[:] + [ self ]
+        for alt in self.exprs:
+            alt.checkRecursion( trail )
+
+
+class MatchFirst(ParseExpression):
+    """Requires that at least one C{ParseExpression} is found.
+       If two expressions match, the first one listed is the one that will match.
+       May be constructed using the C{'|'} operator.
+    """
+    def __init__( self, exprs, savelist = False ):
+        """Build the alternation; it may match empty if any alternative can,
+        or if there are no alternatives at all."""
+        super(MatchFirst,self).__init__(exprs, savelist)
+        # test the normalised list: the raw argument may be a one-shot
+        # iterable, which is truthy even when it yields nothing
+        if self.exprs:
+            self.mayReturnEmpty = False
+            for e in self.exprs:
+                if e.mayReturnEmpty:
+                    self.mayReturnEmpty = True
+                    break
+        else:
+            self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Return the result of the first alternative that parses.
+
+        If none matches, re-raise the failure that occurred furthest into the
+        input, or a generic C{ParseException} if no failure was recorded.
+        """
+        maxExcLoc = -1
+        maxException = None
+        for e in self.exprs:
+            try:
+                ret = e._parse( instring, loc, doActions )
+                return ret
+            except ParseException as err:
+                if err.loc > maxExcLoc:
+                    maxException = err
+                    maxExcLoc = err.loc
+            except IndexError:
+                if len(instring) > maxExcLoc:
+                    maxException = ParseException(instring,len(instring),e.errmsg,self)
+                    maxExcLoc = len(instring)
+
+        # only got here if no expression matched, raise exception for match that made it the furthest
+        else:
+            if maxException is not None:
+                raise maxException
+            else:
+                raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+    def __ior__(self, other ):
+        """Append C{other} in place; a string is converted to a literal expression."""
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass( other )
+        return self.append( other ) #MatchFirst( [ self, other ] )
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        """Check every alternative for recursion."""
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+class Each(ParseExpression):
+    """Requires all given C{ParseExpression}s to be found, but in any order.
+       Expressions may be separated by whitespace.
+       May be constructed using the C{'&'} operator.
+    """
+    def __init__( self, exprs, savelist = True ):
+        super(Each,self).__init__(exprs, savelist)
+        # can match empty only if every contained expression can
+        self.mayReturnEmpty = all( e.mayReturnEmpty for e in self.exprs )
+        self.skipWhitespace = True
+        self.initExprGroups = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Find an order in which the contained expressions match, then parse in that order.
+
+        Raises C{ParseException} naming the required expressions that were not found.
+        """
+        if self.initExprGroups:
+            # classify the contained expressions once, on the first parse
+            wrappedOptionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
+            emptyMatchers = [ e for e in self.exprs if e.mayReturnEmpty and e not in wrappedOptionals ]
+            self.optionals = wrappedOptionals + emptyMatchers
+            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
+            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
+            plainRequired = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
+            self.required = plainRequired + self.multirequired
+            self.initExprGroups = False
+
+        scanLoc = loc
+        pendingReqd = self.required[:]
+        pendingOpt = self.optionals[:]
+        matchOrder = []
+
+        # keep sweeping over the candidates until a full pass matches nothing
+        while True:
+            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
+            numFailed = 0
+            for cand in candidates:
+                try:
+                    scanLoc = cand.tryParse( instring, scanLoc )
+                except ParseException:
+                    numFailed += 1
+                else:
+                    matchOrder.append(cand)
+                    if cand in pendingReqd:
+                        pendingReqd.remove(cand)
+                    elif cand in pendingOpt:
+                        pendingOpt.remove(cand)
+            if numFailed == len(candidates):
+                break
+
+        if pendingReqd:
+            missing = ", ".join( [ _ustr(e) for e in pendingReqd ] )
+            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
+
+        # add any unmatched Optionals, in case they have default values defined
+        matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt ]
+
+        # second pass: parse for real, in the order discovered above
+        collected = []
+        for matched in matchOrder:
+            loc, results = matched._parse(instring,loc,doActions)
+            collected.append(results)
+
+        finalResults = ParseResults([])
+        for res in collected:
+            # a name seen more than once accumulates the values from every occurrence
+            dups = {}
+            for key in res.keys():
+                if key in finalResults.keys():
+                    merged = ParseResults(finalResults[key])
+                    merged += ParseResults(res[key])
+                    dups[key] = merged
+            finalResults += ParseResults(res)
+            for key, val in dups.items():
+                finalResults[key] = val
+        return loc, finalResults
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            parts = [ _ustr(e) for e in self.exprs ]
+            self.strRepr = "{" + " & ".join( parts ) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        """Check every contained expression for recursion."""
+        trail = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( trail )
+
+
+class ParseElementEnhance(ParserElement):
+    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
+    def __init__( self, expr, savelist=False ):
+        """Wrap C{expr}, which may be C{None}, and inherit its parsing flags.
+
+        A string is converted to a literal expression first.
+        """
+        super(ParseElementEnhance,self).__init__(savelist)
+        if isinstance( expr, basestring ):
+            # use the configurable literal class, as Or/MatchFirst/Forward already do
+            expr = ParserElement.literalStringClass(expr)
+        self.expr = expr
+        self.strRepr = None
+        if expr is not None:
+            self.mayIndexError = expr.mayIndexError
+            self.mayReturnEmpty = expr.mayReturnEmpty
+            self.setWhitespaceChars( expr.whiteChars )
+            self.skipWhitespace = expr.skipWhitespace
+            self.saveAsList = expr.saveAsList
+            self.callPreparse = expr.callPreparse
+            self.ignoreExprs.extend(expr.ignoreExprs)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Delegate to the wrapped expression; raise C{ParseException} if there is none."""
+        if self.expr is not None:
+            return self.expr._parse( instring, loc, doActions, callPreParse=False )
+        else:
+            raise ParseException("",loc,self.errmsg,self)
+
+    def leaveWhitespace( self ):
+        """Disable whitespace skipping here and on a private copy of the wrapped expression."""
+        self.skipWhitespace = False
+        # guard before copying: the wrapped expression may be None, in which
+        # case the previous unconditional copy() raised AttributeError
+        if self.expr is not None:
+            self.expr = self.expr.copy()
+            self.expr.leaveWhitespace()
+        return self
+
+    def ignore( self, other ):
+        """Register C{other} as ignorable here and on the wrapped expression.
+
+        A C{Suppress} instance that is already registered is not added again.
+        """
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                super( ParseElementEnhance, self).ignore( other )
+                if self.expr is not None:
+                    self.expr.ignore( self.ignoreExprs[-1] )
+        else:
+            super( ParseElementEnhance, self).ignore( other )
+            if self.expr is not None:
+                self.expr.ignore( self.ignoreExprs[-1] )
+        return self
+
+    def streamline( self ):
+        super(ParseElementEnhance,self).streamline()
+        if self.expr is not None:
+            self.expr.streamline()
+        return self
+
+    def checkRecursion( self, parseElementList ):
+        """Raise C{RecursiveGrammarException} if this element is already in the
+        trail; otherwise continue the check into the wrapped expression."""
+        if self in parseElementList:
+            raise RecursiveGrammarException( parseElementList+[self] )
+        subRecCheckList = parseElementList[:] + [ self ]
+        if self.expr is not None:
+            self.expr.checkRecursion( subRecCheckList )
+
+    def validate( self, validateTrace=[] ):
+        """Validate the wrapped expression, then check this one for recursion."""
+        # the shared default list is never mutated: a new list is built here
+        tmp = validateTrace[:]+[self]
+        if self.expr is not None:
+            self.expr.validate(tmp)
+        self.checkRecursion( [] )
+
+    def __str__( self ):
+        try:
+            return super(ParseElementEnhance,self).__str__()
+        except Exception:
+            # narrowed from a bare except so KeyboardInterrupt/SystemExit still propagate
+            pass
+
+        if self.strRepr is None and self.expr is not None:
+            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
+        return self.strRepr
+
+
+class FollowedBy(ParseElementEnhance):
+    """Lookahead matching of the given parse expression.  C{FollowedBy}
+    does *not* advance the parsing position within the input string, it only
+    verifies that the specified parse expression matches at the current
+    position.  C{FollowedBy} always returns a null token list."""
+    def __init__( self, expr ):
+        super(FollowedBy,self).__init__(expr)
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Trial-parse the wrapped expression and return C{loc} unchanged with
+        no tokens; any exception from the trial parse propagates."""
+        lookahead = self.expr
+        lookahead.tryParse( instring, loc )
+        return loc, []
+
+
+class NotAny(ParseElementEnhance):
+    """Lookahead to disallow matching with the given parse expression.  C{NotAny}
+    does *not* advance the parsing position within the input string, it only
+    verifies that the specified parse expression does *not* match at the current
+    position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
+    always returns a null token list.  May be constructed using the '~' operator."""
+    def __init__( self, expr ):
+        super(NotAny,self).__init__(expr)
+        #~ self.leaveWhitespace()
+        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
+        self.mayReturnEmpty = True
+        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Succeed with no tokens when the wrapped expression fails to match at
+        C{loc}; raise C{ParseException} when it does match."""
+        try:
+            self.expr.tryParse( instring, loc )
+        except (ParseException,IndexError):
+            # the unwanted expression is absent, which is what we require
+            return loc, []
+        raise ParseException(instring, loc, self.errmsg, self)
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "~{" + _ustr(self.expr) + "}"
+
+        return self.strRepr
+
+
+class ZeroOrMore(ParseElementEnhance):
+    """Optional repetition of zero or more of the given expression."""
+    def __init__( self, expr ):
+        super(ZeroOrMore,self).__init__(expr)
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Match the wrapped expression repeatedly until it fails; return
+        C{(loc, tokens)}, with an empty list when it never matched."""
+        tokens = []
+        try:
+            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+            skipIgnored = ( len(self.ignoreExprs) > 0 )
+            while True:
+                if skipIgnored:
+                    nextStart = self._skipIgnorables( instring, loc )
+                else:
+                    nextStart = loc
+                loc, moreTokens = self.expr._parse( instring, nextStart, doActions )
+                if moreTokens or moreTokens.keys():
+                    tokens += moreTokens
+        except (ParseException,IndexError):
+            # a failed repetition ends the loop; whatever matched so far is kept
+            pass
+
+        return loc, tokens
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "[" + _ustr(self.expr) + "]..."
+
+        return self.strRepr
+
+    def setResultsName( self, name, listAllMatches=False ):
+        """Delegate to the base class, then mark the returned element as list-saving."""
+        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
+        named.saveAsList = True
+        return named
+
+
+class OneOrMore(ParseElementEnhance):
+    """Repetition of one or more of the given expression."""
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Match the wrapped expression once (failure propagates), then
+        repeatedly until it fails; return C{(loc, tokens)}."""
+        # must be at least one
+        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+        try:
+            skipIgnored = ( len(self.ignoreExprs) > 0 )
+            while True:
+                if skipIgnored:
+                    nextStart = self._skipIgnorables( instring, loc )
+                else:
+                    nextStart = loc
+                loc, moreTokens = self.expr._parse( instring, nextStart, doActions )
+                if moreTokens or moreTokens.keys():
+                    tokens += moreTokens
+        except (ParseException,IndexError):
+            # a failed repetition ends the loop; whatever matched so far is kept
+            pass
+
+        return loc, tokens
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + _ustr(self.expr) + "}..."
+
+        return self.strRepr
+
+    def setResultsName( self, name, listAllMatches=False ):
+        """Delegate to the base class, then mark the returned element as list-saving."""
+        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
+        named.saveAsList = True
+        return named
+
+class _NullToken(object):
+    """Falsy placeholder object whose string form is empty."""
+    def __bool__(self):
+        # always evaluates as false in a boolean context
+        return False
+    __nonzero__ = __bool__
+
+    def __str__(self):
+        return ""
+
+# sentinel used as the default of Optional's C{default} argument, meaning "no default supplied"
+_optionalNotMatched = _NullToken()
+class Optional(ParseElementEnhance):
+    """Optional matching of the given expression.
+       A default return string can also be specified, if the optional expression
+       is not found.
+    """
+    def __init__( self, exprs, default=_optionalNotMatched ):
+        super(Optional,self).__init__( exprs, savelist=False )
+        self.defaultValue = default
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Parse the wrapped expression; if it fails, return the default value
+        (when one was supplied) or an empty token list, leaving C{loc} unchanged."""
+        try:
+            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+        except (ParseException,IndexError):
+            if self.defaultValue is _optionalNotMatched:
+                tokens = []
+            elif self.expr.resultsName:
+                # publish the default under the wrapped expression's results name too
+                tokens = ParseResults([ self.defaultValue ])
+                tokens[self.expr.resultsName] = self.defaultValue
+            else:
+                tokens = [ self.defaultValue ]
+        return loc, tokens
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "[" + _ustr(self.expr) + "]"
+
+        return self.strRepr
+
+
+class SkipTo(ParseElementEnhance):
+    """Token for skipping over all undefined text until the matched expression is found.
+       If C{include} is set to true, the matched expression is also parsed (the skipped text
+       and matched expression are returned as a 2-element list).  The C{ignore}
+       argument is used to define grammars (typically quoted strings and comments) that
+       might contain false matches.
+    """
+    def __init__( self, other, include=False, ignore=None, failOn=None ):
+        super( SkipTo, self ).__init__( other )
+        self.ignoreExpr = ignore
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+        self.includeMatch = include
+        self.asList = False
+        # a string failOn is converted to a Literal; anything else (including None) is kept as given
+        if isinstance(failOn, basestring):
+            self.failOn = Literal(failOn)
+        else:
+            self.failOn = failOn
+        self.errmsg = "No match found for "+_ustr(self.expr)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Advance one character at a time until the target expression matches.
+
+        Returns the skipped text (plus the target's tokens when C{include} was
+        set).  Raises C{ParseException} if C{failOn} matches first or the
+        target is never found.
+        """
+        startLoc = loc
+        endLoc = len(instring)
+        target = self.expr
+        hitFailOn = False
+        while loc <= endLoc:
+            try:
+                if self.failOn:
+                    try:
+                        self.failOn.tryParse(instring, loc)
+                    except ParseBaseException:
+                        pass
+                    else:
+                        hitFailOn = True
+                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
+                    hitFailOn = False
+                if self.ignoreExpr is not None:
+                    # step over as many consecutive ignorable matches as there are
+                    while True:
+                        try:
+                            loc = self.ignoreExpr.tryParse(instring,loc)
+                        except ParseBaseException:
+                            break
+                # probe only (no parse actions) to see whether the target starts here
+                target._parse( instring, loc, doActions=False, callPreParse=False )
+                skipText = instring[startLoc:loc]
+                if not self.includeMatch:
+                    return loc, [ skipText ]
+                loc, mat = target._parse(instring,loc,doActions,callPreParse=False)
+                if not mat:
+                    return loc, [ skipText ]
+                skipRes = ParseResults( skipText )
+                skipRes += mat
+                return loc, [ skipRes ]
+            except (ParseException,IndexError):
+                if hitFailOn:
+                    raise
+                loc += 1
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class Forward(ParseElementEnhance):
+    """Forward declaration of an expression to be defined later -
+       used for recursive grammars, such as algebraic infix notation.
+       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
+
+       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
+       Specifically, '|' has a lower precedence than '<<', so that::
+          fwdExpr << a | b | c
+       will actually be evaluated as::
+          (fwdExpr << a) | b | c
+       thereby leaving b and c out as parseable alternatives.  It is recommended that you
+       explicitly group the values inserted into the C{Forward}::
+          fwdExpr << (a | b | c)
+       Converting to use the '<<=' operator instead will avoid this problem.
+    """
+    def __init__( self, other=None ):
+        super(Forward,self).__init__( other, savelist=False )
+
+    def __lshift__( self, other ):
+        """Install C{other} as the wrapped expression and inherit its parsing flags.
+
+        A string is converted to a literal expression first.  Returns C{None},
+        as before, so that C{<<} behaves exactly as it did.
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement.literalStringClass(other)
+        self.expr = other
+        self.strRepr = None
+        self.mayIndexError = self.expr.mayIndexError
+        self.mayReturnEmpty = self.expr.mayReturnEmpty
+        self.setWhitespaceChars( self.expr.whiteChars )
+        self.skipWhitespace = self.expr.skipWhitespace
+        self.saveAsList = self.expr.saveAsList
+        self.ignoreExprs.extend(self.expr.ignoreExprs)
+        return None
+
+    def __ilshift__( self, other ):
+        """In-place form of C{<<}.
+
+        Python rebinds the left-hand name to whatever this method returns, so
+        it must return C{self}; aliasing it to C{__lshift__} (which returns
+        C{None}) made C{fwd <<= expr} replace C{fwd} with C{None}.
+        """
+        self << other
+        return self
+
+    def leaveWhitespace( self ):
+        """Disable whitespace skipping on this element only (not propagated)."""
+        self.skipWhitespace = False
+        return self
+
+    def streamline( self ):
+        # the flag is set before descending so a recursive grammar cannot loop forever
+        if not self.streamlined:
+            self.streamlined = True
+            if self.expr is not None:
+                self.expr.streamline()
+        return self
+
+    def validate( self, validateTrace=[] ):
+        """Validate the wrapped expression unless this element is already in
+        the trace, then check for recursion."""
+        # the shared default list is never mutated: a new list is built here
+        if self not in validateTrace:
+            tmp = validateTrace[:]+[self]
+            if self.expr is not None:
+                self.expr.validate(tmp)
+        self.checkRecursion([])
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        # temporarily switch class so a recursive reference to this element
+        # renders as "..." instead of recursing without end
+        self._revertClass = self.__class__
+        self.__class__ = _ForwardNoRecurse
+        try:
+            if self.expr is not None:
+                retString = _ustr(self.expr)
+            else:
+                retString = "None"
+        finally:
+            self.__class__ = self._revertClass
+        return self.__class__.__name__ + ": " + retString
+
+    def copy(self):
+        """Return a copy; an undefined C{Forward} yields a new C{Forward} that wraps this one."""
+        if self.expr is not None:
+            return super(Forward,self).copy()
+        else:
+            ret = Forward()
+            ret << self
+            return ret
+
+class _ForwardNoRecurse(Forward):
+    """Stand-in class that C{Forward.__str__} assigns to an instance while
+    rendering it, so that a nested reference prints as an ellipsis."""
+    def __str__( self ):
+        ellipsis = "..."
+        return ellipsis
+
+class TokenConverter(ParseElementEnhance):
+    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
+    def __init__( self, expr, savelist=False ):
+        # savelist is accepted but not forwarded to the base class
+        super(TokenConverter,self).__init__( expr )
+        self.saveAsList = False
+
+class Upcase(TokenConverter):
+    """Converter to upper case all matching tokens."""
+    def __init__(self, *args):
+        super(Upcase,self).__init__(*args)
+        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
+                       DeprecationWarning,stacklevel=2)
+
+    def postParse( self, instring, loc, tokenlist ):
+        """Return a new list holding the upper-cased form of every token."""
+        return [ str.upper(tok) for tok in tokenlist ]
+
+
+class Combine(TokenConverter):
+    """Converter to concatenate all matching tokens to a single string.
+       By default, the matching patterns must also be contiguous in the input string;
+       this can be disabled by specifying C{'adjacent=False'} in the constructor.
+    """
+    def __init__( self, expr, joinString="", adjacent=True ):
+        super(Combine,self).__init__( expr )
+        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
+        if adjacent:
+            self.leaveWhitespace()
+        self.skipWhitespace = True
+        self.adjacent = adjacent
+        self.joinString = joinString
+        self.callPreparse = True
+
+    def ignore( self, other ):
+        """Register C{other} as ignorable; when C{adjacent} is set, the
+        C{ParserElement} implementation is called directly."""
+        if not self.adjacent:
+            super( Combine, self).ignore( other )
+        else:
+            ParserElement.ignore(self, other)
+        return self
+
+    def postParse( self, instring, loc, tokenlist ):
+        """Replace the matched tokens with their concatenation as a single token."""
+        # start from a copy with its list contents removed
+        retToks = tokenlist.copy()
+        del retToks[:]
+        joined = "".join(tokenlist._asStringList(self.joinString))
+        retToks += ParseResults([ joined ], modal=self.modalResults)
+
+        if self.resultsName and len(retToks.keys())>0:
+            return [ retToks ]
+        return retToks
+
+class Group(TokenConverter):
+    """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
+    def __init__( self, expr ):
+        super(Group,self).__init__( expr )
+        self.saveAsList = True
+
+    def postParse( self, instring, loc, tokenlist ):
+        """Wrap the matched tokens in a one-element list."""
+        grouped = [ tokenlist ]
+        return grouped
+
+class Dict(TokenConverter):
+    """Converter to return a repetitive expression as a list, but also as a dictionary.
+       Each element can also be referenced using the first token in the expression as its key.
+       Useful for tabular report scraping when the first column can be used as a item key.
+    """
+    def __init__( self, exprs ):
+        super(Dict,self).__init__( exprs )
+        self.saveAsList = True
+
+    def postParse( self, instring, loc, tokenlist ):
+        """Add a named entry to C{tokenlist} for each non-empty element, keyed
+        by the element's first token."""
+        for idx, tok in enumerate(tokenlist):
+            numToks = len(tok)
+            if numToks == 0:
+                continue
+
+            key = tok[0]
+            if isinstance(key, int):
+                # integer keys are stored under their stripped string form
+                key = _ustr(tok[0]).strip()
+
+            if numToks == 1:
+                value = ""
+            elif numToks == 2 and not isinstance(tok[1], ParseResults):
+                value = tok[1]
+            else:
+                rest = tok.copy()
+                del rest[0]
+                if len(rest) != 1 or (isinstance(rest, ParseResults) and rest.keys()):
+                    value = rest
+                else:
+                    value = rest[0]
+            tokenlist[key] = _ParseResultsWithOffset(value, idx)
+
+        if self.resultsName:
+            return [ tokenlist ]
+        return tokenlist
+
+
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse( self, instring, loc, tokenlist ):
        """Discard whatever was matched by returning an empty list."""
        return list()

    def suppress( self ):
        """Return C{self} unchanged."""
        return self
+
+
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        """Store the arity-normalized C{methodCall} and mark it as not yet called."""
        self.called = False
        self.callable = _trim_arity(methodCall)

    def __call__(self,s,l,t):
        """Run the wrapped action the first time; raise C{ParseException} on later calls.

        The C{called} flag is only set after the wrapped action returns, so an
        action that raises may be attempted again.
        """
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to be called once more."""
        self.called = False
+
def traceParseAction(f):
    """Decorator for debugging parse actions.

    Writes an ">>entering" line to C{sys.stderr} before the wrapped parse
    action runs, and a "<<leaving" line afterwards showing either the return
    value or the exception raised (which is then re-raised unchanged).

    The wrapped action may be a plain function taking up to C{(s,l,t)}, or a
    method; when more than three arguments are passed the first one is taken
    to be the instance and its class name is prefixed to the reported name.
    """
    # Resolve the display name once, from the caller's own function and before
    # it is wrapped by _trim_arity.  Python 3 functions have no 'func_name'
    # attribute, so '__name__' is tried first; callables with neither fall
    # back to their class name.
    funcName = (getattr(f, "__name__", None)
                or getattr(f, "func_name", None)
                or f.__class__.__name__)
    f = _trim_arity(f)
    def z(*paArgs):
        thisFunc = funcName
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = funcName
    except (AttributeError, TypeError):
        # best effort only - the trace wrapper still works under its own name
        pass
    return z
+
+#
+# global helpers
+#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."
    if not combine:
        # delimiters are matched but dropped from the results
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        # delimiters are kept and everything is merged into one token
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
+
def countedArray( expr, intExpr=None ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

       If C{intExpr} is given, a copy of it is used to parse the leading count;
       otherwise a C{Word(nums)} converted to C{int} is used.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # once the count is known, rebuild the array expression to match
        # exactly that many items (or an empty group for a zero count)
        n = t[0]
        if n:
            contents = Group(And([expr]*n))
        else:
            contents = Group(empty)
        arrayExpr << contents
        return []

    if intExpr is not None:
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr )
+
+def _flatten(L):
+ ret = []
+ for i in L:
+ if isinstance(i,list):
+ ret.extend(_flatten(i))
+ else:
+ ret.append(i)
+ return ret
+
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # redefine the repeater each time expr matches
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # flatten t tokens
        literals = [ Literal(tt) for tt in _flatten(t.asList()) ]
        rep << And( literals )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
+
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()

    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, then make the repeater reject anything else
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            if _flatten(t.asList()) == expected:
                return
            raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
+
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regular-expression C{[...]} character class.

    The characters backslash, C{^}, C{-} and C{]} get a leading backslash
    (backslash itself is handled first so later escapes are not doubled),
    and literal newline/tab characters become the two-character sequences
    C{\\n} and C{\\t}.
    """
    #~ escape these chars: ^-]
    escaped = s
    for special in r"\^-]":
        escaped = escaped.replace(special,_bslash+special)
    escaped = escaped.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(escaped)
+
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       An argument that is neither a string nor a list/tuple produces a
       C{SyntaxWarning} and is then treated as an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # without this, 'symbols' is unbound and the loop below raises
        # UnboundLocalError instead of honouring the warning
        symbols = []

    # Drop duplicates, and whenever an earlier symbol is a prefix of a later
    # one ("masks" it), move the longer symbol in front so it is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no conflict found for this position - advance
            i += 1

    if not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters - a character class will do
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # deliberately broad (any failure falls back to MatchFirst), but
            # no longer traps KeyboardInterrupt/SystemExit
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
+
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    entry = Group ( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
+
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # zero-width markers that record the parse location before and after expr
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    def textOnly(s,l,t):
        return s[t._original_start:t._original_end]

    def textKeepingNames(s,l,t):
        # replace the tokens in place with the raw slice, then drop the markers
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    if asString:
        extractText = textOnly
    else:
        extractText = textKeepingNames
    matchExpr.setParseAction(extractText)
    return matchExpr
+
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty.

       Wraps C{expr} in a C{TokenConverter} whose parse action returns only
       the first matched token."""
    def firstToken(t):
        return t[0]
    return TokenConverter(expr).setParseAction(firstToken)
+
# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# grammar used by srange() to parse regex-style '[...]' character sets
# a backslash followed by one punctuation char -> that char
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join( c for c in printables if c not in r"\]" )
# \x## (or \0x##) hex escape -> the character with that code
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
# \0## octal escape -> the character with that code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

def _expanded(p):
    # a grouped (first,last) pair expands to every character in that range;
    # a single character - or a pair whose range is empty - is returned as-is
    if isinstance(p,ParseResults):
        chars = ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ])
        if chars:
            return chars
    return p
+
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the input cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best-effort helper: bad input yields "" rather than an error, but
        # KeyboardInterrupt/SystemExit are no longer swallowed
        return ""
+
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.

       Returns a parse action that raises C{ParseException} unless the match
       location is at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
+
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.

       The returned action accepts any positional arguments and always returns
       a one-element list holding C{replStr}.
    """
    def _replFunc(*args):
        replacement = [replStr]
        return replacement
    return _replFunc
+
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )

       Returns the first token with its first and last characters dropped.
    """
    quoted = t[0]
    return quoted[1:-1]
+
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    return [ _ustr(tok).upper() for tok in t ]
+
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    return [ _ustr(tok).lower() for tok in t ]
+
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{L{originalTextFor}}.
       Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    # swap the parsed tokens, in place, for the raw slice of the input
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
+
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Walks the call stack looking for a frame of a function named
       C{_parseNoCache} and returns that frame's local variable C{loc};
       raises C{ParseFatalException} if no such frame is found."""
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame references promptly
        del fstack
+
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name

    C{tagStr} may be a string (turned into a C{Keyword}, caseless unless
    C{xml} is true) or an existing expression, whose C{name} is then used to
    build the results names.  Returns the C{(openTag, closeTag)} pair; both
    get a C{tag} attribute holding that name.
    """
    if not isinstance(tagStr,basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        # XML: every attribute needs a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        # HTML: attribute names are lower-cased, values are optional and may be unquoted
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names look like startTagname / endTagname, with ':' separated parts title-cased and joined
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)
    for tagExpr in (openTag, closeTag):
        tagExpr.tag = resname
    return openTag, closeTag
+
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    htmlTags = _makeTags( tagStr, False )
    return htmlTags
+
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    xmlTags = _makeTags( tagStr, True )
    return xmlTags
+
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.
       """
    # positional (name, value) tuples take precedence over keyword arguments
    pairs = args[:] if args else attrDict.items()
    attrs = [(k,v) for k,v in pairs]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue == withAttribute.ANY_VALUE:
                # presence alone is enough for this attribute
                continue
            if tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
withAttribute.ANY_VALUE = object()
+
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises C{ValueError} for a malformed ternary operator pair, an
       associativity that is neither C{opAssoc.LEFT} nor C{opAssoc.RIGHT}, or
       a term count other than 1, 2 or 3.
    """
    ret = Forward()
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()#.setName("expr%d" % i)

        leftAssoc = (rightLeftAssoc == opAssoc.LEFT)
        if not leftAssoc and not (rightLeftAssoc == opAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        # operands after an operator come from the next-tighter level when
        # left-associative, and recurse into this same level when right-associative
        if leftAssoc:
            rhs = lastExpr
        else:
            rhs = thisExpr

        if arity == 1:
            if leftAssoc:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            else:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
        elif arity == 2:
            if opExpr is not None:
                matchExpr = FollowedBy(lastExpr + opExpr + rhs) + Group( lastExpr + OneOrMore( opExpr + rhs ) )
            else:
                # no operator expression: terms are simply adjacent
                matchExpr = FollowedBy(lastExpr + rhs) + Group( lastExpr + OneOrMore( rhs ) )
        elif arity == 3:
            matchExpr = FollowedBy(lastExpr + opExpr1 + rhs + opExpr2 + rhs) + \
                        Group( lastExpr + opExpr1 + rhs + opExpr2 + rhs )
        else:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if pa:
            matchExpr.setParseAction( pa )
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
+
# quoted-string expressions: the quote character may appear inside the string
# either doubled ("" or '') or backslash-escaped; strings may not span lines
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
    ).setName("string enclosed in single quotes")
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
    ).setName("quotedString using single or double quotes")
# a quoted string with a leading 'u', merged into a single token
unicodeString = Combine(_L('u') + quotedString.copy())
+
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises C{ValueError} if C{opener} equals C{closer}, or if no C{content}
       is given and the delimiters are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        singleCharDelims = (len(opener) == 1 and len(closer)==1)
        if singleCharDelims and ignoreExpr is not None:
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                        ).setParseAction(stripToken))
        elif singleCharDelims:
            content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
                        ).setParseAction(stripToken))
        elif ignoreExpr is not None:
            # multi-character delimiters cannot be excluded char-by-char, so
            # use negative lookahead for each delimiter instead
            content = (Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                        ).setParseAction(stripToken))
        else:
            content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                        ).setParseAction(stripToken))

    ret = Forward()
    if ignoreExpr is None:
        ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    else:
        ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    return ret
+
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.
    """
    def checkPeerIndent(s,l,t):
        # statement must sit at exactly the current indentation column
        if l >= len(s):
            return
        curCol = col(l,s)
        expectedCol = indentStack[-1]
        if curCol == expectedCol:
            return
        if curCol > expectedCol:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # a new block must be deeper than the current level; push its column
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        # leaving a block: column must drop back to (at most) the enclosing level
        if l >= len(s):
            return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if not indent:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    else:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    # a backslash immediately before end-of-line is skipped inside statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
+
# 8-bit character sets, expressed as srange() code-point ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic HTML tag and entity helpers
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: map a matched entity name to its character, or None if unknown
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# comma-separated values: each item is a quoted string or a run of non-comma
# words (inner spaces/tabs kept), and an empty item yields ""
_noncomma = "".join( c for c in printables if c != "," )
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
+
+
if __name__ == "__main__":
    # small self-demo: parse a handful of SELECT statements, good and bad

    def test( teststring ):
        """Parse one statement and print its tokens, or a caret diagnostic on failure."""
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException as err:
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        print()

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    sampleStatements = (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
        )
    for statement in sampleStatements:
        test( statement )
diff --git a/src/setup.py b/src/setup.py
new file mode 100644
index 0000000..642c5f0
--- /dev/null
+++ b/src/setup.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+"""Setup script for the pyparsing module distribution."""
+from distutils.core import setup
+
+import sys
+import os
+
_PY3 = sys.version_info[0] >= 3

# take the version string from whichever implementation matches this interpreter
if not _PY3:
    from pyparsing_py2 import __version__ as pyparsing_version
else:
    from pyparsing_py3 import __version__ as pyparsing_version

modules = ["pyparsing"]
+
+# make sure that a pyparsing.py file exists - if not, copy the appropriate version
def fileexists(fname):
    """Return True if C{fname} can be stat'ed, False otherwise.

    Only the errors that mean "cannot stat this name" are treated as
    "does not exist"; KeyboardInterrupt and SystemExit now propagate.
    """
    try:
        return bool(os.stat(fname))
    except (OSError, ValueError, TypeError):
        # missing or inaccessible path, or a name os.stat cannot accept
        return False
+
def copyfile(fromname, toname):
    """Copy the text contents of C{fromname} into C{toname}.

    The source is read completely before the destination is opened, so a
    missing or unreadable source no longer leaves behind an empty
    destination, and copying a file onto itself no longer empties it.
    Both handles are closed even if reading or writing fails.
    """
    inf = open(fromname)
    try:
        contents = inf.read()
    finally:
        inf.close()

    outf = open(toname,'w')
    try:
        outf.write(contents)
    finally:
        outf.close()
+
# outside of a release build, materialize pyparsing.py from the version-specific source
if "MAKING_PYPARSING_RELEASE" not in os.environ and not fileexists("pyparsing.py"):
    if not _PY3:
        from_file = "pyparsing_py2.py"
    else:
        from_file = "pyparsing_py3.py"
    copyfile(from_file, "pyparsing.py")

setup(
    # Distribution meta-data
    name="pyparsing",
    version=pyparsing_version,
    description="Python parsing module",
    author="Paul McGuire",
    author_email="ptmcg@users.sourceforge.net",
    url="http://pyparsing.wikispaces.com/",
    download_url="http://sourceforge.net/project/showfiles.php?group_id=97203",
    license="MIT License",
    py_modules=modules,
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Intended Audience :: Information Technology',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        ],
    )
diff --git a/src/unitTests.py b/src/unitTests.py
new file mode 100644
index 0000000..4d6583a
--- /dev/null
+++ b/src/unitTests.py
@@ -0,0 +1,2375 @@
+# -*- coding: UTF-8 -*-
+from unittest import TestCase, TestSuite, TextTestRunner
+import unittest
+from pyparsing import ParseException
+import HTMLTestRunner
+
+import sys
+import pprint
+import pdb
+
+# see which Python implementation we are running
+CPYTHON_ENV = (sys.platform == "win32")  # NOTE(review): this compares the OS platform string, so it is False on non-Windows hosts - confirm intent
+IRON_PYTHON_ENV = (sys.platform == "cli")
+JYTHON_ENV = sys.platform.startswith("java")
+
+TEST_USING_PACKRAT = True  # the alternative setting is kept commented out on the next line
+#~ TEST_USING_PACKRAT = False
+
+# simple utility for flattening nested lists
+def flatten(L):
+ if type(L) is not list: return [L]
+ if L == []: return L
+ return flatten(L[0]) + flatten(L[1:])
+
+"""
+class ParseTest(TestCase):
+ def setUp(self):
+ pass
+
+ def runTest(self):
+ assert 1==1, "we've got bigger problems..."
+
+ def tearDown(self):
+ pass
+"""
+
+class ParseTestCase(TestCase):  # shared base class for the tests in this file; prints a banner around each test
+ def setUp(self):
+ print ">>>> Starting test",str(self)
+
+ def runTest(self):  # does nothing here; the subclasses in this file define their own runTest
+ pass
+
+ def tearDown(self):
+ print "<<<< End of test",str(self)
+ print
+
+ def __str__(self):  # the banners therefore show just the class name
+ return self.__class__.__name__
+
+class PyparsingTestInit(ParseTestCase):  # prints the pyparsing and Python versions; inherits the no-op runTest
+ def setUp(self):
+ from pyparsing import __version__ as pyparsingVersion
+ print "Beginning test of pyparsing, version", pyparsingVersion
+ print "Python version", sys.version
+ def tearDown(self):  # overrides the base tearDown so no end-of-test banner is printed
+ pass
+
+class ParseASMLTest(ParseTestCase):  # parses sample files with parseASML.BNF() and checks token counts and selected named results
+ def runTest(self):
+ import parseASML
+ files = [ ("A52759.txt", 2150, True, True, 0.38, 25, "21:47:17", "22:07:32", 235),  # one tuple of expected values per file; field order matches the for-loop unpacking below
+ ("24141506_P5107RM59_399A1457N1_PHS04", 373,True, True, 0.5, 1, "11:35:25", "11:37:05", 183),
+ ("24141506_P5107RM59_399A1457N1_PHS04B", 373, True, True, 0.5, 1, "01:02:54", "01:04:49", 186),
+ ("24157800_P5107RM74_399A1828M1_PHS04", 1141, True, False, 0.5, 13, "00:00:54", "23:59:48", 154) ]
+ for testFile,numToks,trkInpUsed,trkOutpUsed,maxDelta,numWafers,minProcBeg,maxProcEnd,maxLevStatsIV in files:
+ print "Parsing",testFile,"...",
+ #~ text = "\n".join( [ line for line in file(testFile) ] )
+ #~ results = parseASML.BNF().parseString( text )
+ results = parseASML.BNF().parseFile( testFile )
+ #~ pprint.pprint( results.asList() )
+ #~ pprint.pprint( results.batchData.asList() )
+ #~ print results.batchData.keys()
+
+ allToks = flatten( results.asList() )  # total token count is checked on the flattened result
+ assert len(allToks) == numToks, \
+ "wrong number of tokens parsed (%s), got %d, expected %d" % (testFile, len(allToks),numToks)
+ assert results.batchData.trackInputUsed == trkInpUsed, "error evaluating results.batchData.trackInputUsed"
+ assert results.batchData.trackOutputUsed == trkOutpUsed, "error evaluating results.batchData.trackOutputUsed"
+ assert results.batchData.maxDelta == maxDelta,"error evaluating results.batchData.maxDelta"
+ assert len(results.waferData) == numWafers, "did not read correct number of wafers"
+ assert min([wd.procBegin for wd in results.waferData]) == minProcBeg, "error reading waferData.procBegin"
+ assert max([results.waferData[k].procEnd for k in range(len(results.waferData))]) == maxProcEnd, "error reading waferData.procEnd"
+ assert sum(results.levelStatsIV['MAX']) == maxLevStatsIV, "error reading levelStatsIV"  # lookup by key ...
+ assert sum(results.levelStatsIV.MAX) == maxLevStatsIV, "error reading levelStatsIV"  # ... and the same value by attribute access
+ print "OK"
+ print testFile,len(allToks)
+ #~ print "results.batchData.trackInputUsed =",results.batchData.trackInputUsed
+ #~ print "results.batchData.trackOutputUsed =",results.batchData.trackOutputUsed
+ #~ print "results.batchData.maxDelta =",results.batchData.maxDelta
+ #~ print len(results.waferData)," wafers"
+ #~ print min([wd.procBegin for wd in results.waferData])
+ #~ print max([results.waferData[k].procEnd for k in range(len(results.waferData))])
+ #~ print sum(results.levelStatsIV['MAX.'])
+
+
+class ParseFourFnTest(ParseTestCase):  # parses arithmetic strings with fourFn.BNF() and compares the evaluated stack to a Python-computed value
+ def runTest(self):
+ import fourFn
+ def test(s,ans):  # s: expression text, ans: expected numeric result
+ fourFn.exprStack = []  # rebind the module-level stack to an empty list before each parse
+ results = fourFn.BNF().parseString( s )
+ resultValue = fourFn.evaluateStack( fourFn.exprStack )
+ assert resultValue == ans, "failed to evaluate %s, got %f" % ( s, resultValue )  # NOTE(review): exact equality, also for the float cases below
+ print s, "->", resultValue
+
+ test( "9", 9 )
+ test( "9 + 3 + 6", 18 )
+ test( "9 + 3 / 11", 9.0+3.0/11.0)
+ test( "(9 + 3)", 12 )
+ test( "(9+3) / 11", (9.0+3.0)/11.0 )
+ test( "9 - (12 - 6)", 3)
+ test( "2*3.14159", 6.28318)
+ test( "3.1415926535*3.1415926535 / 10", 3.1415926535*3.1415926535/10.0 )
+ test( "PI * PI / 10", 3.1415926535*3.1415926535/10.0 )
+ test( "PI*PI/10", 3.1415926535*3.1415926535/10.0 )
+ test( "6.02E23 * 8.048", 6.02E23 * 8.048 )
+ test( "e / 3", 2.718281828/3.0 )
+ test( "sin(PI/2)", 1.0 )
+ test( "trunc(E)", 2.0 )
+ test( "E^PI", 2.718281828**3.1415926535 )
+ test( "2^3^2", 2**3**2)
+ test( "2^3+2", 2**3+2)
+ test( "2^9", 2**9 )
+ test( "sgn(-2)", -1 )
+ test( "sgn(0)", 0 )
+ test( "sgn(0.1)", 1 )
+
+class ParseSQLTest(ParseTestCase):  # parses SELECT statements with simpleSQL.simpleSQL and checks token counts or error locations
+ def runTest(self):
+ import simpleSQL
+
+ def test(s, numToks, errloc=-1 ):  # numToks: expected flattened token count; errloc: expected ParseException.loc, -1 when none is given
+ try:
+ sqlToks = flatten( simpleSQL.simpleSQL.parseString(s).asList() )
+ print s,sqlToks,len(sqlToks)
+ assert len(sqlToks) == numToks
+ except ParseException, e:
+ if errloc >= 0:  # NOTE(review): with the default errloc a ParseException is accepted silently - confirm that is intended
+ assert e.loc == errloc
+
+
+ test( "SELECT * from XYZZY, ABC", 6 )
+ test( "select * from SYS.XYZZY", 5 )
+ test( "Select A from Sys.dual", 5 )
+ test( "Select A,B,C from Sys.dual", 7 )
+ test( "Select A, B, C from Sys.dual", 7 )
+ test( "Select A, B, C from Sys.dual, Table2 ", 8 )
+ test( "Xelect A, B, C from Sys.dual", 0, 0 )
+ test( "Select A, B, C frox Sys.dual", 0, 15 )
+ test( "Select", 0, 6 )
+ test( "Select &&& frox Sys.dual", 0, 7 )
+ test( "Select A from Sys.dual where a in ('RED','GREEN','BLUE')", 12 )
+ test( "Select A from Sys.dual where a in ('RED','GREEN','BLUE') and b in (10,20,30)", 20 )
+ test( "Select A,b from table1,table2 where table1.id eq table2.id -- test out comparison operators", 10 )
+
+class ParseConfigFileTest(ParseTestCase):  # parses .ini files with configParse.inifile_BNF() and checks token counts and named values
+ def runTest(self):
+ import configParse
+
+ def test(fnam,numToks,resCheckList):  # resCheckList: (dotted result name, expected value) pairs
+ print "Parsing",fnam,"...",
+ iniFileLines = "\n".join([ lin for lin in file(fnam) ])  # file() is the Python 2 builtin; NOTE(review): the handle is never closed explicitly
+ iniData = configParse.inifile_BNF().parseString( iniFileLines )
+ print len(flatten(iniData.asList()))
+ #~ pprint.pprint( iniData.asList() )
+ #~ pprint.pprint( repr(iniData) )
+ #~ print len(iniData), len(flatten(iniData.asList()))
+ print iniData.keys()
+ #~ print iniData.users.keys()
+ #~ print
+ assert len(flatten(iniData.asList())) == numToks, "file %s not parsed correctly" % fnam
+ for chk in resCheckList:
+ print chk[0], eval("iniData."+chk[0]), chk[1]  # eval resolves the dotted name against iniData; the names come from the literals below
+ assert eval("iniData."+chk[0]) == chk[1]
+ print "OK"
+
+ test("karthik.ini", 23,
+ [ ("users.K","8"),
+ ("users.mod_scheme","'QPSK'"),
+ ("users.Na", "K+2") ]
+ )
+ test("setup.ini", 125,
+ [ ("Startup.audioinf", "M3i"),
+ ("Languages.key1", "0x0003"),
+ ("test.foo","bar") ] )
+
+class ParseJSONDataTest(ParseTestCase):  # parses five sample documents with jsonObject and pretty-prints them; nothing is asserted
+ def runTest(self):
+ from jsonParser import jsonObject
+ from jsonParserFull import test1,test2,test3,test4,test5
+
+ expected = [  # placeholders only: the comparison against exp is commented out at the end of the loop
+ [],
+ [],
+ [],
+ [],
+ [],
+ ]
+
+ import pprint
+ for t,exp in zip((test1,test2,test3,test4,test5),expected):
+ result = jsonObject.parseString(t)
+## print result.dump()
+ pprint.pprint(result.asList())
+ print
+## if result.asList() != exp:
+## print "Expected %s, parsed results as %s" % (exp, result.asList())
+
+class ParseCommaSeparatedValuesTest(ParseTestCase):  # parses lines with commaSeparatedList and checks selected fields by index
+ def runTest(self):
+ from pyparsing import commaSeparatedList
+ import string
+
+ testData = [
+ "a,b,c,100.2,,3",
+ "d, e, j k , m ",
+ "'Hello, World', f, g , , 5.1,x",
+ "John Doe, 123 Main St., Cleveland, Ohio",
+ "Jane Doe, 456 St. James St., Los Angeles , California ",
+ "",
+ ]
+ testVals = [  # per line: (index, expected value) pairs
+ [ (3,'100.2'), (4,''), (5, '3') ],
+ [ (2, 'j k'), (3, 'm') ],
+ [ (0, "'Hello, World'"), (2, 'g'), (3, '') ],
+ [ (0,'John Doe'), (1, '123 Main St.'), (2, 'Cleveland'), (3, 'Ohio') ],
+ [ (0,'Jane Doe'), (1, '456 St. James St.'), (2, 'Los Angeles'), (3, 'California') ]
+ ]
+ for line,tests in zip(testData, testVals):  # zip stops at the shorter list, so the sixth (empty) testData line is never parsed
+ print "Parsing: \""+line+"\" ->",
+ results = commaSeparatedList.parseString(line)
+ print results.asList()
+ for t in tests:
+ if not(len(results)>t[0] and results[t[0]] == t[1]):  # dump diagnostics first; the assert below then fails on the same condition
+ print "$$$", results.dump()
+ print "$$$", results[0]
+ assert len(results)>t[0] and results[t[0]] == t[1],"failed on %s, item %d s/b '%s', got '%s'" % ( line, t[0], t[1], str(results.asList()) )
+
+class ParseEBNFTest(ParseTestCase):  # builds parsers from an EBNF grammar via ebnf.parse, then parses that same grammar text with the result
+ def runTest(self):
+ import ebnf
+ from pyparsing import Word, quotedString, alphas, nums,ParserElement
+
+ print 'Constructing EBNF parser with pyparsing...'
+
+ grammar = '''
+ syntax = (syntax_rule), {(syntax_rule)};
+ syntax_rule = meta_identifier, '=', definitions_list, ';';
+ definitions_list = single_definition, {'|', single_definition};
+ single_definition = syntactic_term, {',', syntactic_term};
+ syntactic_term = syntactic_factor,['-', syntactic_factor];
+ syntactic_factor = [integer, '*'], syntactic_primary;
+ syntactic_primary = optional_sequence | repeated_sequence |
+ grouped_sequence | meta_identifier | terminal_string;
+ optional_sequence = '[', definitions_list, ']';
+ repeated_sequence = '{', definitions_list, '}';
+ grouped_sequence = '(', definitions_list, ')';
+ (*
+ terminal_string = "'", character - "'", {character - "'"}, "'" |
+ '"', character - '"', {character - '"'}, '"';
+ meta_identifier = letter, {letter | digit};
+ integer = digit, {digit};
+ *)
+ '''
+
+ table = {}  # expressions for the three rule names that appear only inside the (* ... *) section of the grammar text
+ table['terminal_string'] = quotedString
+ table['meta_identifier'] = Word(alphas+"_", alphas+"_"+nums)
+ table['integer'] = Word(nums)
+
+ print 'Parsing EBNF grammar with EBNF parser...'
+ parsers = ebnf.parse(grammar, table)
+ ebnf_parser = parsers['syntax']
+ #~ print ",\n ".join( str(parsers.keys()).split(", ") )
+ print "-","\n- ".join( parsers.keys() )
+ assert len(parsers.keys()) == 13, "failed to construct syntax grammar"
+
+ print 'Parsing EBNF grammar with generated EBNF parser...'
+ parsed_chars = ebnf_parser.parseString(grammar)
+ parsed_char_len = len(parsed_chars)  # assigned but not used again in this method
+
+ print "],\n".join(str( parsed_chars.asList() ).split("],"))
+ assert len(flatten(parsed_chars.asList())) == 98, "failed to tokenize grammar correctly"
+
+
+class ParseIDLTest(ParseTestCase):  # parses IDL snippets with idlParse.CORBA_IDL_BNF() and checks token counts or the failure location
+ def runTest(self):
+ import idlParse
+
+ def test( strng, numToks, errloc=0 ):  # numToks == 0 means a ParseException is expected, at location errloc
+ print strng
+ try:
+ bnf = idlParse.CORBA_IDL_BNF()
+ tokens = bnf.parseString( strng )
+ print "tokens = "
+ pprint.pprint( tokens.asList() )
+ tokens = flatten( tokens.asList() )
+ print len(tokens)
+ assert len(tokens) == numToks, "error matching IDL string, %s -> %s" % (strng, str(tokens) )
+ except ParseException, err:
+ print err.line
+ print " "*(err.column-1) + "^"  # print a caret under the reported error column
+ print err
+ assert numToks == 0, "unexpected ParseException while parsing %s, %s" % (strng, str(err) )
+ assert err.loc == errloc, "expected ParseException at %d, found exception at %d" % (errloc, err.loc)
+
+ test(
+ """
+ /*
+ * a block comment *
+ */
+ typedef string[10] tenStrings;
+ typedef sequence<string> stringSeq;
+ typedef sequence< sequence<string> > stringSeqSeq;
+
+ interface QoSAdmin {
+ stringSeq method1( in string arg1, inout long arg2 );
+ stringSeqSeq method2( in string arg1, inout long arg2, inout long arg3);
+ string method3();
+ };
+ """, 59
+ )
+ test(
+ """
+ /*
+ * a block comment *
+ */
+ typedef string[10] tenStrings;
+ typedef
+ /** ** *** **** *
+ * a block comment *
+ */
+ sequence<string> /*comment inside an And */ stringSeq;
+ /* */ /**/ /***/ /****/
+ typedef sequence< sequence<string> > stringSeqSeq;
+
+ interface QoSAdmin {
+ stringSeq method1( in string arg1, inout long arg2 );
+ stringSeqSeq method2( in string arg1, inout long arg2, inout long arg3);
+ string method3();
+ };
+ """, 59
+ )
+ test(
+ r"""
+ const string test="Test String\n";
+ const long a = 0;
+ const long b = -100;
+ const float c = 3.14159;
+ const long d = 0x007f7f7f;
+ exception TestException
+ {
+ string msg;
+ sequence<string> dataStrings;
+ };
+
+ interface TestInterface
+ {
+ void method1( in string arg1, inout long arg2 );
+ };
+ """, 60
+ )
+ test(  # expected failure: this snippet closes the exception body with "];", and the call passes numToks 0 with errloc 57
+ """
+ module Test1
+ {
+ exception TestException
+ {
+ string msg;
+ ];
+
+ interface TestInterface
+ {
+ void method1( in string arg1, inout long arg2 )
+ raises ( TestException );
+ };
+ };
+ """, 0, 57
+ )
+ test(
+ """
+ module Test1
+ {
+ exception TestException
+ {
+ string msg;
+ };
+
+ };
+ """, 13
+ )
+
+class ParseVerilogTest(ParseTestCase):  # placeholder: runTest does nothing
+ def runTest(self):
+ pass
+
+class RunExamplesTest(ParseTestCase):  # placeholder: runTest does nothing
+ def runTest(self):
+ pass
+
+class ScanStringTest(ParseTestCase):  # uses scanString to pull IP addresses out of an HTML table, then checks that StringEnd is found by scanString
+ def runTest(self):
+ from pyparsing import Word, Combine, Suppress, CharsNotIn, nums, StringEnd
+ testdata = """
+ <table border="0" cellpadding="3" cellspacing="3" frame="" width="90%">
+ <tr align="left" valign="top">
+ <td><b>Name</b></td>
+ <td><b>IP Address</b></td>
+ <td><b>Location</b></td>
+ </tr>
+ <tr align="left" valign="top" bgcolor="#c7efce">
+ <td>time-a.nist.gov</td>
+ <td>129.6.15.28</td>
+ <td>NIST, Gaithersburg, Maryland</td>
+ </tr>
+ <tr align="left" valign="top">
+ <td>time-b.nist.gov</td>
+ <td>129.6.15.29</td>
+ <td>NIST, Gaithersburg, Maryland</td>
+ </tr>
+ <tr align="left" valign="top" bgcolor="#c7efce">
+ <td>time-a.timefreq.bldrdoc.gov</td>
+ <td>132.163.4.101</td>
+ <td>NIST, Boulder, Colorado</td>
+ </tr>
+ <tr align="left" valign="top">
+ <td>time-b.timefreq.bldrdoc.gov</td>
+ <td>132.163.4.102</td>
+ <td>NIST, Boulder, Colorado</td>
+ </tr>
+ <tr align="left" valign="top" bgcolor="#c7efce">
+ <td>time-c.timefreq.bldrdoc.gov</td>
+ <td>132.163.4.103</td>
+ <td>NIST, Boulder, Colorado</td>
+ </tr>
+ </table>
+ """
+ integer = Word(nums)
+ ipAddress = Combine( integer + "." + integer + "." + integer + "." + integer )  # four dotted integers combined into one token
+ tdStart = Suppress("<td>")
+ tdEnd = Suppress("</td>")
+ timeServerPattern = tdStart + ipAddress.setResultsName("ipAddr") + tdEnd + \
+ tdStart + CharsNotIn("<").setResultsName("loc") + tdEnd
+ servers = \
+ [ srvr.ipAddr for srvr,startloc,endloc in timeServerPattern.scanString( testdata ) ]
+
+ print servers
+ assert servers == ['129.6.15.28', '129.6.15.29', '132.163.4.101', '132.163.4.102', '132.163.4.103'], \
+ "failed scanString()"
+
+ # test for stringEnd detection in scanString
+ foundStringEnds = [ r for r in StringEnd().scanString("xyzzy") ]
+ print foundStringEnds
+ assert foundStringEnds, "Failed to find StringEnd in scanString"
+
+class QuotedStringsTest(ParseTestCase):  # scans literals with sglQuotedString / dblQuotedString / quotedString and checks match counts and offsets
+ def runTest(self):
+ from pyparsing import sglQuotedString,dblQuotedString,quotedString
+ testData = \
+ """
+ 'a valid single quoted string'
+ 'an invalid single quoted string
+ because it spans lines'
+ "a valid double quoted string"
+ "an invalid double quoted string
+ because it spans lines"
+ """
+ print testData
+ sglStrings = [ (t[0],b,e) for (t,b,e) in sglQuotedString.scanString(testData) ]  # (matched text, start, end) triples
+ print sglStrings
+ assert len(sglStrings) == 1 and (sglStrings[0][1]==17 and sglStrings[0][2]==47), \
+ "single quoted string failure"
+ dblStrings = [ (t[0],b,e) for (t,b,e) in dblQuotedString.scanString(testData) ]
+ print dblStrings
+ assert len(dblStrings) == 1 and (dblStrings[0][1]==154 and dblStrings[0][2]==184), \
+ "double quoted string failure"
+ allStrings = [ (t[0],b,e) for (t,b,e) in quotedString.scanString(testData) ]
+ print allStrings
+ assert len(allStrings) == 2 and (allStrings[0][1]==17 and allStrings[0][2]==47) and \
+ (allStrings[1][1]==154 and allStrings[1][2]==184), \
+ "quoted string failure"
+
+ escapedQuoteTest = \
+ r"""
+ 'This string has an escaped (\') quote character'
+ "This string has an escaped (\") quote character"
+ """
+ sglStrings = [ (t[0],b,e) for (t,b,e) in sglQuotedString.scanString(escapedQuoteTest) ]  # same three checks, now with backslash-escaped quotes
+ print sglStrings
+ assert len(sglStrings) == 1 and (sglStrings[0][1]==17 and sglStrings[0][2]==66), \
+ "single quoted string escaped quote failure (%s)" % str(sglStrings[0])
+ dblStrings = [ (t[0],b,e) for (t,b,e) in dblQuotedString.scanString(escapedQuoteTest) ]
+ print dblStrings
+ assert len(dblStrings) == 1 and (dblStrings[0][1]==83 and dblStrings[0][2]==132), \
+ "double quoted string escaped quote failure (%s)" % str(dblStrings[0])
+ allStrings = [ (t[0],b,e) for (t,b,e) in quotedString.scanString(escapedQuoteTest) ]
+ print allStrings
+ assert len(allStrings) == 2 and (allStrings[0][1]==17 and allStrings[0][2]==66 and
+ allStrings[1][1]==83 and allStrings[1][2]==132), \
+ "quoted string escaped quote failure (%s)" % ([str(s[0]) for s in allStrings])
+
+ dblQuoteTest = \
+ r"""
+ 'This string has an doubled ('') quote character'
+ "This string has an doubled ("") quote character"
+ """
+ sglStrings = [ (t[0],b,e) for (t,b,e) in sglQuotedString.scanString(dblQuoteTest) ]  # same three checks, now with doubled quote characters
+ print sglStrings
+ assert len(sglStrings) == 1 and (sglStrings[0][1]==17 and sglStrings[0][2]==66), \
+ "single quoted string escaped quote failure (%s)" % str(sglStrings[0])
+ dblStrings = [ (t[0],b,e) for (t,b,e) in dblQuotedString.scanString(dblQuoteTest) ]
+ print dblStrings
+ assert len(dblStrings) == 1 and (dblStrings[0][1]==83 and dblStrings[0][2]==132), \
+ "double quoted string escaped quote failure (%s)" % str(dblStrings[0])
+ allStrings = [ (t[0],b,e) for (t,b,e) in quotedString.scanString(dblQuoteTest) ]
+ print allStrings
+ assert len(allStrings) == 2 and (allStrings[0][1]==17 and allStrings[0][2]==66 and
+ allStrings[1][1]==83 and allStrings[1][2]==132), \
+ "quoted string escaped quote failure (%s)" % ([str(s[0]) for s in allStrings])
+
+class CaselessOneOfTest(ParseTestCase):  # checks oneOf(..., caseless=True) on two word lists that differ only in the case of one entry (aA vs Aa)
+ def runTest(self):
+ from pyparsing import oneOf,ZeroOrMore
+
+ caseless1 = oneOf("d a b c aA B A C", caseless=True)
+ caseless1str = str( caseless1 )
+ print caseless1str
+ caseless2 = oneOf("d a b c Aa B A C", caseless=True)
+ caseless2str = str( caseless2 )
+ print caseless2str
+ assert caseless1str.upper() == caseless2str.upper(), "oneOf not handling caseless option properly"
+ assert caseless1str != caseless2str, "Caseless option properly sorted"
+
+ res = ZeroOrMore(caseless1).parseString("AAaaAaaA")
+ print res
+ assert len(res) == 4, "caseless1 oneOf failed"
+ assert "".join(res) == "aA"*4,"caseless1 CaselessLiteral return failed"  # tokens are expected in the spelling given to oneOf, not the input's
+
+ res = ZeroOrMore(caseless2).parseString("AAaaAaaA")
+ print res
+ assert len(res) == 4, "caseless2 oneOf failed"
+ assert "".join(res) == "Aa"*4,"caseless1 CaselessLiteral return failed"
+
+
+class AsXMLTest(ParseTestCase):  # compares ParseResults.asXML("TEST", ...) output with expected XML, with and without unnamed items
+ def runTest(self):
+
+ import pyparsing
+ # test asXML()
+
+ aaa = pyparsing.Word("a").setResultsName("A")
+ bbb = pyparsing.Group(pyparsing.Word("b")).setResultsName("B")
+ ccc = pyparsing.Combine(":" + pyparsing.Word("c")).setResultsName("C")
+ g1 = "XXX>&<" + pyparsing.ZeroOrMore( aaa | bbb | ccc )  # the leading literal has no results name
+ teststring = "XXX>&< b b a b b a b :c b a"
+ #~ print teststring
+ print "test including all items"
+ xml = g1.parseString(teststring).asXML("TEST",namedItemsOnly=False)
+ assert xml=="\n".join(["",
+ "<TEST>",
+ " <ITEM>XXX&gt;&amp;&lt;</ITEM>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <A>a</A>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <A>a</A>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <C>:c</C>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <A>a</A>",
+ "</TEST>",
+ ] ), \
+ "failed to generate XML correctly showing all items: \n[" + xml + "]"
+ print "test filtering unnamed items"
+ xml = g1.parseString(teststring).asXML("TEST",namedItemsOnly=True)  # the expected output below has no entry for the unnamed literal
+ assert xml=="\n".join(["",
+ "<TEST>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <A>a</A>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <A>a</A>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <C>:c</C>",
+ " <B>",
+ " <ITEM>b</ITEM>",
+ " </B>",
+ " <A>a</A>",
+ "</TEST>",
+ ] ), \
+ "failed to generate XML correctly, filtering unnamed items: " + xml
+
+class AsXMLTest2(ParseTestCase):  # only constructs a grammar of named expressions; this method parses nothing and asserts nothing
+ def runTest(self):
+ from pyparsing import Suppress,Optional,CharsNotIn,Combine,ZeroOrMore,Word,\
+ Group,Literal,alphas,alphanums,delimitedList,OneOrMore
+
+ EndOfLine = Word("\n").setParseAction(lambda s,l,t: [' '])  # the parse action replaces the matched newlines with one space
+ whiteSpace=Word('\t ')
+ Mexpr = Suppress(Optional(whiteSpace)) + CharsNotIn('\\"\t \n') + Optional(" ") + \
+ Suppress(Optional(whiteSpace))
+ reducedString = Combine(Mexpr + ZeroOrMore(EndOfLine + Mexpr))
+ _bslash = "\\"
+ _escapables = "tnrfbacdeghijklmopqsuvwxyz" + _bslash + "'" + '"'
+ _octDigits = "01234567"
+ _escapedChar = ( Word( _bslash, _escapables, exact=2 ) |
+ Word( _bslash, _octDigits, min=2, max=4 ) )
+ _sglQuote = Literal("'")
+ _dblQuote = Literal('"')
+ QuotedReducedString = Combine( Suppress(_dblQuote) + ZeroOrMore( reducedString |
+ _escapedChar ) + \
+ Suppress(_dblQuote )).streamline()
+
+ Manifest_string = QuotedReducedString.setResultsName('manifest_string')
+
+ Identifier = Word( alphas, alphanums+ '_$' ).setResultsName("identifier")
+ Index_string = CharsNotIn('\\";\n')
+ Index_string.setName('index_string')
+ Index_term_list = (
+ Group(delimitedList(Manifest_string, delim=',')) | \
+ Index_string
+ ).setResultsName('value')
+
+ IndexKey = Identifier.setResultsName('key')
+ IndexKey.setName('key')
+ Index_clause = Group(IndexKey + Suppress(':') + Optional(Index_term_list))
+ Index_clause.setName('index_clause')
+ Index_list = Index_clause.setResultsName('index')
+ Index_list.setName('index_list')
+ Index_block = Group('indexing' + Group(OneOrMore(Index_list + Suppress(';')))).setResultsName('indexes')  # last expression built; not used further here
+
+
+class CommentParserTest(ParseTestCase):  # checks on which lines cStyleComment and htmlComment match, plus a cppStyleComment continuation case
+ def runTest(self):
+ import pyparsing
+ print "verify processing of C and HTML comments"
+ testdata = """
+ /* */
+ /** **/
+ /**/
+ /***/
+ /****/
+ /* /*/
+ /** /*/
+ /*** /*/
+ /*
+ ablsjdflj
+ */
+ """
+ foundLines = [ pyparsing.lineno(s,testdata)  # line number of each match start
+ for t,s,e in pyparsing.cStyleComment.scanString(testdata) ]
+ assert foundLines == range(11)[2:],"only found C comments on lines "+str(foundLines)  # range() is a list under Python 2: lines 2 through 10
+ testdata = """
+ <!-- -->
+ <!--- --->
+ <!---->
+ <!----->
+ <!------>
+ <!-- /-->
+ <!--- /-->
+ <!---- /-->
+ <!---- /- ->
+ <!---- / -- >
+ <!--
+ ablsjdflj
+ -->
+ """
+ foundLines = [ pyparsing.lineno(s,testdata)
+ for t,s,e in pyparsing.htmlComment.scanString(testdata) ]
+ assert foundLines == range(11)[2:],"only found HTML comments on lines "+str(foundLines)
+
+ # test C++ single line comments that have line terminated with '\' (should continue comment to following line)
+ testSource = r"""
+ // comment1
+ // comment2 \
+ still comment 2
+ // comment 3
+ """
+ assert len(pyparsing.cppStyleComment.searchString(testSource)[1][0]) == 41, r"failed to match single-line comment with '\' at EOL"  # [1][0]: text of the second match
+
+class ParseExpressionResultsTest(ParseTestCase):  # checks the sizes of the named groups Head / ABC / Tail after parsing one phrase
+ def runTest(self):
+ from pyparsing import Word,alphas,OneOrMore,Optional,Group
+
+ a = Word("a",alphas).setName("A")
+ b = Word("b",alphas).setName("B")
+ c = Word("c",alphas).setName("C")
+ ab = (a + b).setName("AB")
+ abc = (ab + c).setName("ABC")
+ word = Word(alphas).setName("word")
+
+ #~ words = OneOrMore(word).setName("words")
+ words = Group(OneOrMore(~a + word)).setName("words")  # ~a is a negative lookahead: a word matching expression a ends the group
+
+ #~ phrase = words.setResultsName("Head") + \
+ #~ ( abc ^ ab ^ a ).setResultsName("ABC") + \
+ #~ words.setResultsName("Tail")
+ #~ phrase = words.setResultsName("Head") + \
+ #~ ( abc | ab | a ).setResultsName("ABC") + \
+ #~ words.setResultsName("Tail")
+ phrase = words.setResultsName("Head") + \
+ Group( a + Optional(b + Optional(c)) ).setResultsName("ABC") + \
+ words.setResultsName("Tail")
+
+ results = phrase.parseString("xavier yeti alpha beta charlie will beaver")
+ print results,results.Head, results.ABC,results.Tail
+ for key,ln in [("Head",2), ("ABC",3), ("Tail",2)]:  # (results name, expected number of tokens)
+ #~ assert len(results[key]) == ln,"expected %d elements in %s, found %s" % (ln, key, str(results[key].asList()))
+ assert len(results[key]) == ln,"expected %d elements in %s, found %s" % (ln, key, str(results[key]))
+
+
+class ParseKeywordTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import Literal,Keyword
+
+ kw = Keyword("if")
+ lit = Literal("if")
+
+ def test(s,litShouldPass,kwShouldPass):
+ print "Test",s
+ print "Match Literal",
+ try:
+ print lit.parseString(s)
+ except:
+ print "failed"
+ if litShouldPass: assert False, "Literal failed to match %s, should have" % s
+ else:
+ if not litShouldPass: assert False, "Literal matched %s, should not have" % s
+
+ print "Match Keyword",
+ try:
+ print kw.parseString(s)
+ except:
+ print "failed"
+ if kwShouldPass: assert False, "Keyword failed to match %s, should have" % s
+ else:
+ if not kwShouldPass: assert False, "Keyword matched %s, should not have" % s
+
+ test("ifOnlyIfOnly", True, False)
+ test("if(OnlyIfOnly)", True, True)
+ test("if (OnlyIf Only)", True, True)
+
+ kw = Keyword("if",caseless=True)
+
+ test("IFOnlyIfOnly", False, False)
+ test("If(OnlyIfOnly)", False, True)
+ test("iF (OnlyIf Only)", False, True)
+
+
+
+class ParseExpressionResultsAccumulateTest(ParseTestCase):  # checks that results names set with listAllMatches=True collect every match
+ def runTest(self):
+ from pyparsing import Word,delimitedList,Combine,alphas,nums
+
+ num=Word(nums).setName("num").setResultsName("base10", listAllMatches=True)
+ hexnum=Combine("0x"+ Word(nums)).setName("hexnum").setResultsName("hex", listAllMatches=True)
+ name = Word(alphas).setName("word").setResultsName("word", listAllMatches=True)
+ list_of_num=delimitedList( hexnum | num | name, "," )
+
+ tokens = list_of_num.parseString('1, 0x2, 3, 0x4, aaa')
+ for k,llen,lst in ( ("base10",2,['1','3']),  # (results name, expected length, expected values)
+ ("hex",2,['0x2','0x4']),
+ ("word",1,['aaa']) ):
+ print k,tokens[k]
+ assert len(tokens[k]) == llen, "Wrong length for key %s, %s" % (k,str(tokens[k].asList()))
+ assert lst == tokens[k].asList(), "Incorrect list returned for key %s, %s" % (k,str(tokens[k].asList()))
+ assert tokens.base10.asList() == ['1','3'], "Incorrect list for attribute base10, %s" % str(tokens.base10.asList())  # same values via attribute access
+ assert tokens.hex.asList() == ['0x2','0x4'], "Incorrect list for attribute hex, %s" % str(tokens.hex.asList())
+ assert tokens.word.asList() == ['aaa'], "Incorrect list for attribute word, %s" % str(tokens.word.asList())
+
+ from pyparsing import Literal, Word, nums, Group, Dict, alphas, \
+ quotedString, oneOf, delimitedList, removeQuotes, alphanums
+
+ lbrack = Literal("(").suppress()
+ rbrack = Literal(")").suppress()
+ integer = Word( nums ).setName("int")
+ variable = Word( alphas, max=1 ).setName("variable")
+ relation_body_item = variable | integer | quotedString.copy().setParseAction(removeQuotes)
+ relation_name = Word( alphas+"_", alphanums+"_" )
+ relation_body = lbrack + Group(delimitedList(relation_body_item)) + rbrack
+ Goal = Dict(Group( relation_name + relation_body ))
+ Comparison_Predicate = Group(variable + oneOf("< >") + integer).setResultsName("pred",listAllMatches=True)
+ Query = Goal.setResultsName("head") + ":-" + delimitedList(Goal | Comparison_Predicate)
+
+ test="""Q(x,y,z):-Bloo(x,"Mitsis",y),Foo(y,z,1243),y>28,x<12,x>3"""
+
+ queryRes = Query.parseString(test)
+ print "pred",queryRes.pred
+ assert queryRes.pred.asList() == [['y', '>', '28'], ['x', '<', '12'], ['x', '>', '3']], "Incorrect list for attribute pred, %s" % str(queryRes.pred.asList())  # all three comparisons are expected under "pred"
+ print queryRes.dump()
+
+class ReStringRangeTest(ParseTestCase):  # checks pyparsing.srange() against the expected expansion of each bracket expression
+ def runTest(self):
+ import pyparsing
+ testCases = (  # paired by position with expectedResults below
+ (r"[A-Z]"),
+ (r"[A-A]"),
+ (r"[A-Za-z]"),
+ (r"[A-z]"),
+ (r"[\ -\~]"),
+ (r"[\0x20-0]"),
+ (r"[\0x21-\0x7E]"),
+ (r"[\0xa1-\0xfe]"),
+ (r"[\040-0]"),
+ (r"[A-Za-z0-9]"),
+ (r"[A-Za-z0-9_]"),
+ (r"[A-Za-z0-9_$]"),
+ (r"[A-Za-z0-9_$\-]"),
+ (r"[^0-9\\]"),
+ (r"[a-zA-Z]"),
+ (r"[/\^~]"),
+ (r"[=\+\-!]"),
+ (r"[A-]"),
+ (r"[-A]"),
+ )
+ expectedResults = (
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+ "A",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz",
+ " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
+ " !\"#$%&'()*+,-./0",
+ "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
+ #~ (a commented-out raw-character spelling of the next entry stood here; its text was mis-encoded, so only the \x-escaped form is kept)
+ u'\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe',
+ " !\"#$%&'()*+,-./0",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$-",
+ "0123456789\\",
+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
+ "/^~",
+ "=+-!",
+ "A-",
+ "-A",
+ )
+ for test in zip( testCases, expectedResults ):
+ t,exp = test
+ res = pyparsing.srange(t)
+ #~ print t,"->",res
+ assert res == exp, "srange error, srange(%s)->'%s', expected '%s'" % (t, res, exp)
+
+class SkipToParserTests(ParseTestCase):  # checks SkipTo with an ignore expression: a ';' inside a C comment must not end the skip
+ def runTest(self):
+
+ from pyparsing import Literal, SkipTo, NotAny, cStyleComment
+
+ thingToFind = Literal('working')
+ testExpr = SkipTo(Literal(';'), True, cStyleComment) + thingToFind  # positional arguments: target, True, cStyleComment
+
+ def tryToParse (someText):  # any exception while parsing fails the test
+ try:
+ print testExpr.parseString(someText)
+ except Exception, e:
+ print "Exception %s while parsing string %s" % (e,repr(someText))
+ assert False, "Exception %s while parsing string %s" % (e,repr(someText))
+
+ # This first test works, as the SkipTo expression is immediately following the ignore expression (cStyleComment)
+ tryToParse('some text /* comment with ; in */; working')
+ # This second test fails, as there is text following the ignore expression, and before the SkipTo expression.
+ tryToParse('some text /* comment with ; in */some other stuff; working')  # NOTE(review): tryToParse asserts on any exception, so the "fails" remark above may be stale - confirm
+
+
+class CustomQuotesTest(ParseTestCase):  # checks QuotedString with custom quote, escape and end-quote arguments against one shared sample text
+ def runTest(self):
+ from pyparsing import QuotedString
+
+ testString = r"""
+ sdlfjs :sdf\:jls::djf: sl:kfsjf
+ sdlfjs -sdf\:jls::--djf: sl-kfsjf
+ sdlfjs -sdf\:::jls::--djf: sl:::-kfsjf
+ sdlfjs ^sdf\:jls^^--djf^ sl-kfsjf
+ sdlfjs ^^^==sdf\:j=lz::--djf: sl=^^=kfsjf
+ sdlfjs ==sdf\:j=ls::--djf: sl==kfsjf^^^
+ """
+ colonQuotes = QuotedString(':','\\','::')  # three positional arguments here; two in hatQuotes and dblEqQuotes below
+ dashQuotes = QuotedString('-','\\', '--')
+ hatQuotes = QuotedString('^','\\')
+ hatQuotes1 = QuotedString('^','\\','^^')
+ dblEqQuotes = QuotedString('==','\\')
+
+ def test(quoteExpr, expected):  # compares the first searchString match in testString with expected
+ print quoteExpr.pattern
+ print quoteExpr.searchString(testString)
+ print quoteExpr.searchString(testString)[0][0]
+ assert quoteExpr.searchString(testString)[0][0] == expected, \
+ "failed to match %s, expected '%s', got '%s'" % \
+ (quoteExpr,expected,quoteExpr.searchString(testString)[0])
+
+ test(colonQuotes, r"sdf:jls:djf")
+ test(dashQuotes, r"sdf\:jls::-djf: sl")
+ test(hatQuotes, r"sdf\:jls")
+ test(hatQuotes1, r"sdf\:jls^--djf")
+ test(dblEqQuotes, r"sdf\:j=ls::--djf: sl")
+ test( QuotedString(':::'), 'jls::--djf: sl')
+ test( QuotedString('==',endQuoteChar='--'), r'sdf\:j=lz::')
+ test( QuotedString('^^^',multiline=True), r"""==sdf\:j=lz::--djf: sl=^^=kfsjf
+ sdlfjs ==sdf\:j=ls::--djf: sl==kfsjf""")
+ try:
+ bad1 = QuotedString('','\\')  # an empty quote string is expected to raise SyntaxError
+ except SyntaxError,se:
+ pass
+ else:
+ assert False,"failed to raise SyntaxError with empty quote string"
+
+class RepeaterTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import matchPreviousLiteral,matchPreviousExpr, Forward, Literal, Word, alphas, nums
+
+ first = Word("abcdef").setName("word1")
+ bridge = Word(nums).setName("number")
+ second = matchPreviousLiteral(first).setName("repeat(word1Literal)")
+
+ seq = first + bridge + second
+
+ tests = [
+ ( "abc12abc", True ),
+ ( "abc12aabc", False ),
+ ( "abc12cba", True ),
+ ( "abc12bca", True ),
+ ]
+
+ for tst,result in tests:
+ found = False
+ for tokens,start,end in seq.scanString(tst):
+ f,b,s = tokens
+ print f,b,s
+ found = True
+ if not found:
+ print "No literal match in", tst
+ assert found == result, "Failed repeater for test: %s, matching %s" % (tst, str(seq))
+ print
+
+ # retest using matchPreviousExpr instead of matchPreviousLiteral
+ second = matchPreviousExpr(first).setName("repeat(word1expr)")
+ seq = first + bridge + second
+
+ tests = [
+ ( "abc12abc", True ),
+ ( "abc12cba", False ),
+ ( "abc12abcdef", False ),
+ ]
+
+ for tst,result in tests:
+ found = False
+ for tokens,start,end in seq.scanString(tst):
+ print tokens.asList()
+ found = True
+ if not found:
+ print "No expression match in", tst
+ assert found == result, "Failed repeater for test: %s, matching %s" % (tst, str(seq))
+
+ print
+
+ first = Word("abcdef").setName("word1")
+ bridge = Word(nums).setName("number")
+ second = matchPreviousExpr(first).setName("repeat(word1)")
+ seq = first + bridge + second
+ csFirst = seq.setName("word-num-word")
+ csSecond = matchPreviousExpr(csFirst)
+ compoundSeq = csFirst + ":" + csSecond
+ compoundSeq.streamline()
+ print compoundSeq
+
+ tests = [
+ ( "abc12abc:abc12abc", True ),
+ ( "abc12cba:abc12abc", False ),
+ ( "abc12abc:abc12abcdef", False ),
+ ]
+
+ #~ for tst,result in tests:
+ #~ print tst,
+ #~ try:
+ #~ compoundSeq.parseString(tst)
+ #~ print "MATCH"
+ #~ assert result, "matched when shouldn't have matched"
+ #~ except ParseException:
+ #~ print "NO MATCH"
+ #~ assert not result, "didnt match but should have"
+
+ #~ for tst,result in tests:
+ #~ print tst,
+ #~ if compoundSeq == tst:
+ #~ print "MATCH"
+ #~ assert result, "matched when shouldn't have matched"
+ #~ else:
+ #~ print "NO MATCH"
+ #~ assert not result, "didnt match but should have"
+
+ for tst,result in tests:
+ found = False
+ for tokens,start,end in compoundSeq.scanString(tst):
+ print "match:", tokens.asList()
+ found = True
+ break
+ if not found:
+ print "No expression match in", tst
+ assert found == result, "Failed repeater for test: %s, matching %s" % (tst, str(seq))
+
+ print
+ eFirst = Word(nums)
+ eSecond = matchPreviousExpr(eFirst)
+ eSeq = eFirst + ":" + eSecond
+
+ tests = [
+ ( "1:1A", True ),
+ ( "1:10", False ),
+ ]
+
+ for tst,result in tests:
+ found = False
+ for tokens,start,end in eSeq.scanString(tst):
+ #~ f,b,s = tokens
+ #~ print f,b,s
+ print tokens.asList()
+ found = True
+ if not found:
+ print "No match in", tst
+ assert found == result, "Failed repeater for test: %s, matching %s" % (tst, str(seq))
+
+class RecursiveCombineTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import Forward,Word,alphas,nums,Optional,Combine
+
+ testInput = "myc(114)r(11)dd"
+ Stream=Forward()
+ Stream << Optional(Word(alphas))+Optional("("+Word(nums)+")"+Stream)
+ expected = Stream.parseString(testInput).asList()
+ print ["".join(expected)]
+
+ Stream=Forward()
+ Stream << Combine(Optional(Word(alphas))+Optional("("+Word(nums)+")"+Stream))
+ testVal = Stream.parseString(testInput).asList()
+ print testVal
+
+ assert "".join(testVal) == "".join(expected), "Failed to process Combine with recursive content"
+
+class OperatorPrecedenceGrammarTest1(ParseTestCase):
+ def runTest(self):
+ from pyparsing import Word,nums,alphas,Literal,oneOf,operatorPrecedence,opAssoc
+
+ integer = Word(nums).setParseAction(lambda t:int(t[0]))
+ variable = Word(alphas,exact=1)
+ operand = integer | variable
+
+ expop = Literal('^')
+ signop = oneOf('+ -')
+ multop = oneOf('* /')
+ plusop = oneOf('+ -')
+ factop = Literal('!')
+
+ expr = operatorPrecedence( operand,
+ [("!", 1, opAssoc.LEFT),
+ ("^", 2, opAssoc.RIGHT),
+ (signop, 1, opAssoc.RIGHT),
+ (multop, 2, opAssoc.LEFT),
+ (plusop, 2, opAssoc.LEFT),]
+ )
+
+ test = ["9 + 2 + 3",
+ "9 + 2 * 3",
+ "(9 + 2) * 3",
+ "(9 + -2) * 3",
+ "(9 + --2) * 3",
+ "(9 + -2) * 3^2^2",
+ "(9! + -2) * 3^2^2",
+ "M*X + B",
+ "M*(X + B)",
+ "1+2*-3^4*5+-+-6",
+ "3!!"]
+ expected = """[[9, '+', 2, '+', 3]]
+ [[9, '+', [2, '*', 3]]]
+ [[[9, '+', 2], '*', 3]]
+ [[[9, '+', ['-', 2]], '*', 3]]
+ [[[9, '+', ['-', ['-', 2]]], '*', 3]]
+ [[[9, '+', ['-', 2]], '*', [3, '^', [2, '^', 2]]]]
+ [[[[9, '!'], '+', ['-', 2]], '*', [3, '^', [2, '^', 2]]]]
+ [[['M', '*', 'X'], '+', 'B']]
+ [['M', '*', ['X', '+', 'B']]]
+ [[1, '+', [2, '*', ['-', [3, '^', 4]], '*', 5], '+', ['-', ['+', ['-', 6]]]]]
+ [[3, '!', '!']]""".split('\n')
+ expected = map(lambda x:eval(x),expected)
+ for t,e in zip(test,expected):
+ print t,"->",e, "got", expr.parseString(t).asList()
+ assert expr.parseString(t).asList() == e,"mismatched results for operatorPrecedence: got %s, expected %s" % (expr.parseString(t).asList(),e)
+
+class OperatorPrecedenceGrammarTest2(ParseTestCase):
+ def runTest(self):
+
+ from pyparsing import operatorPrecedence, Word, alphas, oneOf, opAssoc
+
+ boolVars = { "True":True, "False":False }
+ class BoolOperand(object):
+ def __init__(self,t):
+ self.args = t[0][0::2]
+ def __str__(self):
+ sep = " %s " % self.reprsymbol
+ return "(" + sep.join(map(str,self.args)) + ")"
+
+ class BoolAnd(BoolOperand):
+ reprsymbol = '&'
+ def __nonzero__(self):
+ for a in self.args:
+ if isinstance(a,basestring):
+ v = boolVars[a]
+ else:
+ v = bool(a)
+ if not v:
+ return False
+ return True
+
+ class BoolOr(BoolOperand):
+ reprsymbol = '|'
+ def __nonzero__(self):
+ for a in self.args:
+ if isinstance(a,basestring):
+ v = boolVars[a]
+ else:
+ v = bool(a)
+ if v:
+ return True
+ return False
+
+ class BoolNot(BoolOperand):
+ def __init__(self,t):
+ self.arg = t[0][1]
+ def __str__(self):
+ return "~" + str(self.arg)
+ def __nonzero__(self):
+ if isinstance(self.arg,basestring):
+ v = boolVars[self.arg]
+ else:
+ v = bool(self.arg)
+ return not v
+
+ boolOperand = Word(alphas,max=1) | oneOf("True False")
+ boolExpr = operatorPrecedence( boolOperand,
+ [
+ ("not", 1, opAssoc.RIGHT, BoolNot),
+ ("and", 2, opAssoc.LEFT, BoolAnd),
+ ("or", 2, opAssoc.LEFT, BoolOr),
+ ])
+ test = ["p and not q",
+ "not not p",
+ "not(p and q)",
+ "q or not p and r",
+ "q or not p or not r",
+ "q or not (p and r)",
+ "p or q or r",
+ "p or q or r and False",
+ "(p or q or r) and False",
+ ]
+
+ boolVars["p"] = True
+ boolVars["q"] = False
+ boolVars["r"] = True
+ print "p =", boolVars["p"]
+ print "q =", boolVars["q"]
+ print "r =", boolVars["r"]
+ print
+ for t in test:
+ res = boolExpr.parseString(t)[0]
+ print t,'\n', res, '=', bool(res),'\n'
+
+
+class OperatorPrecedenceGrammarTest3(ParseTestCase):
+ def runTest(self):
+
+ from pyparsing import operatorPrecedence, Word, alphas, oneOf, opAssoc, nums, Literal
+
+ global count
+ count = 0
+
+ def evaluate_int(t):
+ global count
+ value = int(t[0])
+ print "evaluate_int", value
+ count += 1
+ return value
+
+ integer = Word(nums).setParseAction(evaluate_int)
+ variable = Word(alphas,exact=1)
+ operand = integer | variable
+
+ expop = Literal('^')
+ signop = oneOf('+ -')
+ multop = oneOf('* /')
+ plusop = oneOf('+ -')
+ factop = Literal('!')
+
+ expr = operatorPrecedence( operand,
+ [
+ ("!", 1, opAssoc.LEFT),
+ ("^", 2, opAssoc.RIGHT),
+ (signop, 1, opAssoc.RIGHT),
+ (multop, 2, opAssoc.LEFT),
+ (plusop, 2, opAssoc.LEFT),
+ ])
+
+ test = ["9"]
+ for t in test:
+ count = 0
+ print "%s => %s" % (t, expr.parseString(t))
+ assert count == 1, "count evaluated too many times!"
+
+class OperatorPrecedenceGrammarTest4(ParseTestCase):
+ def runTest(self):
+
+ import pyparsing
+
+ word = pyparsing.Word(pyparsing.alphas)
+
+ def supLiteral(s):
+ """Returns the suppressed literal s"""
+ return pyparsing.Literal(s).suppress()
+
+ def booleanExpr(atom):
+ ops = [
+ (supLiteral(u"!"), 1, pyparsing.opAssoc.RIGHT, lambda s, l, t: ["!", t[0][0]]),
+ (pyparsing.oneOf(u"= !="), 2, pyparsing.opAssoc.LEFT, ),
+ (supLiteral(u"&"), 2, pyparsing.opAssoc.LEFT, lambda s, l, t: ["&", t[0]]),
+ (supLiteral(u"|"), 2, pyparsing.opAssoc.LEFT, lambda s, l, t: ["|", t[0]])]
+ return pyparsing.operatorPrecedence(atom, ops)
+
+ f = booleanExpr(word) + pyparsing.StringEnd()
+
+ tests = [
+ ("bar = foo", "[['bar', '=', 'foo']]"),
+ ("bar = foo & baz = fee", "['&', [['bar', '=', 'foo'], ['baz', '=', 'fee']]]"),
+ ]
+ for test,expected in tests:
+ print test
+ results = f.parseString(test)
+ print results
+ assert str(results) == expected, "failed to match expected results, got '%s'" % str(results)
+ print
+
+
+class ParseResultsPickleTest(ParseTestCase):  # round-trips a ParseResults through pickle and compares dump() output
+ def runTest(self):  # parses one BODY tag, then pickles/unpickles the result per protocol
+ from pyparsing import makeHTMLTags, ParseResults
+ import pickle
+
+ body = makeHTMLTags("BODY")[0]  # first element of the pair is the opening-tag expression
+ result = body.parseString("<BODY BGCOLOR='#00FFBB' FGCOLOR=black>")
+ print result.dump()
+ print
+
+ # TODO - add support for protocols >= 2
+ #~ for protocol in range(pickle.HIGHEST_PROTOCOL+1):
+ for protocol in range(2):  # only protocols 0 and 1 are exercised
+ print "Test pickle dump protocol", protocol
+ try:
+ pickleString = pickle.dumps(result, protocol)
+ except Exception, e:
+ print "dumps exception:", e
+ newresult = ParseResults([])  # empty stand-in; the dump() comparison below is then expected to fail
+ else:
+ newresult = pickle.loads(pickleString)
+ print newresult.dump()
+
+ assert result.dump() == newresult.dump(), "Error pickling ParseResults object (protocol=%d)" % protocol
+ print
+
+
+class ParseResultsWithNamedTupleTest(ParseTestCase):
+ def runTest(self):
+
+ from pyparsing import Literal,replaceWith
+
+ expr = Literal("A")
+ expr.setParseAction(replaceWith(tuple(["A","Z"])))
+ expr = expr.setResultsName("Achar")
+
+ res = expr.parseString("A")
+ print repr(res)
+ print res.Achar
+ assert res.Achar == ("A","Z"), "Failed accessing named results containing a tuple, got " + res.Achar
+
+
+class ParseHTMLTagsTest(ParseTestCase):
+ def runTest(self):
+ import pyparsing
+ test = """
+ <BODY>
+ <BODY BGCOLOR="#00FFCC">
+ <BODY BGCOLOR="#00FFAA"/>
+ <BODY BGCOLOR='#00FFBB' FGCOLOR=black>
+ <BODY/>
+ </BODY>
+ """
+ results = [
+ ("startBody", False, "", ""),
+ ("startBody", False, "#00FFCC", ""),
+ ("startBody", True, "#00FFAA", ""),
+ ("startBody", False, "#00FFBB", "black"),
+ ("startBody", True, "", ""),
+ ("endBody", False, "", ""),
+ ]
+
+ bodyStart, bodyEnd = pyparsing.makeHTMLTags("BODY")
+ resIter = iter(results)
+ for t,s,e in (bodyStart | bodyEnd).scanString( test ):
+ print test[s:e], "->", t.asList()
+ (expectedType, expectedEmpty, expectedBG, expectedFG) = resIter.next()
+
+ tType = t.getName()
+ #~ print tType,"==",expectedType,"?"
+ assert tType in "startBody endBody".split(), "parsed token of unknown type '%s'" % tType
+ assert tType == expectedType, "expected token of type %s, got %s" % (expectedType, tType)
+ if tType == "startBody":
+ assert bool(t.empty) == expectedEmpty, "expected %s token, got %s" % ( expectedEmpty and "empty" or "not empty",
+ t.empty and "empty" or "not empty" )
+ assert t.bgcolor == expectedBG, "failed to match BGCOLOR, expected %s, got %s" % ( expectedBG, t.bgcolor )
+ assert t.fgcolor == expectedFG, "failed to match FGCOLOR, expected %s, got %s" % ( expectedFG, t.bgcolor )
+ elif tType == "endBody":
+ #~ print "end tag"
+ pass
+ else:
+ print "BAD!!!"
+
+class UpcaseDowncaseUnicode(ParseTestCase):  # runs upcaseTokens/downcaseTokens and a literal match over non-ASCII text; prints only, no assertions
+ def runTest(self):
+
+ import pyparsing as pp
+ import sys
+
+ a = u'\u00bfC\u00f3mo esta usted?'
+ ualphas = u"".join( [ unichr(i) for i in range(sys.maxunicode)
+ if unichr(i).isalpha() ] )  # every alphabetic code point below sys.maxunicode
+ uword = pp.Word(ualphas).setParseAction(pp.upcaseTokens)
+
+ print uword.searchString(a)
+
+ uword = pp.Word(ualphas).setParseAction(pp.downcaseTokens)
+
+ print uword.searchString(a)
+
+ if not IRON_PYTHON_ENV:  # IRON_PYTHON_ENV is defined outside this block
+ #test html data
+ html = "<TR class=maintxt bgColor=#ffffff> \
+ <TD vAlign=top>Производитель, модель</TD> \
+ <TD vAlign=top><STRONG>BenQ-Siemens CF61</STRONG></TD> \
+ ".decode('utf-8')
+
+ # u'Manufacturer, model
+ text_manuf = u'Производитель, модель'
+ manufacturer = pp.Literal(text_manuf)
+
+ td_start, td_end = pp.makeHTMLTags("td")
+ manuf_body = td_start.suppress() + manufacturer + pp.SkipTo(td_end).setResultsName("cells", True) + td_end.suppress()
+
+ #~ manuf_body.setDebug()
+
+ for tokens in manuf_body.scanString(html):  # each match is printed, not checked
+ print tokens
+
+class ParseUsingRegex(ParseTestCase):
+ def runTest(self):
+
+ import re
+ import pyparsing
+
+ signedInt = pyparsing.Regex(r'[-+][0-9]+')
+ unsignedInt = pyparsing.Regex(r'[0-9]+')
+ simpleString = pyparsing.Regex(r'("[^\"]*")|(\'[^\']*\')')
+ namedGrouping = pyparsing.Regex(r'("(?P<content>[^\"]*)")')
+ compiledRE = pyparsing.Regex(re.compile(r'[A-Z]+'))
+
+ def testMatch (expression, instring, shouldPass, expectedString=None):
+ if shouldPass:
+ try:
+ result = expression.parseString(instring)
+ print '%s correctly matched %s' % (repr(expression), repr(instring))
+ if expectedString != result[0]:
+ print '\tbut failed to match the pattern as expected:'
+ print '\tproduced %s instead of %s' % \
+ (repr(result[0]), repr(expectedString))
+ return True
+ except pyparsing.ParseException:
+ print '%s incorrectly failed to match %s' % \
+ (repr(expression), repr(instring))
+ else:
+ try:
+ result = expression.parseString(instring)
+ print '%s incorrectly matched %s' % (repr(expression), repr(instring))
+ print '\tproduced %s as a result' % repr(result[0])
+ except pyparsing.ParseException:
+ print '%s correctly failed to match %s' % \
+ (repr(expression), repr(instring))
+ return True
+ return False
+
+ # These should fail
+ assert testMatch(signedInt, '1234 foo', False), "Re: (1) passed, expected fail"
+ assert testMatch(signedInt, ' +foo', False), "Re: (2) passed, expected fail"
+ assert testMatch(unsignedInt, 'abc', False), "Re: (3) passed, expected fail"
+ assert testMatch(unsignedInt, '+123 foo', False), "Re: (4) passed, expected fail"
+ assert testMatch(simpleString, 'foo', False), "Re: (5) passed, expected fail"
+ assert testMatch(simpleString, '"foo bar\'', False), "Re: (6) passed, expected fail"
+ assert testMatch(simpleString, '\'foo bar"', False), "Re: (7) passed, expected fail"
+
+ # These should pass
+ assert testMatch(signedInt, ' +123', True, '+123'), "Re: (8) failed, expected pass"
+ assert testMatch(signedInt, '+123', True, '+123'), "Re: (9) failed, expected pass"
+ assert testMatch(signedInt, '+123 foo', True, '+123'), "Re: (10) failed, expected pass"
+ assert testMatch(signedInt, '-0 foo', True, '-0'), "Re: (11) failed, expected pass"
+ assert testMatch(unsignedInt, '123 foo', True, '123'), "Re: (12) failed, expected pass"
+ assert testMatch(unsignedInt, '0 foo', True, '0'), "Re: (13) failed, expected pass"
+ assert testMatch(simpleString, '"foo"', True, '"foo"'), "Re: (14) failed, expected pass"
+ assert testMatch(simpleString, "'foo bar' baz", True, "'foo bar'"), "Re: (15) failed, expected pass"
+
+ assert testMatch(compiledRE, 'blah', False), "Re: (16) passed, expected fail"
+ assert testMatch(compiledRE, 'BLAH', True, 'BLAH'), "Re: (17) failed, expected pass"
+
+ assert testMatch(namedGrouping, '"foo bar" baz', True, '"foo bar"'), "Re: (16) failed, expected pass"
+ ret = namedGrouping.parseString('"zork" blah')
+ print ret.asList()
+ print ret.items()
+ print ret.content
+ assert ret.content == 'zork', "named group lookup failed"
+ assert ret[0] == simpleString.parseString('"zork" blah')[0], "Regex not properly returning ParseResults for named vs. unnamed groups"
+
+ try:
+ #~ print "lets try an invalid RE"
+ invRe = pyparsing.Regex('("[^\"]*")|(\'[^\']*\'')
+ except Exception,e:
+ print "successfully rejected an invalid RE:",
+ print e
+ else:
+ assert False, "failed to reject invalid RE"
+
+ invRe = pyparsing.Regex('')
+
+class CountedArrayTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import Word,nums,OneOrMore,countedArray
+
+ testString = "2 5 7 6 0 1 2 3 4 5 0 3 5 4 3"
+
+ integer = Word(nums).setParseAction(lambda t: int(t[0]))
+ countedField = countedArray(integer)
+
+ r = OneOrMore(countedField).parseString( testString )
+ print testString
+ print r.asList()
+
+ assert r.asList() == [[5,7],[0,1,2,3,4,5],[],[5,4,3]], \
+ "Failed matching countedArray, got " + str(r.asList())
+
+class CountedArrayTest2(ParseTestCase):
+ # addresses bug raised by Ralf Vosseler
+ def runTest(self):
+ from pyparsing import Word,nums,OneOrMore,countedArray
+
+ testString = "2 5 7 6 0 1 2 3 4 5 0 3 5 4 3"
+
+ integer = Word(nums).setParseAction(lambda t: int(t[0]))
+ countedField = countedArray(integer)
+
+ dummy = Word("A")
+ r = OneOrMore(dummy ^ countedField).parseString( testString )
+ print testString
+ print r.asList()
+
+ assert r.asList() == [[5,7],[0,1,2,3,4,5],[],[5,4,3]], \
+ "Failed matching countedArray, got " + str(r.asList())
+
+class LineAndStringEndTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import OneOrMore,lineEnd,alphanums,Word,stringEnd,delimitedList,SkipTo
+
+ les = OneOrMore(lineEnd)
+ bnf1 = delimitedList(Word(alphanums).leaveWhitespace(),les)
+ bnf2 = Word(alphanums) + stringEnd
+ bnf3 = Word(alphanums) + SkipTo(stringEnd)
+ tests = [
+ ("testA\ntestB\ntestC\n", ['testA', 'testB', 'testC']),
+ ("testD\ntestE\ntestF", ['testD', 'testE', 'testF']),
+ ("a", ['a']),
+ ]
+
+ for t in tests:
+ res1 = bnf1.parseString(t[0])
+ print res1,'=?',t[1]
+ assert res1.asList() == t[1], "Failed lineEnd/stringEnd test (1): "+repr(t[0])+ " -> "+str(res1.asList())
+ res2 = bnf2.searchString(t[0])
+ print res2[0].asList(),'=?',t[1][-1:]
+ assert res2[0].asList() == t[1][-1:], "Failed lineEnd/stringEnd test (2): "+repr(t[0])+ " -> "+str(res2[0].asList())
+ res3 = bnf3.parseString(t[0])
+ print repr(res3[1]),'=?',repr(t[0][len(res3[0])+1:])
+ assert res3[1] == t[0][len(res3[0])+1:], "Failed lineEnd/stringEnd test (3): " +repr(t[0])+ " -> "+str(res3[1].asList())
+
+ from pyparsing import Regex
+ import re
+
+ k = Regex(r'a+',flags=re.S+re.M)
+ k = k.parseWithTabs()
+ k = k.leaveWhitespace()
+
+ tests = [
+ (r'aaa',['aaa']),
+ (r'\naaa',None),
+ (r'a\naa',None),
+ (r'aaa\n',None),
+ ]
+ for i,(src,expected) in enumerate(tests):
+ print i, repr(src).replace('\\\\','\\'),
+ try:
+ res = k.parseString(src, parseAll=True).asList()
+ except ParseException, pe:
+ res = None
+ print res
+ assert res == expected, "Failed on parseAll=True test %d" % i
+
+class VariableParseActionArgsTest(ParseTestCase):  # attaches parse actions taking 0-3 arguments in many callable forms and checks the parsed tokens
+ def runTest(self):
+
+ pa3 = lambda s,l,t: t  # plain lambdas taking 3, 2, 1 and 0 arguments
+ pa2 = lambda l,t: t
+ pa1 = lambda t: t
+ pa0 = lambda : None
+ class Callable3(object):  # callable instances: __call__ takes self plus 3..0 arguments
+ def __call__(self,s,l,t):
+ return t
+ class Callable2(object):
+ def __call__(self,l,t):
+ return t
+ class Callable1(object):
+ def __call__(self,t):
+ return t
+ class Callable0(object):
+ def __call__(self):
+ return
+ class CallableS3(object):  # same arities with __call__ wrapped as a staticmethod
+ #~ @staticmethod
+ def __call__(s,l,t):
+ return t
+ __call__=staticmethod(__call__)  # explicit wrapping in place of the commented-out decorator
+ class CallableS2(object):
+ #~ @staticmethod
+ def __call__(l,t):
+ return t
+ __call__=staticmethod(__call__)
+ class CallableS1(object):
+ #~ @staticmethod
+ def __call__(t):
+ return t
+ __call__=staticmethod(__call__)
+ class CallableS0(object):
+ #~ @staticmethod
+ def __call__():
+ return
+ __call__=staticmethod(__call__)
+ class CallableC3(object):  # same arities with __call__ wrapped as a classmethod
+ #~ @classmethod
+ def __call__(cls,s,l,t):
+ return t
+ __call__=classmethod(__call__)
+ class CallableC2(object):
+ #~ @classmethod
+ def __call__(cls,l,t):
+ return t
+ __call__=classmethod(__call__)
+ class CallableC1(object):
+ #~ @classmethod
+ def __call__(cls,t):
+ return t
+ __call__=classmethod(__call__)
+ class CallableC0(object):
+ #~ @classmethod
+ def __call__(cls):
+ return
+ __call__=classmethod(__call__)
+
+ class parseActionHolder(object):  # static methods used as parse actions via the class attribute
+ #~ @staticmethod
+ def pa3(s,l,t):
+ return t
+ pa3=staticmethod(pa3)
+ #~ @staticmethod
+ def pa2(l,t):
+ return t
+ pa2=staticmethod(pa2)
+ #~ @staticmethod
+ def pa1(t):
+ return t
+ pa1=staticmethod(pa1)
+ #~ @staticmethod
+ def pa0():
+ return
+ pa0=staticmethod(pa0)
+
+ def paArgs(*args):  # variable-argument action; returns its third positional argument
+ print args
+ return args[2]
+
+ class ClassAsPA0(object):  # classes whose constructors are used directly as parse actions
+ def __init__(self):
+ pass
+ def __str__(self):
+ return "A"
+
+ class ClassAsPA1(object):
+ def __init__(self,t):
+ print "making a ClassAsPA1"
+ self.t = t
+ def __str__(self):
+ return self.t[0]
+
+ class ClassAsPA2(object):
+ def __init__(self,l,t):
+ self.t = t
+ def __str__(self):
+ return self.t[0]
+
+ class ClassAsPA3(object):
+ def __init__(self,s,l,t):
+ self.t = t
+ def __str__(self):
+ return self.t[0]
+
+ class ClassAsPAStarNew(tuple):  # tuple subclass built through __new__(cls, *args)
+ def __new__(cls, *args):
+ print "make a ClassAsPAStarNew", args
+ return tuple.__new__(cls, *args[2].asList())
+ def __str__(self):
+ return ''.join(self)
+
+ #~ def ClassAsPANew(object):
+ #~ def __new__(cls, t):
+ #~ return object.__new__(cls, t)
+ #~ def __init__(self,t):
+ #~ self.t = t
+ #~ def __str__(self):
+ #~ return self.t
+
+ from pyparsing import Literal,OneOrMore
+
+ A = Literal("A").setParseAction(pa0)  # one single-letter literal per parse-action form
+ B = Literal("B").setParseAction(pa1)
+ C = Literal("C").setParseAction(pa2)
+ D = Literal("D").setParseAction(pa3)
+ E = Literal("E").setParseAction(Callable0())
+ F = Literal("F").setParseAction(Callable1())
+ G = Literal("G").setParseAction(Callable2())
+ H = Literal("H").setParseAction(Callable3())
+ I = Literal("I").setParseAction(CallableS0())
+ J = Literal("J").setParseAction(CallableS1())
+ K = Literal("K").setParseAction(CallableS2())
+ L = Literal("L").setParseAction(CallableS3())
+ M = Literal("M").setParseAction(CallableC0())
+ N = Literal("N").setParseAction(CallableC1())
+ O = Literal("O").setParseAction(CallableC2())
+ P = Literal("P").setParseAction(CallableC3())
+ Q = Literal("Q").setParseAction(paArgs)
+ R = Literal("R").setParseAction(parseActionHolder.pa3)
+ S = Literal("S").setParseAction(parseActionHolder.pa2)
+ T = Literal("T").setParseAction(parseActionHolder.pa1)
+ U = Literal("U").setParseAction(parseActionHolder.pa0)
+ V = Literal("V")  # no parse action attached
+
+ gg = OneOrMore( A | C | D | E | F | G | H |
+ I | J | K | L | M | N | O | P | Q | R | S | U | V | B | T)
+ testString = "VUTSRQPONMLKJIHGFEDCBA"
+ res = gg.parseString(testString)
+ print res.asList()
+ assert res.asList()==list(testString), "Failed to parse using variable length parse actions"
+
+ A = Literal("A").setParseAction(ClassAsPA0)  # second pass: A-E now use class constructors as parse actions
+ B = Literal("B").setParseAction(ClassAsPA1)
+ C = Literal("C").setParseAction(ClassAsPA2)
+ D = Literal("D").setParseAction(ClassAsPA3)
+ E = Literal("E").setParseAction(ClassAsPAStarNew)
+
+ gg = OneOrMore( A | B | C | D | E | F | G | H |
+ I | J | K | L | M | N | O | P | Q | R | S | T | U | V)
+ testString = "VUTSRQPONMLKJIHGFEDCBA"
+ res = gg.parseString(testString)
+ print map(str,res)  # tokens may now be class instances, so they are compared through str()
+ assert map(str,res)==list(testString), "Failed to parse using variable length parse actions using class constructors as parse actions"
+
+class EnablePackratParsing(ParseTestCase):
+ def runTest(self):
+ from pyparsing import ParserElement
+ ParserElement.enablePackrat()
+
+class SingleArgExceptionTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import ParseBaseException,ParseFatalException
+
+ msg = ""
+ raisedMsg = ""
+ testMessage = "just one arg"
+ try:
+ raise ParseFatalException, testMessage
+ except ParseBaseException,pbe:
+ print "Received expected exception:", pbe
+ raisedMsg = pbe.msg
+ assert raisedMsg == testMessage, "Failed to get correct exception message"
+
+
+class KeepOriginalTextTest(ParseTestCase):  # checks keepOriginalText combined with an appended parse action via transformString
+ def runTest(self):
+ from pyparsing import makeHTMLTags, keepOriginalText
+
+ def rfn(t):  # formats the tag's src attribute and the total length of the joined tokens
+ return "%s:%d" % (t.src, len("".join(t)))
+
+ makeHTMLStartTag = lambda tag: makeHTMLTags(tag)[0].setParseAction(keepOriginalText)
+
+ # use the lambda, Luke
+ #~ start, imge = makeHTMLTags('IMG')
+ start = makeHTMLStartTag('IMG')
+
+ # don't replace our fancy parse action with rfn,
+ # append rfn to the list of parse actions
+ #~ start.setParseAction(rfn)
+ start.addParseAction(rfn)
+
+ #start.setParseAction(lambda s,l,t:t.src)
+ text = '''_<img src="images/cal.png"
+ alt="cal image" width="16" height="15">_'''
+ s = start.transformString(text)
+ print s
+ assert s.startswith("_images/cal.png:"), "failed to preserve input s properly"
+ assert s.endswith("77_"),"failed to return full original text properly"  # 77 is the length rfn must report for the tag text
+
+class PackratParsingCacheCopyTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import Word,nums,ParserElement,delimitedList,Literal,Optional,alphas,alphanums,ZeroOrMore,empty
+
+ integer = Word(nums).setName("integer")
+ id = Word(alphas+'_',alphanums+'_')
+ simpleType = Literal('int');
+ arrayType= simpleType+ZeroOrMore('['+delimitedList(integer)+']')
+ varType = arrayType | simpleType
+ varDec = varType + delimitedList(id + Optional('='+integer))+';'
+
+ codeBlock = Literal('{}')
+
+ funcDef = Optional(varType | 'void')+id+'('+(delimitedList(varType+id)|'void'|empty)+')'+codeBlock
+
+ program = varDec | funcDef
+ input = 'int f(){}'
+ results = program.parseString(input)
+ print "Parsed '%s' as %s" % (input, results.asList())
+ assert results.asList() == ['int', 'f', '(', ')', '{}'], "Error in packrat parsing"
+
+class PackratParsingCacheCopyTest2(ParseTestCase):  # parses "DO Z" and checks that the grouped result holds exactly one element
+ def runTest(self):
+ from pyparsing import Keyword,Word,Suppress,Forward,Optional,delimitedList,ParserElement,Group
+
+ DO,AA = map(Keyword, "DO AA".split())
+ LPAR,RPAR = map(Suppress,"()")
+ identifier = ~AA + Word("Z")
+
+ function_name = identifier.copy()  # a separate copy is used inside the function-call branch
+ #~ function_name = ~AA + Word("Z") #identifier.copy()
+ expr = Forward().setName("expr")
+ expr << (Group(function_name + LPAR + Optional(delimitedList(expr)) + RPAR).setName("functionCall") |
+ identifier.setName("ident")#.setDebug()#.setBreak()
+ )
+
+ stmt = DO + Group(delimitedList(identifier + ".*" | expr))
+ result = stmt.parseString("DO Z")
+ print result.asList()
+ assert len(result[1]) == 1, "packrat parsing is duplicating And term exprs"  # result[1] is the Group that follows the DO keyword
+
+class ParseResultsDelTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import OneOrMore, Word, alphas, nums
+
+ grammar = OneOrMore(Word(nums))("ints") + OneOrMore(Word(alphas))("words")
+ res = grammar.parseString("123 456 ABC DEF")
+ print res.dump()
+ origInts = res.ints.asList()
+ origWords = res.words.asList()
+ del res[1]
+ del res["words"]
+ print res.dump()
+ assert res[1]=='ABC',"failed to delete 0'th element correctly"
+ assert res.ints.asList()==origInts, "updated named attributes, should have updated list only"
+ assert res.words=="", "failed to update named attribute correctly"
+ assert res[-1]=='DEF', "updated list, should have updated named attributes only"
+
+class WithAttributeParseActionTest(ParseTestCase):
+ def runTest(self):
+ """
+ This unit test checks withAttribute in these ways:
+
+ * Argument forms as keywords and tuples
+ * Selecting matching tags by attribute
+ * Case-insensitive attribute matching
+ * Correctly matching tags having the attribute, and rejecting tags not having the attribute
+
+ (Unit test written by voigts as part of the Google Highly Open Participation Contest)
+ """
+
+ from pyparsing import makeHTMLTags, Word, withAttribute, nums
+
+ data = """
+ <a>1</a>
+ <a b="x">2</a>
+ <a B="x">3</a>
+ <a b="X">4</a>
+ <a b="y">5</a>
+ """
+ tagStart, tagEnd = makeHTMLTags("a")
+
+ expr = tagStart + Word(nums).setResultsName("value") + tagEnd
+
+ expected = [['a', ['b', 'x'], False, '2', '</a>'], ['a', ['b', 'x'], False, '3', '</a>']]
+
+ for attrib in [
+ withAttribute(b="x"),
+ #withAttribute(B="x"),
+ withAttribute(("b","x")),
+ #withAttribute(("B","x")),
+ ]:
+
+ tagStart.setParseAction(attrib)
+ result = expr.searchString(data)
+
+ print result.dump()
+ assert result.asList() == expected, "Failed test, expected %s, got %s" % (expected, result.asList())
+
+class NestedExpressionsTest(ParseTestCase):  # runs nestedExpr with default and custom opener, closer, content and ignore expressions
+ def runTest(self):
+ """
+ This unit test checks nestedExpr in these ways:
+ - use of default arguments
+ - use of non-default arguments (such as a pyparsing-defined comment
+ expression in place of quotedString)
+ - use of a custom content expression
+ - use of a pyparsing expression for opener and closer is *OPTIONAL*
+ - use of input data containing nesting delimiters
+ - correct grouping of parsed tokens according to nesting of opening
+ and closing delimiters in the input string
+
+ (Unit test written by christoph... as part of the Google Highly Open Participation Contest)
+ """
+ from pyparsing import nestedExpr, Literal, Regex, restOfLine, quotedString
+
+ #All defaults. Straight out of the example script. Also, qualifies for
+ #the bonus: note the fact that (Z | (E^F) & D) is not parsed :-).
+ # Tests for bug fixed in 1.4.10
+ print "Test defaults:"
+ teststring = "(( ax + by)*C) (Z | (E^F) & D)"
+
+ expr = nestedExpr()
+
+ expected = [[['ax', '+', 'by'], '*C']]
+ result = expr.parseString(teststring)
+ print result.dump()
+ assert result.asList() == expected, "Defaults didn't work. That's a bad sign. Expected: %s, got: %s" % (expected, result)
+
+ #Going through non-defaults, one by one; trying to think of anything
+ #odd that might not be properly handled.
+
+ #Change opener
+ print "\nNon-default opener"
+ opener = "["  # not read before it is rebound further down; the literal "[" is passed directly
+ teststring = test_string = "[[ ax + by)*C)"  # test_string is not referenced elsewhere in this test
+ expected = [[['ax', '+', 'by'], '*C']]
+ expr = nestedExpr("[")
+ result = expr.parseString(teststring)
+ print result.dump()
+ assert result.asList() == expected, "Non-default opener didn't work. Expected: %s, got: %s" % (expected, result)
+
+ #Change closer
+ print "\nNon-default closer"
+
+ teststring = test_string = "(( ax + by]*C]"
+ expected = [[['ax', '+', 'by'], '*C']]
+ expr = nestedExpr(closer="]")
+ result = expr.parseString(teststring)
+ print result.dump()
+ assert result.asList() == expected, "Non-default closer didn't work. Expected: %s, got: %s" % (expected, result)
+
+ # #Multicharacter opener, closer
+ # opener = "bar"
+ # closer = "baz"
+ print "\nLiteral expressions for opener and closer"
+
+ opener,closer = map(Literal, "bar baz".split())
+ expr = nestedExpr(opener, closer,
+ content=Regex(r"([^b ]|b(?!a)|ba(?![rz]))+"))  # content pattern refuses to run into "bar" or "baz"
+
+ teststring = "barbar ax + bybaz*Cbaz"
+ expected = [[['ax', '+', 'by'], '*C']]
+ # expr = nestedExpr(opener, closer)
+ result = expr.parseString(teststring)
+ print result.dump()
+ assert result.asList() == expected, "Multicharacter opener and closer didn't work. Expected: %s, got: %s" % (expected, result)
+
+ #Lisp-ish comments
+ print "\nUse ignore expression (1)"
+ comment = Regex(r";;.*")
+ teststring = \
+ """
+ (let ((greeting "Hello, world!")) ;;(foo bar
+ (display greeting))
+ """
+
+ expected = [['let', [['greeting', '"Hello,', 'world!"']], ';;(foo bar',\
+ ['display', 'greeting']]]
+ expr = nestedExpr(ignoreExpr=comment)
+ result = expr.parseString(teststring)
+ print result.dump()
+ assert result.asList() == expected , "Lisp-ish comments (\";; <...> $\") didn't work. Expected: %s, got: %s" % (expected, result)
+
+
+ #Lisp-ish comments, using a standard bit of pyparsing, and an Or.
+ print "\nUse ignore expression (2)"
+ comment = ';;' + restOfLine
+
+ teststring = \
+ """
+ (let ((greeting "Hello, )world!")) ;;(foo bar
+ (display greeting))
+ """
+
+ expected = [['let', [['greeting', '"Hello, )world!"']], ';;', '(foo bar',
+ ['display', 'greeting']]]
+ expr = nestedExpr(ignoreExpr=(comment ^ quotedString))  # the quoted string here contains a ")" that must not close the nesting
+ result = expr.parseString(teststring)
+ print result.dump()
+ assert result.asList() == expected , "Lisp-ish comments (\";; <...> $\") and quoted strings didn't work. Expected: %s, got: %s" % (expected, result)
+
+class ParseAllTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import Word, cppStyleComment
+
+ testExpr = Word("A")
+
+ tests = [
+ ("AAAAA", False, True),
+ ("AAAAA", True, True),
+ ("AAABB", False, True),
+ ("AAABB", True, False),
+ ]
+ for s,parseAllFlag,shouldSucceed in tests:
+ try:
+ print "'%s' parseAll=%s (shouldSuceed=%s)" % (s, parseAllFlag, shouldSucceed)
+ testExpr.parseString(s,parseAllFlag)
+ assert shouldSucceed, "successfully parsed when should have failed"
+ except ParseException, pe:
+ assert not shouldSucceed, "failed to parse when should have succeeded"
+
+ # add test for trailing comments
+ testExpr.ignore(cppStyleComment)
+
+ tests = [
+ ("AAAAA //blah", False, True),
+ ("AAAAA //blah", True, True),
+ ("AAABB //blah", False, True),
+ ("AAABB //blah", True, False),
+ ]
+ for s,parseAllFlag,shouldSucceed in tests:
+ try:
+ print "'%s' parseAll=%s (shouldSucceed=%s)" % (s, parseAllFlag, shouldSucceed)
+ testExpr.parseString(s,parseAllFlag)
+ assert shouldSucceed, "successfully parsed when should have failed"
+ except ParseException, pe:
+ assert not shouldSucceed, "failed to parse when should have succeeded"
+
+class GreedyQuotedStringsTest(ParseTestCase):  # checks that quoted-string expressions stop at the first closing quote instead of spanning several strings
+ def runTest(self):
+ from pyparsing import QuotedString, sglQuotedString, dblQuotedString, quotedString, delimitedList
+
+ src = """\
+ "string1", "strin""g2"
+ 'string1', 'string2'
+ ^string1^, ^string2^
+ <string1>, <string2>"""
+
+ testExprs = (sglQuotedString, dblQuotedString, quotedString,
+ QuotedString('"', escQuote='""'), QuotedString("'", escQuote="''"),
+ QuotedString("^"), QuotedString("<",endQuoteChar=">"))
+ for expr in testExprs:
+ strs = delimitedList(expr).searchString(src)
+ print strs
+ assert bool(strs), "no matches found for test expression '%s'" % expr
+ for lst in strs:
+ assert len(lst) == 2, "invalid match found for test expression '%s'" % expr  # every matched list must hold exactly two strings
+
+ from pyparsing import alphas, nums, Word
+ src = """'ms1',1,0,'2009-12-22','2009-12-22 10:41:22') ON DUPLICATE KEY UPDATE sent_count = sent_count + 1, mtime = '2009-12-22 10:41:22';"""
+ tok_sql_quoted_value = (
+ QuotedString("'", "\\", "''", True, False) ^
+ QuotedString('"', "\\", '""', True, False))
+ tok_sql_computed_value = Word(nums)
+ tok_sql_identifier = Word(alphas)
+
+ val = tok_sql_quoted_value | tok_sql_computed_value | tok_sql_identifier
+ vals = delimitedList(val)
+ print vals.parseString(src)
+ assert len(vals.parseString(src)) == 5, "error in greedy quote escaping"  # five comma-separated values precede the ")" in src
+
+
+class WordBoundaryExpressionsTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import WordEnd, WordStart, oneOf
+
+ ws = WordStart()
+ we = WordEnd()
+ vowel = oneOf(list("AEIOUY"))
+ consonant = oneOf(list("BCDFGHJKLMNPQRSTVWXZ"))
+
+ leadingVowel = ws + vowel
+ trailingVowel = vowel + we
+ leadingConsonant = ws + consonant
+ trailingConsonant = consonant + we
+ internalVowel = ~ws + vowel + ~we
+
+ bnf = leadingVowel | trailingVowel
+
+ tests = """\
+ ABC DEF GHI
+ JKL MNO PQR
+ STU VWX YZ """.splitlines()
+ tests.append( "\n".join(tests) )
+
+ expectedResult = [
+ [['D', 'G'], ['A'], ['C', 'F'], ['I'], ['E'], ['A', 'I']],
+ [['J', 'M', 'P'], [], ['L', 'R'], ['O'], [], ['O']],
+ [['S', 'V'], ['Y'], ['X', 'Z'], ['U'], [], ['U', 'Y']],
+ [['D', 'G', 'J', 'M', 'P', 'S', 'V'],
+ ['A', 'Y'],
+ ['C', 'F', 'L', 'R', 'X', 'Z'],
+ ['I', 'O', 'U'],
+ ['E'],
+ ['A', 'I', 'O', 'U', 'Y']],
+ ]
+
+ for t,expected in zip(tests, expectedResult):
+ print t
+ results = map(lambda e: flatten(e.searchString(t).asList()),
+ [
+ leadingConsonant,
+ leadingVowel,
+ trailingConsonant,
+ trailingVowel,
+ internalVowel,
+ bnf,
+ ]
+ )
+ print results
+ assert results==expected,"Failed WordBoundaryTest, expected %s, got %s" % (expected,results)
+ print
+
+class OptionalEachTest(ParseTestCase):
+ def runTest(self):
+ from pyparsing import Optional, Keyword
+
+ the_input = "Major Tal Weiss"
+ parser1 = (Optional('Tal') + Optional('Weiss')) & Keyword('Major')
+ parser2 = Optional(Optional('Tal') + Optional('Weiss')) & Keyword('Major')
+ p1res = parser1.parseString( the_input)
+ p2res = parser2.parseString( the_input)
+ assert p1res.asList() == p2res.asList(), "Each failed to match with nested Optionals, " + \
+ str(p1res.asList()) + " should match " + str(p2res.asList())
+
+class SumParseResultsTest(ParseTestCase):
+ def runTest(self):
+
+ samplestr1 = "garbage;DOB 10-10-2010;more garbage\nID PARI12345678;more garbage"
+ samplestr2 = "garbage;ID PARI12345678;more garbage\nDOB 10-10-2010;more garbage"
+ samplestr3 = "garbage;DOB 10-10-2010"
+ samplestr4 = "garbage;ID PARI12345678;more garbage- I am cool"
+
+ res1 = "ID:PARI12345678 DOB:10-10-2010 INFO:"
+ res2 = "ID:PARI12345678 DOB:10-10-2010 INFO:"
+ res3 = "ID: DOB:10-10-2010 INFO:"
+ res4 = "ID:PARI12345678 DOB: INFO: I am cool"
+
+ from pyparsing import Regex, Word, alphanums, restOfLine
+ dob_ref = "DOB" + Regex(r"\d{2}-\d{2}-\d{4}")("dob")
+ id_ref = "ID" + Word(alphanums,exact=12)("id")
+ info_ref = "-" + restOfLine("info")
+
+ person_data = dob_ref | id_ref | info_ref
+
+ tests = (samplestr1,samplestr2,samplestr3,samplestr4,)
+ results = (res1, res2, res3, res4,)
+ for test,expected in zip(tests, results):
+ person = sum(person_data.searchString(test))
+ result = "ID:%s DOB:%s INFO:%s" % (person.id, person.dob, person.info)
+ print test
+ print expected
+ print result
+ for pd in person_data.searchString(test):
+ print pd.dump()
+ print
+ assert expected == result, \
+ "Failed to parse '%s' correctly, \nexpected '%s', got '%s'" % (test,expected,result)
+
+class MiscellaneousParserTests(ParseTestCase):
+ """Collection of small independent checks, each enabled by one letter in runtests."""
+ def runTest(self):
+ import pyparsing
+
+ # one letter per lettered check below; the IronPython list leaves out "F"
+ runtests = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ if IRON_PYTHON_ENV:
+ runtests = "ABCDEGHIJKLMNOPQRSTUVWXYZ"
+
+ # test making oneOf with duplicate symbols
+ if "A" in runtests:
+ print "verify oneOf handles duplicate symbols"
+ try:
+ test1 = pyparsing.oneOf("a b c d a")
+ except RuntimeError:
+ assert False,"still have infinite loop in oneOf with duplicate symbols"
+
+ # test MatchFirst bugfix
+ if "B" in runtests:
+ print "verify MatchFirst iterates properly"
+ results = pyparsing.quotedString.parseString("'this is a single quoted string'")
+ assert len(results) > 0, "MatchFirst error - not iterating over all choices"
+
+ # verify streamline of subexpressions
+ if "C" in runtests:
+ print "verify proper streamline logic"
+ compound = pyparsing.Literal("A") + "B" + "C" + "D"
+ # the asserts expect 2 entries in exprs before streamline() and 4 after
+ assert len(compound.exprs) == 2,"bad test setup"
+ print compound
+ compound.streamline()
+ print compound
+ assert len(compound.exprs) == 4,"streamline not working"
+
+ # test for Optional with results name and no match
+ if "D" in runtests:
+ print "verify Optional's do not cause match failure if have results name"
+ testGrammar = pyparsing.Literal("A") + pyparsing.Optional("B").setResultsName("gotB") + pyparsing.Literal("C")
+ try:
+ testGrammar.parseString("ABC")
+ testGrammar.parseString("AC")
+ except pyparsing.ParseException, pe:
+ print pe.pstr,"->",pe
+ assert False, "error in Optional matching of string %s" % pe.pstr
+
+ # test return of furthest exception
+ # NOTE(review): the asserts appear to sit in the except clause, so if
+ # neither parse raises nothing is checked here - confirm intent
+ if "E" in runtests:
+ testGrammar = ( pyparsing.Literal("A") |
+ ( pyparsing.Optional("B") + pyparsing.Literal("C") ) |
+ pyparsing.Literal("D") )
+ try:
+ testGrammar.parseString("BC")
+ testGrammar.parseString("BD")
+ except pyparsing.ParseException, pe:
+ print pe.pstr,"->",pe
+ assert pe.pstr == "BD", "wrong test string failed to parse"
+ assert pe.loc == 1, "error in Optional matching, pe.loc="+str(pe.loc)
+
+ # test validate
+ if "F" in runtests:
+ print "verify behavior of validate()"
+ # helper: streamline and validate grmr, then assert that the outcome
+ # (RecursiveGrammarException raised or not) agrees with isValid
+ def testValidation( grmr, gnam, isValid ):
+ try:
+ grmr.streamline()
+ grmr.validate()
+ assert isValid,"validate() accepted invalid grammar " + gnam
+ except pyparsing.RecursiveGrammarException,e:
+ print grmr
+ assert not isValid, "validate() rejected valid grammar " + gnam
+
+ fwd = pyparsing.Forward()
+ g1 = pyparsing.OneOrMore( ( pyparsing.Literal("A") + "B" + "C" ) | fwd )
+ g2 = pyparsing.ZeroOrMore("C" + g1)
+ fwd << pyparsing.Group(g2)
+ testValidation( fwd, "fwd", isValid=True )
+
+ fwd2 = pyparsing.Forward()
+ fwd2 << pyparsing.Group("A" | fwd2)
+ testValidation( fwd2, "fwd2", isValid=False )
+
+ fwd3 = pyparsing.Forward()
+ fwd3 << pyparsing.Optional("A") + fwd3
+ testValidation( fwd3, "fwd3", isValid=False )
+
+ # test getName
+ if "G" in runtests:
+ print "verify behavior of getName()"
+ aaa = pyparsing.Group(pyparsing.Word("a")).setResultsName("A")
+ bbb = pyparsing.Group(pyparsing.Word("b")).setResultsName("B")
+ ccc = pyparsing.Group(":" + pyparsing.Word("c")).setResultsName("C")
+ g1 = "XXX" + pyparsing.ZeroOrMore( aaa | bbb | ccc )
+ teststring = "XXX b b a b b a b :c b a"
+ names = []
+ print g1.parseString(teststring).dump()
+ # try the token's first element, then the token itself, else record None
+ # NOTE(review): both fallbacks use a bare except, which hides any error type
+ for t in g1.parseString(teststring):
+ print t, repr(t)
+ try:
+ names.append( t[0].getName() )
+ except:
+ try:
+ names.append( t.getName() )
+ except:
+ names.append( None )
+ print teststring
+ print names
+ assert names==[None, 'B', 'B', 'A', 'B', 'B', 'A', 'B', 'C', 'B', 'A'], \
+ "failure in getting names for tokens"
+
+ # test ParseResults.get() method
+ # NOTE(review): uses g1 and teststring as assigned in section G above
+ if "H" in runtests:
+ print "verify behavior of ParseResults.get()"
+ res = g1.parseString(teststring)
+ print res.get("A","A not found")[0]
+ print res.get("D","!D")
+ assert res.get("A","A not found")[0] == "a", "get on existing key failed"
+ assert res.get("D","!D") == "!D", "get on missing key failed"
+
+ # test Optionals that would match past the end of the input string
+ if "I" in runtests:
+ print "verify handling of Optional's beyond the end of string"
+ testGrammar = "A" + pyparsing.Optional("B") + pyparsing.Optional("C") + pyparsing.Optional("D")
+ testGrammar.parseString("A")
+ testGrammar.parseString("AB")
+
+ # test creating Literal with empty string
+ # NOTE(review): the except clause rebinds e, the name of the expression under test
+ if "J" in runtests:
+ print 'verify non-fatal usage of Literal("")'
+ e = pyparsing.Literal("")
+ try:
+ e.parseString("SLJFD")
+ except Exception,e:
+ assert False, "Failed to handle empty Literal"
+
+ # test line() behavior when starting at 0 and the opening line is an \n
+ if "K" in runtests:
+ print 'verify correct line() behavior when first line is empty string'
+ assert pyparsing.line(0, "\nabc\ndef\n") == '', "Error in line() with empty first line in text"
+ txt = "\nabc\ndef\n"
+ results = [ pyparsing.line(i,txt) for i in range(len(txt)) ]
+ assert results == ['', 'abc', 'abc', 'abc', 'abc', 'def', 'def', 'def', 'def'], "Error in line() with empty first line in text"
+ txt = "abc\ndef\n"
+ results = [ pyparsing.line(i,txt) for i in range(len(txt)) ]
+ assert results == ['abc', 'abc', 'abc', 'abc', 'def', 'def', 'def', 'def'], "Error in line() with non-empty first line in text"
+
+ # test bugfix with repeated tokens when packrat parsing enabled
+ if "L" in runtests:
+ a = pyparsing.Literal("a")
+ b = pyparsing.Literal("b")
+ c = pyparsing.Literal("c")
+
+ # three alternatives sharing the prefix "a b"; only the last matches "aba"
+ abb = a + b + b
+ abc = a + b + c
+ aba = a + b + a
+ grammar = abb | abc | aba
+
+ assert ''.join(grammar.parseString( "aba" )) == 'aba', "Packrat ABA failure!"
+
+def makeTestSuite():
+ suite = TestSuite()
+ suite.addTest( PyparsingTestInit() )
+ suite.addTest( ParseIDLTest() )
+ suite.addTest( ParseASMLTest() )
+ suite.addTest( ParseFourFnTest() )
+ suite.addTest( ParseSQLTest() )
+ suite.addTest( ParseConfigFileTest() )
+ suite.addTest( ParseJSONDataTest() )
+ suite.addTest( ParseCommaSeparatedValuesTest() )
+ suite.addTest( ParseEBNFTest() )
+ suite.addTest( ScanStringTest() )
+ suite.addTest( QuotedStringsTest() )
+ suite.addTest( CustomQuotesTest() )
+ suite.addTest( CaselessOneOfTest() )
+ suite.addTest( AsXMLTest() )
+ suite.addTest( CommentParserTest() )
+ suite.addTest( ParseExpressionResultsTest() )
+ suite.addTest( ParseExpressionResultsAccumulateTest() )
+ suite.addTest( ReStringRangeTest() )
+ suite.addTest( ParseKeywordTest() )
+ suite.addTest( ParseHTMLTagsTest() )
+ suite.addTest( ParseUsingRegex() )
+ suite.addTest( SkipToParserTests() )
+ suite.addTest( CountedArrayTest() )
+ suite.addTest( CountedArrayTest2() )
+ suite.addTest( LineAndStringEndTest() )
+ suite.addTest( VariableParseActionArgsTest() )
+ suite.addTest( RepeaterTest() )
+ suite.addTest( RecursiveCombineTest() )
+ suite.addTest( OperatorPrecedenceGrammarTest1() )
+ suite.addTest( OperatorPrecedenceGrammarTest2() )
+ suite.addTest( OperatorPrecedenceGrammarTest3() )
+ suite.addTest( OperatorPrecedenceGrammarTest4() )
+ suite.addTest( ParseResultsPickleTest() )
+ suite.addTest( ParseResultsWithNamedTupleTest() )
+ suite.addTest( ParseResultsDelTest() )
+ suite.addTest( SingleArgExceptionTest() )
+ suite.addTest( UpcaseDowncaseUnicode() )
+ if not IRON_PYTHON_ENV:
+ suite.addTest( KeepOriginalTextTest() )
+ suite.addTest( PackratParsingCacheCopyTest() )
+ suite.addTest( PackratParsingCacheCopyTest2() )
+ suite.addTest( WithAttributeParseActionTest() )
+ suite.addTest( NestedExpressionsTest() )
+ suite.addTest( WordBoundaryExpressionsTest() )
+ suite.addTest( ParseAllTest() )
+ suite.addTest( GreedyQuotedStringsTest() )
+ suite.addTest( OptionalEachTest() )
+ suite.addTest( SumParseResultsTest() )
+ suite.addTest( MiscellaneousParserTests() )
+ if TEST_USING_PACKRAT:
+ # retest using packrat parsing (disable those tests that aren't compatible)
+ suite.addTest( EnablePackratParsing() )
+
+ unpackrattables = [ EnablePackratParsing, RepeaterTest, ]
+
+ # add tests to test suite a second time, to run with packrat parsing
+ # (leaving out those that we know wont work with packrat)
+ packratTests = [t.__class__() for t in suite._tests
+ if t.__class__ not in unpackrattables]
+ suite.addTests( packratTests )
+
+ return suite
+
+def makeTestSuiteTemp():
+ suite = TestSuite()
+ suite.addTest( PyparsingTestInit() )
+ suite.addTest( OptionalEachTest() )
+
+ return suite
+
+console = False
+console = True
+
+#~ from line_profiler import LineProfiler
+#~ from pyparsing import ParseResults
+#~ lp = LineProfiler(ParseResults.__setitem__)
+
+if console:
+ #~ # console mode
+ testRunner = TextTestRunner()
+ testRunner.run( makeTestSuite() )
+ #~ testRunner.run( makeTestSuiteTemp() )
+ #~ lp.run("testRunner.run( makeTestSuite() )")
+else:
+ # HTML mode
+ outfile = "testResults.html"
+ outstream = file(outfile,"w")
+ testRunner = HTMLTestRunner.HTMLTestRunner( stream=outstream )
+ testRunner.run( makeTestSuite() )
+ outstream.close()
+
+ import os
+ os.system(r'"C:\Program Files\Internet Explorer\iexplore.exe" file://' + outfile)
+
+#~ lp.print_stats()
\ No newline at end of file