summary | refs | log | tree | commit | diff
path: root/trunk/src/examples/btpyparse.py
diff options
context:
space:
mode:
author: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-08-09 21:50:19 +0000
committer: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-08-09 21:50:19 +0000
commit: 0b19bb71ba5a4afa84e673a8239935426fa0db23 (patch)
tree: e9abae9c616fdfdfebd9a8a0931d8f21824f30d2 /trunk/src/examples/btpyparse.py
parent: b2c3ade75384efe76b8774b607e17fe98fab92ef (diff)
download: pyparsing_2.1.6.tar.gz
Remove incorrect tag directory (tag: pyparsing_2.1.6)
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/tags/pyparsing_2.1.6@405 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
Diffstat (limited to 'trunk/src/examples/btpyparse.py')
-rw-r--r-- trunk/src/examples/btpyparse.py 128
1 files changed, 0 insertions, 128 deletions
diff --git a/trunk/src/examples/btpyparse.py b/trunk/src/examples/btpyparse.py
deleted file mode 100644
index f3c11ae..0000000
--- a/trunk/src/examples/btpyparse.py
+++ /dev/null
@@ -1,128 +0,0 @@
-""" Pyparsing parser for BibTeX files
-
-A standalone parser using pyparsing.
-
-pyparsing has a simple and expressive syntax so the grammar is easy to read and
-write.
-
-Matthew Brett 2010
-Simplified BSD license
-"""
-
-from pyparsing import (Regex, Suppress, ZeroOrMore, Group, Optional, Forward,
- SkipTo, CaselessLiteral, Dict)
-
-
-class Macro(object):
- """ Class to encapsulate undefined macro references """
- def __init__(self, name):
- self.name = name
- def __repr__(self):
- return 'Macro("%s")' % self.name
- def __eq__(self, other):
- return self.name == other.name
- def __ne__(self, other):
- return self.name != other.name
-
-
-# Character literals
-LCURLY,RCURLY,LPAREN,RPAREN,QUOTE,COMMA,AT,EQUALS,HASH = map(Suppress,'{}()",@=#')
-
-
-def bracketed(expr):
- """ Return matcher for `expr` between curly brackets or parentheses """
- return (LPAREN + expr + RPAREN) | (LCURLY + expr + RCURLY)
-
-
-# Define parser components for strings (the hard bit)
-chars_no_curly = Regex(r"[^{}]+")
-chars_no_curly.leaveWhitespace()
-chars_no_quotecurly = Regex(r'[^"{}]+')
-chars_no_quotecurly.leaveWhitespace()
-# Curly string is some stuff without curlies, or nested curly sequences
-curly_string = Forward()
-curly_item = Group(curly_string) | chars_no_curly
-curly_string << LCURLY + ZeroOrMore(curly_item) + RCURLY
-# quoted string is either just stuff within quotes, or stuff within quotes, within
-# which there is nested curliness
-quoted_item = Group(curly_string) | chars_no_quotecurly
-quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE
-
-# Numbers can just be numbers. Only integers though.
-number = Regex('[0-9]+')
-
-# Basis characters (by exclusion) for variable / field names. The following
-# list of characters is from the btparse documentation
-any_name = Regex('[^\s"#%\'(),={}]+')
-
-# btparse says, and the test bibs show by experiment, that macro and field names
-# cannot start with a digit. In fact entry type names cannot start with a digit
-# either (see tests/bibs). Cite keys can start with a digit
-not_digname = Regex('[^\d\s"#%\'(),={}][^\s"#%\'(),={}]*')
-
-# Comment comments out to end of line
-comment = (AT + CaselessLiteral('comment') +
- Regex("[\s{(].*").leaveWhitespace())
-
-# The name types with their digiteyness
-not_dig_lower = not_digname.copy().setParseAction(lambda t: t[0].lower())
-macro_def = not_dig_lower.copy()
-macro_ref = not_dig_lower.copy().setParseAction(lambda t : Macro(t[0].lower()))
-field_name = not_dig_lower.copy()
-# Spaces in names mean they cannot clash with field names
-entry_type = not_dig_lower('entry_type')
-cite_key = any_name('cite_key')
-# Number has to be before macro name
-string = (number | macro_ref | quoted_string | curly_string)
-
-# There can be hash concatenation
-field_value = string + ZeroOrMore(HASH + string)
-field_def = Group(field_name + EQUALS + field_value)
-entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def))
-
-# Entry is surrounded either by parentheses or curlies
-entry = (AT + entry_type + bracketed(cite_key + COMMA + entry_contents))
-
-# Preamble is a macro-like thing with no name
-preamble = AT + CaselessLiteral('preamble') + bracketed(field_value)
-
-# Macros (aka strings)
-macro_contents = macro_def + EQUALS + field_value
-macro = AT + CaselessLiteral('string') + bracketed(macro_contents)
-
-# Implicit comments
-icomment = SkipTo('@').setParseAction(lambda t : t.insert(0, 'icomment'))
-
-# entries are last in the list (other than the fallback) because they have
-# arbitrary start patterns that would match comments, preamble or macro
-definitions = Group(comment |
- preamble |
- macro |
- entry |
- icomment)
-
-# Start symbol
-bibfile = ZeroOrMore(definitions)
-
-
-def parse_str(str):
- return bibfile.parseString(str)
-
-
-if __name__ == '__main__':
- # Run basic test
- txt = """
-Some introductory text
-(implicit comment)
-
-@ARTICLE{Authors2011,
- author = {First Author and Second Author and Third Author},
- title = {An article about {S}omething},
- journal = "Journal of Articles",
- year = {2011},
- volume = {16},
- pages = {1140--1141},
- number = {2}
-}
-"""
- print('\n\n'.join(defn.dump() for defn in parse_str(txt)))