From 0b19bb71ba5a4afa84e673a8239935426fa0db23 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Tue, 9 Aug 2016 21:50:19 +0000 Subject: Remove incorrect tag directory git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/tags/pyparsing_2.1.6@405 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b --- trunk/src/examples/btpyparse.py | 128 ---------------------------------------- 1 file changed, 128 deletions(-) delete mode 100644 trunk/src/examples/btpyparse.py (limited to 'trunk/src/examples/btpyparse.py') diff --git a/trunk/src/examples/btpyparse.py b/trunk/src/examples/btpyparse.py deleted file mode 100644 index f3c11ae..0000000 --- a/trunk/src/examples/btpyparse.py +++ /dev/null @@ -1,128 +0,0 @@ -""" Pyparsing parser for BibTeX files - -A standalone parser using pyparsing. - -pyparsing has a simple and expressive syntax so the grammar is easy to read and -write. - -Matthew Brett 2010 -Simplified BSD license -""" - -from pyparsing import (Regex, Suppress, ZeroOrMore, Group, Optional, Forward, - SkipTo, CaselessLiteral, Dict) - - -class Macro(object): - """ Class to encapsulate undefined macro references """ - def __init__(self, name): - self.name = name - def __repr__(self): - return 'Macro("%s")' % self.name - def __eq__(self, other): - return self.name == other.name - def __ne__(self, other): - return self.name != other.name - - -# Character literals -LCURLY,RCURLY,LPAREN,RPAREN,QUOTE,COMMA,AT,EQUALS,HASH = map(Suppress,'{}()",@=#') - - -def bracketed(expr): - """ Return matcher for `expr` between curly brackets or parentheses """ - return (LPAREN + expr + RPAREN) | (LCURLY + expr + RCURLY) - - -# Define parser components for strings (the hard bit) -chars_no_curly = Regex(r"[^{}]+") -chars_no_curly.leaveWhitespace() -chars_no_quotecurly = Regex(r'[^"{}]+') -chars_no_quotecurly.leaveWhitespace() -# Curly string is some stuff without curlies, or nested curly sequences -curly_string = Forward() -curly_item = Group(curly_string) | chars_no_curly -curly_string << LCURLY + ZeroOrMore(curly_item) + RCURLY -# quoted string is either just stuff within quotes, or stuff within quotes, within -# which there is nested curliness -quoted_item = Group(curly_string) | chars_no_quotecurly -quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE - -# Numbers can just be numbers. Only integers though. -number = Regex('[0-9]+') - -# Basis characters (by exclusion) for variable / field names. The following -# list of characters is from the btparse documentation -any_name = Regex('[^\s"#%\'(),={}]+') - -# btparse says, and the test bibs show by experiment, that macro and field names -# cannot start with a digit. In fact entry type names cannot start with a digit -# either (see tests/bibs). Cite keys can start with a digit -not_digname = Regex('[^\d\s"#%\'(),={}][^\s"#%\'(),={}]*') - -# Comment comments out to end of line -comment = (AT + CaselessLiteral('comment') + - Regex("[\s{(].*").leaveWhitespace()) - -# The name types with their digiteyness -not_dig_lower = not_digname.copy().setParseAction(lambda t: t[0].lower()) -macro_def = not_dig_lower.copy() -macro_ref = not_dig_lower.copy().setParseAction(lambda t : Macro(t[0].lower())) -field_name = not_dig_lower.copy() -# Spaces in names mean they cannot clash with field names -entry_type = not_dig_lower('entry_type') -cite_key = any_name('cite_key') -# Number has to be before macro name -string = (number | macro_ref | quoted_string | curly_string) - -# There can be hash concatenation -field_value = string + ZeroOrMore(HASH + string) -field_def = Group(field_name + EQUALS + field_value) -entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def)) - -# Entry is surrounded either by parentheses or curlies -entry = (AT + entry_type + bracketed(cite_key + COMMA + entry_contents)) - -# Preamble is a macro-like thing with no name -preamble = AT + CaselessLiteral('preamble') + bracketed(field_value) - -# Macros (aka strings) -macro_contents = macro_def + EQUALS + field_value -macro = AT + CaselessLiteral('string') + bracketed(macro_contents) - -# Implicit comments -icomment = SkipTo('@').setParseAction(lambda t : t.insert(0, 'icomment')) - -# entries are last in the list (other than the fallback) because they have -# arbitrary start patterns that would match comments, preamble or macro -definitions = Group(comment | - preamble | - macro | - entry | - icomment) - -# Start symbol -bibfile = ZeroOrMore(definitions) - - -def parse_str(str): - return bibfile.parseString(str) - - -if __name__ == '__main__': - # Run basic test - txt = """ -Some introductory text -(implicit comment) - -@ARTICLE{Authors2011, - author = {First Author and Second Author and Third Author}, - title = {An article about {S}omething}, - journal = "Journal of Articles", - year = {2011}, - volume = {16}, - pages = {1140--1141}, - number = {2} -} -""" - print('\n\n'.join(defn.dump() for defn in parse_str(txt))) -- cgit v1.2.1