# sexpParser.py
#
# Demonstration of the pyparsing module, implementing a simple S-expression
# parser.
#
# Updates:
#  November, 2011 - fixed errors in precedence of alternatives in simpleString;
#    fixed exception raised in verifyLen to properly signal the input string
#    and exception location so that markInputline works correctly; fixed
#    definition of decimal to accept a single '0' and optional leading '-'
#    sign; updated tests to improve parser coverage
#
# Copyright 2007-2011, by Paul McGuire
#
r"""
BNF reference: http://theory.lcs.mit.edu/~rivest/sexp.txt

<sexp>            :: <string> | <list>
<string>          :: <display>? <simple-string> ;
<simple-string>   :: <raw> | <token> | <base-64> | <hexadecimal> |
                     <quoted-string> ;
<display>         :: "[" <simple-string> "]" ;
<raw>             :: <decimal> ":" <bytes> ;
<decimal>         :: <decimal-digit>+ ;
        -- decimal numbers should have no unnecessary leading zeros
<bytes>           -- any string of bytes, of the indicated length
<token>           :: <tokenchar>+ ;
<base-64>         :: <decimal>? "|" ( <base-64-char> | <whitespace> )* "|" ;
<hexadecimal>     :: "#" ( <hex-digit> | <white-space> )* "#" ;
<quoted-string>   :: <decimal>? <quoted-string-body>
<quoted-string-body> :: "\"" <bytes> "\""
<list>            :: "(" ( <sexp> | <whitespace> )* ")" ;
<whitespace>      :: <whitespace-char>* ;
<token-char>      :: <alpha> | <decimal-digit> | <simple-punc> ;
<alpha>           :: <upper-case> | <lower-case> | <digit> ;
<lower-case>      :: "a" | ... | "z" ;
<upper-case>      :: "A" | ... | "Z" ;
<decimal-digit>   :: "0" | ... | "9" ;
<hex-digit>       :: <decimal-digit> | "A" | ... | "F" | "a" | ... | "f" ;
<simple-punc>     :: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ;
<whitespace-char> :: " " | "\t" | "\r" | "\n" ;
<base-64-char>    :: <alpha> | <decimal-digit> | "+" | "/" | "=" ;
<null>            :: "" ;
"""

from pyparsing import *
from base64 import b64decode
import pprint


def verifyLen(s, l, t):
    """Parse action: check an optional length prefix against the data.

    The action is attached to a ``Group`` whose first element is the value
    named ``len`` (``None`` when no length prefix was given) and whose second
    element is the data itself.

    Args:
        s: the original input string being parsed.
        l: the location in ``s`` where the match started.
        t: the matched tokens; ``t[0]`` is the group described above.

    Returns:
        The data element (``t[1]`` of the group), replacing the whole group.

    Raises:
        ParseFatalException: if a length was given and differs from
            ``len(data)``.  ``s`` and ``l`` are passed through so that
            ``markInputline`` can point at the offending location.
    """
    t = t[0]
    if t.len is not None:
        t1len = len(t[1])
        if t1len != t.len:
            raise ParseFatalException(
                s, l,
                "invalid data of length %d, expected %s" % (t1len, t.len))
    return t[1]


# define punctuation literals
LPAR, RPAR, LBRK, RBRK, LBRC, RBRC, VBAR = map(Suppress, "()[]{}|")

# Strict decimal used for length prefixes: no sign, no leading zeros.
decimal = Regex(r'0|[1-9]\d*').setParseAction(lambda t: int(t[0]))
# "#" hex-digits "#"; the digit groups between the markers are joined and
# converted to an int (t[0] and t[-1] are the literal "#" tokens).
hexadecimal = ("#" + OneOrMore(Word(hexnums)) + "#")\
                .setParseAction(lambda t: int("".join(t[1:-1]), 16))
# NOTE: this name shadows the builtin ``bytes`` for the rest of the module;
# it is kept because it mirrors the <bytes> nonterminal of the BNF.
bytes = Word(printables)
raw = Group(decimal("len") + Suppress(":") + bytes).setParseAction(verifyLen)
token = Word(alphanums + "-./_:*+=")
# Optional length, then base64 text between vertical bars; the text pieces
# are joined and decoded before the length is verified.
base64_ = Group(Optional(decimal | hexadecimal, default=None)("len") + VBAR
    + OneOrMore(Word(alphanums + "+/=")).setParseAction(
        lambda t: b64decode("".join(t)))
    + VBAR).setParseAction(verifyLen)

qString = Group(Optional(decimal, default=None)("len") +
                dblQuotedString.setParseAction(removeQuotes)
                ).setParseAction(verifyLen)
simpleString = base64_ | raw | decimal | token | hexadecimal | qString

# extended definitions
# These rebind the names only; raw, base64_ and qString above keep using the
# strict decimal they were built with.
# The sign must apply to both alternatives, hence the group: without it the
# pattern reads as "-?0" OR "[1-9]\d*" and negative numbers never match.
decimal = Regex(r'-?(0|[1-9]\d*)').setParseAction(lambda t: int(t[0]))
real = Regex(r"[+-]?\d+\.\d*([eE][+-]?\d+)?").setParseAction(
    lambda tokens: float(tokens[0]))
token = Word(alphanums + "-./_:*+=!<>")

# real is tried first so that "5.0e+03" is not split into decimal + token.
simpleString = real | base64_ | raw | decimal | token | hexadecimal | qString

display = LBRK + simpleString + RBRK
string_ = Optional(display) + simpleString

# sexp is recursive (lists contain sexps), so it is declared as a Forward
# and filled in once sexpList exists.
sexp = Forward()
sexpList = Group(LPAR + ZeroOrMore(sexp) + RPAR)
sexp << (string_ | sexpList)

######### Test data ###########
test00 = """(snicker "abc" (#03# |YWJj|))"""
test01 = """(certificate
 (issuer
  (name
   (public-key
    rsa-with-md5
    (e 15 |NFGq/E3wh9f4rJIQVXhS|)
    (n |d738/4ghP9rFZ0gAIYZ5q9y6iskDJwASi5rEQpEQq8ZyMZeIZzIAR2I5iGE=|))
   aid-committee))
 (subject
  (ref
   (public-key
    rsa-with-md5
    (e |NFGq/E3wh9f4rJIQVXhS|)
    (n |d738/4ghP9rFZ0gAIYZ5q9y6iskDJwASi5rEQpEQq8ZyMZeIZzIAR2I5iGE=|))
   tom
   mother))
 (not-before "1997-01-01_09:00:00")
 (not-after "1998-01-01_09:00:00")
 (tag
  (spend (account "12345678") (* numeric range "1" "1000"))))
"""
test02 = """(lambda (x) (* x x))"""
test03 = """(def length
   (lambda (x)
      (cond
         ((not x) 0)
         (   t   (+ 1 (length (cdr x))))
      )
   )
)
"""
test04 = """(2:XX "abc" (#03# |YWJj|))"""
test05 = """(if (is (window_name) "XMMS") (set_workspace 2))"""
test06 = """(if
  (and
    (is (application_name) "Firefox")
    (or
      (contains (window_name) "Enter name of file to save to")
      (contains (window_name) "Save As")
      (contains (window_name) "Save Image")
      ()
    )
  )
  (geometry "+140+122")
)
"""
test07 = """(defun factorial (x)
   (if (zerop x) 1
       (* x (factorial (- x 1)))))
"""
test51 = """(2:XX "abc" (#30# |YWJj|))"""
test51error = """(3:XX "abc" (#30# |YWJj|))"""

test52 = """
    (and
      (or (> uid 1000)
          (!= gid 20)
      )
      (> quota 5.0e+03)
    )
    """

# Run tests
# Collect every module-level name starting with "test", in sorted order.
# The lookup uses a snapshot of globals() rather than calling locals() inside
# the comprehension: before Python 3.12 a comprehension has its own scope, so
# locals() there would not contain the test strings.
_namespace = dict(globals())
alltests = [_namespace[name] for name in sorted(_namespace)
            if name.startswith("test")]

for t in alltests:
    print('-' * 50)
    print(t)
    try:
        sexpr = sexp.parseString(t, parseAll=True)
        pprint.pprint(sexpr.asList())
    except ParseFatalException as pfe:
        # Raised by verifyLen on a length mismatch (e.g. test51error).
        print("Error:", pfe.msg)
        print(pfe.markInputline('^'))
    print()