""" Creole Rules for parser ~~~~~~~~~~~~~~~~~~~~~~~ :copyleft: 2008-2013 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ import re class InlineRules: """ All inline rules """ proto = r'http|https|ftp|nntp|news|mailto|telnet|file|irc' # New regex for finding uris, requires uri to free stand within whitespace or lineends. url = r'''(?P (^ | (?<=\s)) (?P~)? (?P (?P %s )://[^$\s]+ ) )''' % proto # Original uri matching regex inherited from MoinMoin code. # url = r'''(?P # (^ | (?<=\s | [.,:;!?()/=])) # (?P~)? # (?P (?P %s ):\S+? ) # ($ | (?=\s | [,.:;!?()] (\s | $))) # )''' % proto link = r'''(?P \[\[ (?P.+?) \s* ([|] \s* (?P.+?) \s*)? ]] )''' # link = r'''(?P # \[\[ # (?P.+?)\|(?P.+?) # ]] # )|(?P # \[\[ # (?P (%s)://[^ ]+) \s* (?P.+?) # ]] # )| # \[\[(?P.+)\]\] # ''' % proto # image tag image = r'''(?P {{ (?P.+?) \s* (\| \s* (?P.+?) \s*)? }} )''' # -------------------------------------------------------------------------- # a macro like: <>text<> macro_inline = r''' (?P << \s* (?P\w+) \s* (?P.*?) \s* >> (?P(.|\n)*?) <> ) ''' # A single macro tag, like <> or <> macro_tag = r'''(?P <<(?P \w+) (?P.*?) \s* /*>> )''' pre_inline = r'(?P {{{ (?P.*?) }}} )' # Basic text typefaces: emphasis = r'(?P(?.+?) (?\*\* (?P.+?) \*\* )' # Creole 1.0 optional: monospace = r'(?P \#\# (?P.+?) \#\# )' superscript = r'(?P \^\^ (?P.+?) \^\^ )' subscript = r'(?P ,, (?P.+?) ,, )' underline = r'(?P __ (?P.+?) __ )' delete = r'(?P ~~ (?P.+?) ~~ )' # own additions: small = r'(?P-- (?P.+?) -- )' linebreak = r'(?P \\\\ )' escape = r'(?P ~ (?P\S) )' char = r'(?P . )' class BlockRules: """ All used block rules. """ # macro_block = r'''(?P # \s* << (?P\w+) \s* (?P.*?) >> # (?P(.|\n)+?) # <> \s* # )''' # macro_block = r'''(?P # <<(?P.*?)>> # (?P.*?) # <> # )''' macro_block = r''' (?P << \s* (?P\w+) \s* (?P.*?) \s* >> (?P(.|\n)*?) <> ) ''' line = r'''(?P ^\s*$ )''' # empty line that separates paragraphs head = r'''(?P ^ (?P=+) \s* (?P .*? ) (=|\s)*?$ )''' separator = r'(?P ^ \s* ----) [ \t]* $' # horizontal line pre_block = r'''(?P ^{{{ \s* $ (?P ([\#]!(?P\w*?)(\s+.*)?$)? (.|\n)+? ) ^}}}) ''' # Matches the whole list, separate items are parsed later. # The list *must* start with a single bullet. list = r'''(?P ^ \s* ([*][^*\#]|[\#][^\#*]).* $ ( \n[ \t]* [*\#]+.* $ )* )''' table = r'''^ \s*(?P [|].*? \s* [|]? ) \s* $''' re_flags = re.VERBOSE | re.UNICODE | re.MULTILINE def __init__(self, blog_line_breaks=True): if blog_line_breaks: # use blog style line breaks (every line break would be converted into
) self.text = r'(?P .+ ) (?P (? (? .+ )' self.rules = ( self.macro_block, self.line, self.head, self.separator, self.pre_block, self.list, self.table, self.text, ) class SpecialRules: """ re rules witch not directly used as inline/block rules. """ # Matches single list items: item = r'''^ \s* (?P (?P [\#*]+) \s* (?P .*?) ) \s* $''' # For splitting table cells: cell = r''' \| \s* ( (?P [=][^|]+ ) | (?P ( %s | [^|])+ ) ) \s* ''' % '|'.join([ InlineRules.link, InlineRules.macro_inline, InlineRules.macro_tag, InlineRules.image, InlineRules.pre_inline ]) # For pre escaping, in creole 1.0 done with ~: pre_escape = r' ^(?P\s*) ~ (?P \}\}\} \s*) $' INLINE_FLAGS = re.VERBOSE | re.UNICODE INLINE_RULES = ( InlineRules.link, InlineRules.url, InlineRules.macro_inline, InlineRules.macro_tag, InlineRules.pre_inline, InlineRules.image, InlineRules.strong, InlineRules.emphasis, InlineRules.monospace, InlineRules.underline, InlineRules.superscript, InlineRules.subscript, InlineRules.small, InlineRules.delete, InlineRules.linebreak, InlineRules.escape, InlineRules.char ) def _verify_rules(rules, flags): """ Simple verify the rules -> try to compile it ;) >>> _verify_rules(INLINE_RULES, INLINE_FLAGS) Rule test ok. >>> block_rules = BlockRules() >>> _verify_rules(block_rules.rules, block_rules.re_flags) Rule test ok. """ # Test with re.compile rule_list = [] for rule in rules: try: # print(rule) re.compile(rule, flags) # Try to merge the rules. e.g. Check if group named double used. rule_list.append(rule) re.compile('|'.join(rule_list), flags) except Exception: print(" *** Error with rule:") print(rule) print(" -" * 39) raise print("Rule test ok.") if __name__ == "__main__": import doctest print(doctest.testmod()) print("-" * 80)