diff options
author | Jens Diemer <github.com@jensdiemer.de> | 2008-11-28 14:49:40 +0000 |
---|---|---|
committer | Jens Diemer <github.com@jensdiemer.de> | 2008-11-28 14:49:40 +0000 |
commit | 74ae679e1e802cbeee303227d6a561cef4e46d0b (patch) | |
tree | 797dcae062300234faeb2e76ca05ede4139b0733 | |
download | creole-74ae679e1e802cbeee303227d6a561cef4e46d0b.tar.gz |
init the current snapshot from PyLucid: http://www.pylucid.org
-rw-r--r-- | AUTHORS | 27 | ||||
-rw-r--r-- | LICENSE | 18 | ||||
-rw-r--r-- | creole/__init__.py | 0 | ||||
-rw-r--r-- | creole/creole.py | 742 | ||||
-rw-r--r-- | creole/creole2html.py | 342 | ||||
-rw-r--r-- | creole/html2creole.py | 726 | ||||
-rw-r--r-- | tests/markup_creole.py | 658 |
7 files changed, 2513 insertions, 0 deletions
@@ -0,0 +1,27 @@ + +INITIAL AUTHORS + + The original creole markup parser and emitter are from the MoinMoin + project: + * 2007 MoinMoin:RadomirDopieralski (creole 0.5 implementation), + * 2007 MoinMoin:ThomasWaldmann (updates) + + +PRIMARY AUTHORS are and/or have been (alphabetic order): + +* Diemer, Jens + Main Developer since the first code line. + ohloh.net profile: <http://www.ohloh.net/accounts/4179/> + Homepage: <http://www.jensdiemer.de/> + + +CONTRIBUTORS are and/or have been (alphabetic order): + - + + +Special thanks to the python-forum.de guys, particularly (in alphabetic order): +* sma + +last SVN commit info: + $LastChangedDate:$ + $Rev:$
\ No newline at end of file @@ -0,0 +1,18 @@ +All rights reserved. + +python-creole is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License version 3 or later as published by +the Free Software Foundation. + +complete GNU General Public License version 3: + http://www.gnu.org/licenses/gpl-3.0.txt + +German translation: + http://www.gnu.de/documents/gpl.de.html + + +copyleft 2008 by the python-creole team, see AUTHORS for more details. + +last SVN commit info: + $LastChangedDate:$ + $Rev:$
\ No newline at end of file diff --git a/creole/__init__.py b/creole/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/creole/__init__.py diff --git a/creole/creole.py b/creole/creole.py new file mode 100644 index 0000000..31623fc --- /dev/null +++ b/creole/creole.py @@ -0,0 +1,742 @@ +# -*- coding: iso-8859-1 -*- +""" + Creole wiki markup parser + + See http://wikicreole.org/ for latest specs. + + Notes: + * No markup allowed in headings. + Creole 1.0 does not require us to support this. + * No markup allowed in table headings. + Creole 1.0 does not require us to support this. + * No (non-bracketed) generic url recognition: this is "mission impossible" + except if you want to risk lots of false positives. Only known protocols + are recognized. + * We do not allow ":" before "//" italic markup to avoid urls with + unrecognized schemes (like wtf://server/path) triggering italic rendering + for the rest of the paragraph. + + PyLucid Updates by the PyLucid team: + - Bugfixes and better html code style + - Make the image tag match more strict, so it doesn't clash with + django template tags + - Add a passthrough for all django template blocktags + - Add a passthrough for html code lines + + @copyright: 2007 MoinMoin:RadomirDopieralski (creole 0.5 implementation), + 2007 MoinMoin:ThomasWaldmann (updates) + 2008 PyLucid:JensDiemer (PyLucid patches) + @license: GNU GPL, see COPYING for details. +""" + +import re + + +class Rules: + """Hold all the rules for generating regular expressions.""" + + # For the inline elements: + proto = r'http|https|ftp|nntp|news|mailto|telnet|file|irc' + url = r'''(?P<url> + (^ | (?<=\s | [.,:;!?()/=])) + (?P<escaped_url>~)? + (?P<url_target> (?P<url_proto> %s ):\S+? ) + ($ | (?=\s | [,.:;!?()] (\s | $))) + )''' % proto + link = r'''(?P<link> + \[\[ + (?P<link_target>.+?) \s* + ([|] \s* (?P<link_text>.+?) \s*)? + ]] + )''' + +# link = r'''(?P<link1> +# \[\[ +# (?P<link_target1>.+?)\|(?P<link_text1>.+?) +# ]] +# )|(?P<link2> +# \[\[ +# (?P<link_target2> (%s)://[^ ]+) \s* (?P<link_text2>.+?) +# ]] +# )| +# \[\[(?P<internal_link>.+)\]\] +# ''' % proto + + #-------------------------------------------------------------------------- + # The image rule should not match on django template tags! So we make it + # more restricted. + # It matches only if... + # ...image target ends with a picture extention + # ...separator >|< and the image text exist + image = r'''(?P<image> + {{ + (?P<image_target>.+?(\.jpg|\.jpeg|\.gif|\.png)) \s* + (\| \s* (?P<image_text>.+?) \s*)? + }} + )(?i)''' + #-------------------------------------------------------------------------- + + macro_block = r'''(?P<macro_block> + \s* << (?P<macro_block_start>\w+) \s* (?P<macro_block_args>.*?) >> + (?P<macro_block_text>(.|\n)+?) + <</(?P=macro_block_start)>> \s* + )''' + + macro = r'''(?P<macro> + << + (?P<macro_name> \w+) (?P<macro_args>.*?) + >> + )''' + code = r'(?P<code> {{{ (?P<code_text>.*?) }}} )' + emph = r'(?P<emph> (?<!:)// )' # there must be no : in front of the // + # avoids italic rendering in urls with + # unknown protocols + strong = r'(?P<strong> \*\* )' + linebreak = r'(?P<linebreak> \\\\ )' + escape = r'(?P<escape> ~ (?P<escaped_char>\S) )' + char = r'(?P<char> . )' + + # For the block elements: + separator = r'(?P<separator> ^ \s* ---- \s* $ )' # horizontal line + line = r'''(?P<line> ^\s*$ )''' # empty line that separates paragraphs + head = r'''(?P<head> + ^ + (?P<head_head>=+) \s* + (?P<head_text> .*? ) + =*$ + )''' + text = r'(?P<text> .+ ) (?P<break> (?<!\\)$\n(?!\s*$) )?' + list = r'''(?P<list> + ^ [ \t]* ([*][^*\#]|[\#][^\#*]).* $ + ( \n[ \t]* [*\#]+.* $ )* + )''' # Matches the whole list, separate items are parsed later. The + # list *must* start with a single bullet. + item = r'''^ \s* (?P<item> + (?P<item_head> [\#*]+) \s* + (?P<item_text> .*?) + ) \s* $''' # Matches single list items + pre = r'''(?P<pre> + ^{{{ \s* $ + (\n)? + (?P<pre_text> + ([\#]!(?P<pre_kind>\w*?)(\s+.*)?$)? + (.|\n)+? + ) + (\n)? + ^}}} \s*$ + )''' + pre_escape = r' ^(?P<indent>\s*) ~ (?P<rest> \}\}\} \s*) $' + + # Pass-through all django template blocktags + pass_block = r'''(?P<pass_block> + {% \s* (?P<pass_block_start>.+?) \s* (?P<pass_block_args>.*?) \s* %} + (\n|.)*? + {% \s* end(?P=pass_block_start) \s* %} + )''' + + pass_line = r'''\n(?P<pass_line> + (\n|\s)* + ({%.*?%})| + ({{.*?}}) + (\n|\s)* + )''' + pass_inline = r'''(?P<pass_inline> + ({%.*?%})| + ({{.*?}}) + )''' + + #Pass-through html code lines + html = r'''(?P<html> + ^[ \t]*<[a-zA-Z].*?<(/[a-zA-Z ]+?)>[ \t]*$ + )''' + + table = r'''^ \s*(?P<table> + [|].*? \s* + [|]? + ) \s* $''' + + # For splitting table cells: + cell = r''' + \| \s* + ( + (?P<head> [=][^|]+ ) | + (?P<cell> ( %s | [^|])+ ) + ) \s* + ''' % '|'.join([link, macro, image, code]) + + #-------------------------------------------------------------------------- +# blockelements = ( +# "head", "list", "pre", "code", "table", "separator", "macro", +# "pass_block", "pass_line", "html" +# ) + +class Parser: + """ + Parse the raw text and create a document object + that can be converted into output using Emitter. + """ + # For pre escaping, in creole 1.0 done with ~: + pre_escape_re = re.compile(Rules.pre_escape, re.M | re.X) + # for link descriptions: + link_re = re.compile( + '|'.join([Rules.image, Rules.linebreak, Rules.char]), + re.X | re.U + ) + item_re = re.compile(Rules.item, re.X | re.U | re.M) # for list items + cell_re = re.compile(Rules.cell, re.X | re.U) # for table cells + # For block elements: + block_re = re.compile( + '|'.join([ + Rules.pass_block, + Rules.pass_line, + Rules.macro_block, + Rules.html, + Rules.line, Rules.head, Rules.separator, Rules.pre, Rules.list, + Rules.table, Rules.text, + ]), + re.X | re.U | re.M + ) + # For inline elements: + inline_re = re.compile( + '|'.join([ + Rules.link, Rules.url, Rules.macro, + Rules.code, Rules.image, + Rules.pass_inline, + Rules.strong, Rules.emph, Rules.linebreak, + Rules.escape, Rules.char + ]), + re.X | re.U + ) + + def __init__(self, raw): + self.raw = raw + self.root = DocNode('document', None) + self.cur = self.root # The most recent document node + self.text = None # The node to add inline characters to + self.last_text_break = None # Last break node, inserted by _text_repl() + + #-------------------------------------------------------------------------- + + def cleanup_break(self, old_cur): + """ + remove unused end line breaks. + Should be called before a new block element. + e.g.: + <p>line one<br /> + line two<br /> <--- remove this br-tag + </p> + """ + if self.cur.children: + last_child = self.cur.children[-1] + if last_child.kind == "break": + del(self.cur.children[-1]) + + def _upto(self, node, kinds): + """ + Look up the tree to the first occurence + of one of the listed kinds of nodes or root. + Start at the node node. + """ + self.cleanup_break(node) # remove unused end line breaks. + while node.parent is not None and not node.kind in kinds: + node = node.parent + + return node + + def _upto_block(self): + self.cur = self._upto(self.cur, ('document',))# 'section', 'blockquote')) + + #__________________________________________________________________________ + # The _*_repl methods called for matches in regexps. Sometimes the + # same method needs several names, because of group names in regexps. + + def _pass_block_repl(self, groups): + """ Pass-through all django template blocktags """ + self._upto_block() + self.cur = self.root + DocNode("pass_block", self.cur, groups["pass_block"]) + self.text = None + _pass_block_start_repl = _pass_block_repl + _pass_block_end_repl = _pass_block_repl + + def _pass_line_repl(self, groups): + """ Pass-through all django tags witch is alone in a code line """ + self._upto_block() + self.cur = self.root + DocNode("pass_line", self.cur, groups["pass_line"]) + self.text = None + + def _pass_inline_repl(self, groups): + """ Pass-through all inline django tags""" + DocNode("pass_inline", self.cur, groups["pass_inline"]) + self.text = None + + def _html_repl(self, groups): + """ Pass-through html code """ + self._upto_block() + DocNode("html", self.root, groups["html"]) + self.text = None + + def _text_repl(self, groups): +# print "_text_repl()", self.cur.kind, groups.get('break') != None + if self.cur.kind in ('table', 'table_row', 'bullet_list', + 'number_list'): + self._upto_block() + + if self.cur.kind in ('document', 'section', 'blockquote'): + self.cur = DocNode('paragraph', self.cur) + + self.parse_inline(groups.get('text', u"")) + + if groups.get('break') and self.cur.kind in ('paragraph', + 'emphasis', 'strong', 'code'): + self.last_text_break = DocNode('break', self.cur, u"") + + self.text = None + _break_repl = _text_repl + + def _url_repl(self, groups): + """Handle raw urls in text.""" + if not groups.get('escaped_url'): + # this url is NOT escaped + target = groups.get('url_target', u"") + node = DocNode('link', self.cur) + node.content = target + DocNode('text', node, node.content) + self.text = None + else: + # this url is escaped, we render it as text + if self.text is None: + self.text = DocNode('text', self.cur, u"") + self.text.content += groups.get('url_target') + _url_target_repl = _url_repl + _url_proto_repl = _url_repl + _escaped_url = _url_repl + + def _link_repl(self, groups): + """Handle all kinds of links.""" + target = groups.get('link_target', u"") + text = (groups.get('link_text', u"") or u"").strip() + parent = self.cur + self.cur = DocNode('link', self.cur) + self.cur.content = target + self.text = None + re.sub(self.link_re, self._replace, text) + self.cur = parent + self.text = None + _link_target_repl = _link_repl + _link_text_repl = _link_repl + + def _add_macro(self, macro_name, macro_args, macro_text=u""): +# self._upto_block() + node = DocNode("macro", self.cur, macro_text.strip()) + node.macro_name = macro_name + node.macro_args = macro_args.strip() + self.text = None + + def _macro_block_repl(self, groups): + """Handles macros using the placeholder syntax.""" + #self.debug_groups(groups) + self._upto_block() + self.cur = self.root + self._add_macro( + macro_name = groups['macro_block_start'], + macro_text = groups.get('macro_block_text', u""), + macro_args = groups.get('macro_block_args', u""), + ) + self.text = None + _macro_block_start_repl = _macro_block_repl + _macro_block_args_repl = _macro_block_repl + _macro_block_text_repl = _macro_block_repl + + def _macro_repl(self, groups): + """Handles macros using the placeholder syntax.""" + macro_name = groups.get('macro_name', u"") + macro_args = groups.get('macro_args', u"") + self._add_macro(macro_name, macro_args) + self.text = None + +# text = (groups.get('macro_text', u"") or u"").strip() +# node = DocNode('macro', self.cur, name) +# node.args = groups.get('macro_args', u"") or '' +# DocNode('text', node, text or name) +# self.text = None + _macro_name_repl = _macro_repl + _macro_args_repl = _macro_repl +# _macro_text_repl = _macro_repl + + def _image_repl(self, groups): + """Handles images and attachemnts included in the page.""" + target = groups.get('image_target', u"").strip() + text = (groups.get('image_text', u"") or u"").strip() + node = DocNode("image", self.cur, target) + DocNode('text', node, text or node.content) + self.text = None + _image_target_repl = _image_repl + _image_text_repl = _image_repl + + def _separator_repl(self, groups): + self._upto_block() + DocNode('separator', self.cur) + + def _item_repl(self, groups): + bullet = groups.get('item_head', u"") + text = groups.get('item_text', u"") + if bullet[-1] == '#': + kind = 'number_list' + else: + kind = 'bullet_list' + level = len(bullet)-1 + lst = self.cur + # Find a list of the same kind and level up the tree + while (lst and + not (lst.kind in ('number_list', 'bullet_list') and + lst.level == level) and + not lst.kind in ('document', 'section', 'blockquote')): + lst = lst.parent + if lst and lst.kind == kind: + self.cur = lst + else: + # Create a new level of list + self.cur = self._upto(self.cur, + ('list_item', 'document', 'section', 'blockquote')) + self.cur = DocNode(kind, self.cur) + self.cur.level = level + self.cur = DocNode('list_item', self.cur) + self.cur.level = level+1 + self.parse_inline(text) + self.text = None + _item_text_repl = _item_repl + _item_head_repl = _item_repl + + def _list_repl(self, groups): + self.item_re.sub(self._replace, groups["list"]) + + def _head_repl(self, groups): + self._upto_block() + node = DocNode('header', self.cur, groups['head_text'].strip()) + node.level = len(groups['head_head']) + self.text = None + _head_head_repl = _head_repl + _head_text_repl = _head_repl + + def _table_repl(self, groups): + row = groups.get('table', '|').strip() + self.cur = self._upto(self.cur, ( + 'table', 'document', 'section', 'blockquote')) + if self.cur.kind != 'table': + self.cur = DocNode('table', self.cur) + tb = self.cur + tr = DocNode('table_row', tb) + + for m in self.cell_re.finditer(row): + cell = m.group('cell') + if cell: + text = cell.strip() + self.cur = DocNode('table_cell', tr) + self.text = None + else: + text = m.group('head').strip('= ') + self.cur = DocNode('table_head', tr) + self.text = DocNode('text', self.cur, u"") + self.parse_inline(text) + + self.cur = tb + self.text = None + + def _pre_repl(self, groups): + self._upto_block() + kind = groups.get('pre_kind', None) + text = groups.get('pre_text', u"") + def remove_tilde(m): + return m.group('indent') + m.group('rest') + text = self.pre_escape_re.sub(remove_tilde, text) + node = DocNode('preformatted', self.cur, text) + node.sect = kind or '' + self.text = None + _pre_text_repl = _pre_repl + _pre_head_repl = _pre_repl + _pre_kind_repl = _pre_repl + + def _line_repl(self, groups): + """ Transfer newline from the original markup into the html code """ + self._upto_block() + DocNode('line', self.cur, u"") + + def _code_repl(self, groups): + DocNode('code', self.cur, groups.get('code_text', u"").strip()) + self.text = None + _code_text_repl = _code_repl + _code_head_repl = _code_repl + + def _emph_repl(self, groups): + if self.cur.kind != 'emphasis': + self.cur = DocNode('emphasis', self.cur) + else: + self.cur = self._upto(self.cur, ('emphasis', )).parent + self.text = None + + def _strong_repl(self, groups): + if self.cur.kind != 'strong': + self.cur = DocNode('strong', self.cur) + else: + self.cur = self._upto(self.cur, ('strong', )).parent + self.text = None + + def _linebreak_repl(self, groups): + DocNode('break', self.cur, None) + self.text = None + + def _escape_repl(self, groups): + if self.text is None: + self.text = DocNode('text', self.cur, u"") + self.text.content += groups.get('escaped_char', u"") + + def _char_repl(self, groups): + if self.text is None: + self.text = DocNode('text', self.cur, u"") + self.text.content += groups.get('char', u"") + + #-------------------------------------------------------------------------- + + def _replace(self, match): + """Invoke appropriate _*_repl method. Called for every matched group.""" + groups = match.groupdict() + for name, text in groups.iteritems(): + if text is not None: + #if name != "char": print "%15s: %r" % (name, text) + #print "%15s: %r" % (name, text) + replace = getattr(self, '_%s_repl' % name) + replace(groups) + return + + def parse_inline(self, raw): + """Recognize inline elements inside blocks.""" + re.sub(self.inline_re, self._replace, raw) + + def parse_block(self, raw): + """Recognize block elements.""" + re.sub(self.block_re, self._replace, raw) + + def parse(self): + """Parse the text given as self.raw and return DOM tree.""" + # convert all lineendings to \n + text = self.raw.replace("\r\n", "\n").replace("\r", "\n") + self.parse_block(text) + return self.root + + + #-------------------------------------------------------------------------- + def debug(self, start_node=None): + """ + Display the current document tree + """ + print "_"*80 + + if start_node == None: + start_node = self.root + print " document tree:" + else: + print " tree from %s:" % start_node + + print "="*80 + def emit(node, ident=0): + for child in node.children: + print u"%s%s: %r" % (u" "*ident, child.kind, child.content) + emit(child, ident+4) + emit(start_node) + print "*"*80 + + def debug_groups(self, groups): + print "_"*80 + print " debug groups:" + for name, text in groups.iteritems(): + if text is not None: + print "%15s: %r" % (name, text) + print "-"*80 + + + +#------------------------------------------------------------------------------ + + +class DocNode: + """ + A node in the document. + """ + def __init__(self, kind='', parent=None, content=None): + self.children = [] + self.parent = parent + self.kind = kind + + if content: + content = unicode(content) + self.content = content + + if self.parent is not None: + self.parent.children.append(self) + + def __str__(self): +# return "DocNode kind '%s', content: %r" % (self.kind, self.content) + return "<DocNode %s: %r>" % (self.kind, self.content) + def __repr__(self): + return u"<DocNode %s: %r>" % (self.kind, self.content) + + def debug(self): + print "_"*80 + print "\tDocNode - debug:" + print "str(): %s" % self + print "attributes:" + for i in dir(self): + if i.startswith("_"): + continue + print "%20s: %r" % (i, getattr(self, i, "---")) + + +#------------------------------------------------------------------------------ + + +if __name__=="__main__": + txt = r"""== a headline + +Here is [[a internal]] link. +This is [[http://domain.tld|external links]]. +A [[internal links|different]] link name. + +Basics: **bold** or //italic// +or **//both//** or //**both**// +Force\\linebreak. + +The current page name: >{{ PAGE.name }}< great? +A {% lucidTag page_update_list count=10 %} PyLucid plugin + +{% sourcecode py %} +import sys + +sys.stdout("Hello World!") +{% endsourcecode %} + +A [[www.domain.tld|link]]. +a {{/image.jpg|My Image}} image + +no image: {{ foo|bar }}! +picture [[www.domain.tld | {{ foo.JPG | Foo }} ]] as a link + +END + +==== Headline 1 + +{% a tag 1 %} + +==== Headline 2 + +{% a tag 2 %} + +the end +""" + + txt = r""" +==== Headline 1 + +The current page name: >{{ PAGE.name }}< great? + +{% a tag 1 %} + +==== Headline 2 + +{% a tag 2 %} + +some text + +{% something arg1="foo" arg2="bar" arg2=3 %} +foobar +{% endsomething %} + +the end +""" + + txt = r"""A {% lucidTag page_update_list count=10 %} PyLucid plugin + +{% sourcecode py %} +import sys + +sys.stdout("Hello World!") +{% endsourcecode %} +A [[www.domain.tld|link]].""" + + txt = r""" +==== Headline 1 + +On {% a tag 1 %} line +line two + +==== Headline 2 + +{% a tag 2 %} + +A block: +{% block %} +<Foo:> {{ Bar }} +{% endblock %} +end block + +{% block1 arg="jo" %} +eofjwqp +{% endblock1 %} + +A block without the right end block: +{% block1 %} +111 +{% endblock2 %} +BBB + +A block without endblock: +{% block3 %} +222 +{% block3 %} +CCC + +the end""" +# txt = r''' +#<<jojo>> +#owrej +#<<code>> +#some code +#<</code>> +#a macro: +#<<code ext=.css>> +#/* Stylesheet */ +#form * { +# vertical-align:middle; +#} +#<</code>> +#the end +#<<code>> +#<<code>> +#jup +#<</code>> +#''' + + + print "-"*80 + p = Parser(txt) + document = p.parse() + p.debug() + + def test_rules(rules, txt): + def display_match(match): + groups = match.groupdict() + for name, text in groups.iteritems(): + if name != "char" and text != None: + print "%13s: %r" % (name, text) + re.sub(rules, display_match, txt) + +# print "_"*80 +# print "plain block rules match:" +# test_rules(Parser("").block_re, txt) +# +# print "_"*80 +# print "plain inline rules match:" +# test_rules(Parser("").inline_re, txt) + + print "---END---"
\ No newline at end of file diff --git a/creole/creole2html.py b/creole/creole2html.py new file mode 100644 index 0000000..4a3b52f --- /dev/null +++ b/creole/creole2html.py @@ -0,0 +1,342 @@ +# -*- coding: utf-8 -*- + +""" +WikiCreole to HTML converter +This program is an example of how the creole.py WikiCreole parser +can be used. + +Copyright (c) 2007, Radomir Dopieralski <creole@sheep.art.pl> +:copyleft: 2008 by the PyLucid team, see AUTHORS for more details. + + PyLucid Updates by the PyLucid team: + - Bugfixes and better html code style + - Add a passthrough for all django template blocktags + - Add a passthrough for html code lines + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +import sys, re, traceback + +from creole import Parser + +import macros + +from PyLucid.tools.utils import escape + +class Rules: + # For the link targets: + proto = r'http|https|ftp|nntp|news|mailto|telnet|file|irc' + extern = r'(?P<extern_addr>(?P<extern_proto>%s):.*)' % proto + interwiki = r''' + (?P<inter_wiki> [A-Z][a-zA-Z]+ ) : + (?P<inter_page> .* ) + ''' + +class HtmlEmitter: + """ + Generate HTML output for the document + tree consisting of DocNodes. + """ + + addr_re = re.compile('|'.join([ + Rules.extern, + Rules.interwiki, + ]), re.X | re.U) # for addresses + + def __init__(self, root, verbose=1, stderr=sys.stderr): + self.root = root + self.verbose = verbose + self.stderr = stderr + + def get_text(self, node): + """Try to emit whatever text is in the node.""" + try: + return node.children[0].content or '' + except: + return node.content or '' + + def html_escape(self, text): + return escape(text) + #return text.replace('&', '&').replace('<', '<').replace('>', '>') + + def attr_escape(self, text): + return self.html_escape(text).replace('"', '"') + + # *_emit methods for emitting nodes of the document: + + def document_emit(self, node): + return self.emit_children(node) + + def text_emit(self, node): + return self.html_escape(node.content) + + def separator_emit(self, node): + return u'<hr />\n'; + + def paragraph_emit(self, node): + return u'<p>%s</p>\n' % self.emit_children(node) + + def _list_emit(self, node, list_type): + if node.parent.kind in ("document",): + # The first list item + formatter = u'' + else: + formatter = u'\n' + + if list_type == "li": + formatter += ( + u'%(i)s<%(t)s>%(c)s</%(t)s>' + ) + else: + formatter += ( + u'%(i)s<%(t)s>%(c)s\n' + '%(i)s</%(t)s>' + ) + return formatter % { + "i": "\t"*node.level, + "c": self.emit_children(node), + "t": list_type, + } + + def bullet_list_emit(self, node): + return self._list_emit(node, list_type=u"ul") + + def number_list_emit(self, node): + return self._list_emit(node, list_type=u"ol") + + def list_item_emit(self, node): + return self._list_emit(node, list_type=u"li") + + def table_emit(self, node): + return u'<table>\n%s</table>\n' % self.emit_children(node) + + def table_row_emit(self, node): + return u'<tr>\n%s</tr>\n' % self.emit_children(node) + + def table_cell_emit(self, node): + return u'\t<td>%s</td>\n' % self.emit_children(node) + + def table_head_emit(self, node): + return u'\t<th>%s</th>\n' % self.emit_children(node) + + def emphasis_emit(self, node): + return u'<i>%s</i>' % self.emit_children(node) + + def strong_emit(self, node): + return u'<strong>%s</strong>' % self.emit_children(node) + + def header_emit(self, node): + return u'<h%d>%s</h%d>\n' % ( + node.level, self.html_escape(node.content), node.level) + + def code_emit(self, node): + return u'<tt>%s</tt>' % self.html_escape(node.content) + + def link_emit(self, node): + target = node.content + if node.children: + inside = self.emit_children(node) + else: + inside = self.html_escape(target) + m = self.addr_re.match(target) + if m: + if m.group('extern_addr'): + return u'<a href="%s">%s</a>' % ( + self.attr_escape(target), inside) + elif m.group('inter_wiki'): + raise NotImplementedError + return u'<a href="%s">%s</a>' % ( + self.attr_escape(target), inside) + + def image_emit(self, node): + target = node.content + text = self.get_text(node) + m = self.addr_re.match(target) + if m: + if m.group('extern_addr'): + return u'<img src="%s" alt="%s">' % ( + self.attr_escape(target), self.attr_escape(text)) + elif m.group('inter_wiki'): + raise NotImplementedError + return u'<img src="%s" alt="%s">' % ( + self.attr_escape(target), self.attr_escape(text)) + + def macro_emit(self, node): + #print node.debug() + macro_name = node.macro_name + try: + macro = getattr(macros, macro_name) + except AttributeError, e: + return self.error( + u"Macro '%s' doesn't exist" % macro_name, + handle_traceback = True + ) + + try: + result = macro(args=node.macro_args, text=node.content) + except Exception, err: + return self.error( + u"Macro '%s' error: %s" % (macro_name, err), + handle_traceback = True + ) + + if not isinstance(result, unicode): + msg = u"Macro '%s' doesn't return a unicode string!" % macro_name + if self.verbose>1: + msg += " - returns: %r, type %r" % (result, type(result)) + return self.error(msg) + + return result + + def break_emit(self, node): + if node.parent.kind == "list_item": + return u"<br />\n" + "\t"*node.parent.level + elif node.parent.kind in ("table_head", "table_cell"): + return u"<br />\n\t\t" + else: + return u"<br />\n" + + def line_emit(self, node): + return u"\n" + + def preformatted_emit(self, node): + return u"<pre>\n%s\n</pre>\n" % self.html_escape(node.content) + + def pass_block_emit(self, node): + """ Pass-through all django template blocktags and html code lines """ + return node.content + "\n" + pass_line_emit = pass_block_emit + html_emit = pass_block_emit + + def pass_inline_emit(self, node): + """ Pass-through all django template tags """ + return node.content + + def default_emit(self, node): + """Fallback function for emitting unknown nodes.""" + raise NotImplementedError("Node '%s' unknown" % node.kind) + + def emit_children(self, node): + """Emit all the children of a node.""" + return u''.join([self.emit_node(child) for child in node.children]) + + def emit_node(self, node): + """Emit a single node.""" + #print "%s_emit: %r" % (node.kind, node.content) + emit = getattr(self, '%s_emit' % node.kind, self.default_emit) + return emit(node) + + def emit(self): + """Emit the document represented by self.root DOM tree.""" + return self.emit_node(self.root) + + def error(self, text, handle_traceback=False): + """ + Error Handling. + """ + if self.verbose>1 and handle_traceback: + self.stderr.write( + "<pre>%s</pre>" % traceback.format_exc() + ) + + if self.verbose>0: + return u"[Error: %s]" % text + else: + # No error output + return u"" + +if __name__=="__main__": + txt = r"""== a headline + +Here is [[a internal]] link. +This is [[http://domain.tld|external links]]. +A [[internal links|different]] link name. + +Basics: **bold** or //italic// +or **//both//** or //**both**// +Force\\linebreak. + +The current page name: >{{ PAGE.name }}< great? +A {% lucidTag page_update_list count=10 %} PyLucid plugin + +{% sourcecode py %} +import sys + +sys.stdout("Hello World!") +{% endsourcecode %} + +A [[www.domain.tld|link]]. +a {{/image.jpg|My Image}} image + +no image: {{ foo|bar }}! +picture [[www.domain.tld | {{ foo.JPG | Foo }} ]] as a link + +END""" + + + txt = r""" +==== Headline 1 + +On {% a tag 1 %} line +line two + +==== Headline 2 + +{% a tag 2 %} + +A block: +{% block %} +<Foo:> {{ Bar }} +{% endblock %} +end block + +{% block1 arg="jo" %} +eofjwqp +{% endblock1 %} + +A block without the right end block: +{% block1 %} +111 +{% endblock2 %} +BBB + +A block without endblock: +{% block3 %} +222 +{% block3 %} +CCC + +the end""" + + print "-"*80 +# from creole_alt.creole import Parser + p = Parser(txt) + document = p.parse() + p.debug() + + print HtmlEmitter(document).emit()
\ No newline at end of file diff --git a/creole/html2creole.py b/creole/html2creole.py new file mode 100644 index 0000000..d495593 --- /dev/null +++ b/creole/html2creole.py @@ -0,0 +1,726 @@ +# -*- coding: utf-8 -*- + +""" + html2creole converter + ~~~~~~~~~~~~~~~~~~~~~ + + convert html code into creole markup. + + Last commit info: + ~~~~~~~~~~~~~~~~~ + $LastChangedDate$ + $Rev$ + $Author:JensDiemer $ + + :copyleft: 2008 by the PyLucid team, see AUTHORS for more details. + :license: GNU GPL v3 or above, see LICENSE for more details. +""" + +from HTMLParser import HTMLParser + +BOTH2CREOLE = { + "p": "\n", + "br": "\n", + "i": "//", + "strong": "**", + "hr": "----", + + "table": "\n", +} +START2CREOLE = { + "a": "[[", + + "tr": "", + "td": "|", + "th": "|", + + "h1": "\n= ", + "h2": "\n== ", + "h3": "\n=== ", + "h4": "\n==== ", + "h5": "\n===== ", + "h6": "\n====== ", +} +END2CREOLE = { + "a": "]]", + + "tr": "|\n", + "td": "", + "th": "", + + "h1": "\n", + "h2": "\n", + "h3": "\n", + "h4": "\n", + "h5": "\n", + "h6": "\n", +} +ENTITY2HTML = { + "gt": ">", + "lt": "<", +} + +NO_WIKI_TAGS = ("pre", "tt") + +import inspect + +class DebugList(list): + def __init__(self, html2creole): + self.html2creole = html2creole + super(DebugList, self).__init__() + + def append(self, item): +# for stack_frame in inspect.stack(): print stack_frame + + line, method = inspect.stack()[1][2:4] + + print "%-8s append: %-35r (%-15s line:%s)" % ( + self.html2creole.getpos(), item, + method, line + ) + list.append(self, item) + + +class Html2Creole(HTMLParser): + def __init__(self, debug=False): + HTMLParser.__init__(self) + + self.debugging = debug + if self.debugging: + print "_"*79 + print "Html2Creole debug is on! print every data append." + self.result = DebugList(self) + else: + self.result = [] + + self.__last_tag = None + self.__inner_block = None + + self.__list_level = 0 # list level + self.__inner_listitem = False # in <li>? + self.__list_type = "" # <ul> += "*" or <ol> += "#" + + self.__inner_table_cell = False + + def _error(self, method, tag): + print ">>> unknown %s @ %s: %r" % (method, self.getpos(), tag) + + def debug(self, method, txt): + if not self.debugging: + return + print "%-8s %8s: %s" % (self.getpos(), method, txt) + + + def _get_markup(self, tag, transdict={}): + for d in (BOTH2CREOLE, transdict): + if tag in d: + return d[tag] + + def handle_starttag(self, tag, attrs): + self.debug("starttag", "%r atts: %s" % (tag, attrs)) + + self.__last_tag = tag + + if tag in NO_WIKI_TAGS: + # Staring a pre block + self.__inner_block = tag + self.result.append("{{{") + return + + attr_dict = dict(attrs) + + if tag in ("th", "td"): + self.__inner_table_cell = True + + if tag == "a": + data = "[[%s|" % attr_dict["href"] + elif tag == "img": + data = "{{%(src)s|%(alt)s}}" % attr_dict + elif tag == "ul": + self.__list_type += "*" + self.__list_level += 1 + return + elif tag == "ol": + self.__list_type += "#" + self.__list_level += 1 + return + elif tag == "li": + self.__inner_listitem = True + self.result.append(self.__list_type + " ") + return + else: + data = self._get_markup(tag, transdict=START2CREOLE) + + if data == None: + self._error("starttag", tag) + else: + self.result.append(data) + + def handle_data(self, data): + self.debug("data", "%r" % data) + + def strip_ex_second(data): + lines = data.split("\n") + # strip every item, except the first one + lines = lines[:1] + [line.strip() for line in lines[1:]] + return "\\\\".join(lines) + + if self.__list_level > 0: # we are in <ul> or <ol> list + if self.__inner_listitem == False: # not in <li> + data = data.strip() + + if self.__inner_listitem or self.__inner_table_cell: + listitem = strip_ex_second(data) + self.result.append(listitem) + return + + if self.__inner_block == None: + data = data.replace("\n", "") + if data=="": + return + + self.result.append(data) + +# def get_starttag_text(self, *args, **kwargs): +# print ">>> XXX", args, kwargs + + def handle_charref(self, name): + self.debug("charref", "%r" % name) + if self.__inner_block != None: + self.result.append("&#%s;" % name) + else: + self._error("charref", name) + + def handle_entityref(self, name): + self.debug("entityref", "%r" % name) + if name in ENTITY2HTML: + self.result.append(ENTITY2HTML[name]) + else: + self._error("entityref", name) + + def handle_startendtag(self, tag, attrs): + self.debug("startendtag", "%r atts: %s" % (tag, attrs)) + attr_dict = dict(attrs) + + if tag == "img": + data = "{{%(src)s|%(alt)s}}" % attr_dict + else: + data = self._get_markup(tag) + + if data == None: + self._error("startendtag", tag) + else: + self.result.append(data) + + def handle_endtag(self, tag): + self.debug("endtag", "%r" % tag) + if self.__inner_block != None: + # We are in a block + if tag == self.__inner_block: + # The end of the started end block + self.__inner_block = None + if tag in NO_WIKI_TAGS: + self.result.append("}}}") + return + else: + raise NotImplementedError() + else: + # We in a block + self.result.append(tag) + return + + if tag in ("ul", "ol"): + # End of a list + self.__list_level -= 1 + self.__list_type = self.__list_type[:-1] + if self.__list_level == 0: # Last close tag + self.result.append("\n") + return + elif tag == "li": + self.__inner_listitem = False + self.result.append("\n") + return + elif tag in ("th", "td"): + self.__inner_table_cell = True + + data = self._get_markup(tag, transdict=END2CREOLE) + + if data == None: + self._error("endtag", tag) + else: + self.result.append(data) + + def get(self): + return "".join(self.result).strip() + + + + + +#______________________________________________________________________________ +import unittest +import sys, difflib, traceback + +## error output format: +# =1 -> via repr() +# =2 -> raw +#VERBOSE = 1 +VERBOSE = 2 + + +class MarkupDiffFailure(Exception): + """ + Special error class: Try to display markup errors in a better way. + """ + def _format_output(self, txt): + txt = txt.split("\\n") + if VERBOSE == 1: + txt = "".join(['%s\\n\n' % i for i in txt]) + elif VERBOSE == 2: + txt = "".join(['%s\n' % i for i in txt]) + return txt + + def _diff(self, block1, block2): + d = difflib.Differ() + + block1 = block1.replace("\\n", "\\n\n").split("\n") + block2 = block2.replace("\\n", "\\n\n").split("\n") + + diff = d.compare(block1, block2) + + result = ["%2s %s\n" % (line, i) for line, i in enumerate(diff)] + return "".join(result) + + def __str__(self): + try: + raw_msg = self.args[0] + + """ + Get the right split_string is not easy. There are three kinds: + "foo" != "bar" + 'foo' != "bar" + "foo" != 'bar' + 'foo' != 'bar' + With and without a 'u' ;) + """ + msg = raw_msg.lstrip("u") + + first_quote = msg[0] + second_quote = msg[-1] + + msg = msg.strip("'\"") + + split_string = "%s != %s" % (first_quote, second_quote) + + if split_string not in msg: + # Second part is unicode? + split_string = "%s != u%s" % (first_quote, second_quote) + + if split_string not in msg: + msg = ( + "Split error output failed!" + " - split string >%r< not in message: %r" + ) % (split_string, raw_msg) + raise AssertionError(msg) + + try: + block1, block2 = msg.split(split_string) + except ValueError, err: + msg = self._format_output(msg) + return ( + "Can't split error output: %r\n" + "Info:\n%s" + ) % (err, msg) + + #~ block1 = block1.rstrip("\\n") + #~ block2 = block2.rstrip("\\n") + diff = self._diff(block1, block2) + + block1 = self._format_output(block1) + block2 = self._format_output(block2) + + return ( + "%r\n\n---[Output:]---\n%s\n" + "---[not equal to:]---\n%s" + "\n---[diff:]---\n%s" + ) % (raw_msg, block1, block2, diff) + except: + etype, value, tb = sys.exc_info() + msg = traceback.format_exc(tb) + return msg + + +class MarkupTest(unittest.TestCase): + + # Use the own error class from above + failureException = MarkupDiffFailure + + #_________________________________________________________________________ + + def _prepare_text(self, txt): + """ + prepare the multiline, indentation text. + """ + txt = txt.splitlines() + assert txt[0]=="", "First must be empty!" + txt = txt[1:] # Skip the first line + + # get the indentation level from the first line + count = False + for count, char in enumerate(txt[0]): + if char!=" ": + break + + assert count != False, "second line is empty!" + + # remove indentation from all lines + txt = [i[count:] for i in txt] + + #~ txt = re.sub("\n {2,}", "\n", txt) + txt = "\n".join(txt) + + # strip *one* newline at the begining... + if txt.startswith("\n"): txt = txt[1:] + # and strip *one* newline at the end of the text + if txt.endswith("\n"): txt = txt[:-1] + #~ print repr(txt) + #~ print "-"*79 + return txt + + def testSelf(self): + """ + Test for self._prepare_text() + """ + out1 = self._prepare_text(""" + one line + line two""") + self.assertEqual(out1, "one line\nline two") + + out2 = self._prepare_text(""" + one line + line two + """) + self.assertEqual(out2, "one line\nline two") + + out3 = self._prepare_text(""" + one line + + line two + """) + self.assertEqual(out3, "one line\n\nline two") + + out4 = self._prepare_text(""" + one line + line two + + """) + self.assertEqual(out4, "one line\n line two\n") + + out5 = self._prepare_text(""" + one line + line two + dritte Zeile + """) + self.assertEqual(out5, "one line\n line two\ndritte Zeile") + + + +class TestHtml2Creole(MarkupTest): + +# def setUp(self): + + + def assertCreole(self, raw_markup, raw_html, debug=False): + markup = self._prepare_text(raw_markup) + html = self._prepare_text(raw_html) + + h2c = Html2Creole(debug) + h2c.feed(html) + out_string = h2c.get() + + self.assertEqual(out_string, markup) + + def test_bold_italics(self): + self.assertCreole(r""" + **//bold italics//** + //**bold italics**// + //This is **also** good.// + """, """ + <p><strong><i>bold italics</i></strong><br /> + <i><strong>bold italics</strong></i><br /> + <i>This is <strong>also</strong> good.</i></p> + """, +# debug=True + ) + + def test_links(self): + self.assertCreole(r""" + test link: '[[internal links|link A]]' 1 and + test link: '[[http://domain.tld|link B]]' 2. + """, """ + <p>test link: '<a href="internal links">link A</a>' 1 and<br /> + test link: '<a href="http://domain.tld">link B</a>' 2.</p> + """) + + def test_images(self): + self.assertCreole(r""" + a {{/image.jpg|JPG pictures}} and + a {{/image.jpeg|JPEG pictures}} and + a {{/image.gif|GIF pictures}} and + a {{/image.png|PNG pictures}} ! + + picture [[www.domain.tld|{{foo.JPG|Foo}}]] as a link + """, """ + <p>a <img src="/image.jpg" alt="JPG pictures"> and<br /> + a <img src="/image.jpeg" alt="JPEG pictures"> and<br /> + a <img src="/image.gif" alt="GIF pictures" /> and<br /> + a <img src="/image.png" alt="PNG pictures" /> !</p> + + <p>picture <a href="www.domain.tld"><img src="foo.JPG" alt="Foo"></a> as a link</p> + """) + + def test_nowiki1(self): + self.assertCreole(r""" + this: + {{{ + //This// does **not** get [[formatted]] + }}} + and this: {{{** <i>this</i> ** }}} not, too. + + === Closing braces in nowiki: + {{{ + if (x != NULL) { + for (i = 0; i < size; i++) { + if (x[i] > 0) { + x[i]--; + }}} + }}} + """, """ + <p>this:</p> + <pre> + //This// does **not** get [[formatted]] + </pre> + <p>and this: <tt>** <i>this</i> ** </tt> not, too.</p> + + <h3>Closing braces in nowiki:</h3> + <pre> + if (x != NULL) { + for (i = 0; i < size; i++) { + if (x[i] > 0) { + x[i]--; + }}} + </pre> + """) + + def test_headlines(self): + self.assertCreole(r""" + = Level 1 (largest) + + == Level 2 + + === Level 3 + + ==== Level 4 + + ===== Level 5 + + ====== Level 6 + + === **not** \\ //parsed// + + No == headline == or? + """, r""" + <h1>Level 1 (largest)</h1> + <h2>Level 2</h2> + <h3>Level 3</h3> + <h4>Level 4</h4> + <h5>Level 5</h5> + <h6>Level 6</h6> + <h3>**not** \\ //parsed//</h3> + <p>No == headline == or?</p> + """) + + def test_horizontal_rule(self): + self.assertCreole(r""" + one + ---- + two + """, """ + <p>one</p> + <hr /> + <p>two</p> + """) + + def test_list1(self): + """ + FIXME: Two newlines between a list and the next paragraph :( + """ + self.assertCreole(r""" + ==== List a: + * a1 item + ** a1.1 Force\\linebreak + ** a1.2 item + *** a1.2.1 item + *** a1.2.2 item + * a2 item + + + list 'a' end + + ==== List b: + # b1 item + ## b1.2 item + ### b1.2.1 item + ### b1.2.2 Force\\linebreak1\\linebreak2 + ## b1.3 item + # b2 item + + + list 'b' end + """, """ + <h4>List a:</h4> + <ul> + <li>a1 item</li> + <ul> + <li>a1.1 Force + linebreak</li> + <li>a1.2 item</li> + <ul> + <li>a1.2.1 item</li> + <li>a1.2.2 item</li> + </ul> + </ul> + <li>a2 item</li> + </ul> + <p>list 'a' end</p> + + <h4>List b:</h4> + <ol> + <li>b1 item</li> + <ol> + <li>b1.2 item</li> + <ol> + <li>b1.2.1 item</li> + <li>b1.2.2 Force + linebreak1 + linebreak2</li> + </ol> + <li>b1.3 item</li> + </ol> + <li>b2 item</li> + </ol> + <p>list 'b' end</p> + """, +# debug=True + ) + + def test_list2(self): + """ Bold, Italics, Links, Pre in Lists """ + self.assertCreole(r""" + * **bold** item + * //italic// item + + # item about a [[domain.tld|page link]] + # {{{//this// is **not** [[processed]]}}} + """, """ + <ul> + <li><strong>bold</strong> item</li> + <li><i>italic</i> item</li> + </ul> + <ol> + <li>item about a <a href="domain.tld">page link</a></li> + <li><tt>//this// is **not** [[processed]]</tt></li> + </ol> + """, +# debug=True + ) + + def test_table(self): + self.assertCreole(r""" + A Table... + |= Headline |= a other\\headline |= the **big end | + | a cell | a **big** cell |**//bold italics//** | + | next\\line | No == headline == or? | | + | | | open end + ...end + """, """ + <p>A Table...</p> + <table> + <tr> + <th>Headline</th> + <th>a other<br /> + headline</th> + <th>the <strong>big end</strong></th> + </tr> + <tr> + <td>a cell</td> + <td>a <strong>big</strong> cell</td> + <td><strong><i>bold italics</i></strong></td> + </tr> + <tr> + <td>next<br /> + line</td> + <td>No == headline == or?</td> + <td></td> + </tr> + <tr> + <td></td> + <td></td> + <td>open end</td> + </tr> + </table> + <p>...end</p> + """, + debug=True + ) + + #__________________________________________________________________________ + # TODO: + +# +# def test_django(self): +# self.assertCreole(r""" +# The current page name: >{{ PAGE.name }}< great? +# A {% lucidTag page_update_list count=10 %} PyLucid plugin +# {% block %} +# FooBar +# {% endblock %} +# A [[www.domain.tld|link]]. +# no image: {{ foo|bar }}! +# """, """ +# <p>The current page name: >{{ PAGE.name }}< great?<br /> +# A {% lucidTag page_update_list count=10 %} PyLucid plugin</p> +# {% block %} +# FooBar +# {% endblock %} +# <p>A <a href="www.domain.tld">link</a>.<br /> +# no image: {{ foo|bar }}!</p> +# """) +# +# def test_escape_char(self): +# self.assertCreole(r""" +# ~#1 +# http://domain.tld/~bar/ +# ~http://domain.tld/ +# [[Link]] +# ~[[Link]] +# """, """ +# <p>#1<br /> +# <a href="http://domain.tld/~bar/">http://domain.tld/~bar/</a><br /> +# http://domain.tld/<br /> +# <a href="Link">Link</a><br /> +# [[Link]]</p> +# """) + +if __name__ == '__main__': + unittest.main() + +# h2c = Html2Creole(debug=False) +# h2c = Html2Creole(debug=True) +# h2c.feed(""" +#<strong>bold 1</strong><i>italic1</i> +#111 <strong>bold 1</strong> 222 <i>italic1</i> 333 +#""") +# print "-"*79 +# print h2c.get() +# print "-"*79
\ No newline at end of file diff --git a/tests/markup_creole.py b/tests/markup_creole.py new file mode 100644 index 0000000..b697a4f --- /dev/null +++ b/tests/markup_creole.py @@ -0,0 +1,658 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + PyLucid unittest + ~~~~~~~~~~~~~~~~ + + Test the creole markup. + We patches some parts of the creole markup, so it doesn't clash with the + django template syntax. + + Some Links + ~~~~~~~~~~ + http://code.google.com/p/creoleparser/source/browse/trunk/creoleparser/tests.py + http://hg.moinmo.in/moin/1.8/file/tip/MoinMoin/parser/ + http://sheep.art.pl/devel/creole/file/tip + http://code.google.com/p/djikiki/source/browse/#svn/trunk/djikiki/creole + http://creoleparser.googlepages.com/cheatsheetplus.html + http://www.wikicreole.org/creole-sandbox/EditX.jsp?page=Home + http://www.wikicreole.org/wiki/Sandbox + + Differences to Creole 1.0 + ~~~~~~~~~~~~~~~~~~~~~~~~~ + * italics -> <i> and not <em> + + Last commit info: + ~~~~~~~~~~~~~~~~~ + $LastChangedDate: 2008-11-14 12:05:22 +0100 (Fr, 14 Nov 2008) $ + $Rev: 1795 $ + $Author: JensDiemer $ + + :copyleft: 2008 by the PyLucid team, see AUTHORS for more details. + :license: GNU GPL v3, see LICENSE.txt for more details. +""" + +#import sys, re, difflib, unittest, traceback + +import tests +from tests.utils import unittest_addons + +from PyLucid.system.markups.creole import Parser +from PyLucid.system.markups.creole2html import HtmlEmitter + + +## error output format: +# =1 -> via repr() +# =2 -> raw +#VERBOSE = 1 +VERBOSE = 2 + +unittest_addons.VERBOSE = VERBOSE + +#_____________________________________________________________________________ + +class CreoleTest(unittest_addons.MarkupTest): + def _parse(self, txt): + """ + Apply creole markup on txt + """ + document = Parser(txt).parse() + out_string = HtmlEmitter(document, verbose=1).emit() + #print ">>>%r<<<" % out_string + return out_string + + def _processCreole(self, source_string, should_string): + """ + prepate the given text and apply the markup. + """ + source = self._prepare_text(source_string) + should = self._prepare_text(should_string) + out_string = self._parse(source) + return out_string, should + + def assertCreole(self, source_string, should_string): + """ + applies the tinyTextile markup to the given source_string and compairs + it with the should_string. + """ + out_string, should = self._processCreole( + source_string, should_string + ) + out_string = out_string.rstrip("\n") + self.assertEqual(out_string, should) + + #-------------------------------------------------------------------------- + + def test_creole_basic(self): + out_string = self._parse("a text line.") + self.assertEqual(out_string, "<p>a text line.</p>\n") + + def test_lineendings(self): + """ Test all existing lineending version """ + out_string = self._parse(u"first\nsecond") + self.assertEqual(out_string, u"<p>first<br />\nsecond</p>\n") + + out_string = self._parse(u"first\rsecond") + self.assertEqual(out_string, u"<p>first<br />\nsecond</p>\n") + + out_string = self._parse(u"first\r\nsecond") + self.assertEqual(out_string, u"<p>first<br />\nsecond</p>\n") + + def test_creole_linebreak(self): + self.assertCreole(r""" + Force\\linebreak + """, """ + <p>Force<br /> + linebreak</p> + """) + + def test_bold_italics(self): + self.assertCreole(r""" + **//bold italics//** + //**bold italics**// + //This is **also** good.// + """, """ + <p><strong><i>bold italics</i></strong><br /> + <i><strong>bold italics</strong></i><br /> + <i>This is <strong>also</strong> good.</i></p> + """) + + def test_internal_links(self): + self.assertCreole(r""" + A [[internal]] link... + ...and [[/a internal]] link. + """, """ + <p>A <a href="internal">internal</a> link...<br /> + ...and <a href="/a internal">/a internal</a> link.</p> + """) + + def test_external_links(self): + self.assertCreole(r""" + With pipe separator: + 1 [[internal links|link A]] test. + 2 [[http://domain.tld|link B]] test. + """, """ + <p>With pipe separator:<br /> + 1 <a href="internal links">link A</a> test.<br /> + 2 <a href="http://domain.tld">link B</a> test.</p> + """) + + def test_bolditalic_links(self): + self.assertCreole(r""" + //[[a internal]]// + **[[Shortcut2|a page2]]** + //**[[Shortcut3|a page3]]**// + """, """ + <p><i><a href="a internal">a internal</a></i><br /> + <strong><a href="Shortcut2">a page2</a></strong><br /> + <i><strong><a href="Shortcut3">a page3</a></strong></i></p> + """) + + def test_image(self): + self.assertCreole(r""" + a {{/image.jpg|JPG pictures}} and + a {{/image.jpeg|JPEG pictures}} and + a {{/image.gif|GIF pictures}} and + a {{/image.png|PNG pictures}} ! + """, """ + <p>a <img src="/image.jpg" alt="JPG pictures"> and<br /> + a <img src="/image.jpeg" alt="JPEG pictures"> and<br /> + a <img src="/image.gif" alt="GIF pictures"> and<br /> + a <img src="/image.png" alt="PNG pictures"> !</p> + """) + + def test_django1(self): + """ + Test if django template tags are not changed by Creole. + The Creole image tag use "{{" and "}}", too. + We test also the passthrough for all django template blocktags + """ + self.assertCreole(r""" + The current page name: >{{ PAGE.name }}< great? + A {% lucidTag page_update_list count=10 %} PyLucid plugin + {% block %} + FooBar + {% endblock %} + A [[www.domain.tld|link]]. + a {{/image.jpg|My Image}} image + + no image: {{ foo|bar }}! + picture [[www.domain.tld | {{ foo.JPG | Foo }} ]] as a link + """, """ + <p>The current page name: >{{ PAGE.name }}< great?<br /> + A {% lucidTag page_update_list count=10 %} PyLucid plugin</p> + {% block %} + FooBar + {% endblock %} + <p>A <a href="www.domain.tld">link</a>.<br /> + a <img src="/image.jpg" alt="My Image"> image</p> + + <p>no image: {{ foo|bar }}!<br /> + picture <a href="www.domain.tld"><img src="foo.JPG" alt="Foo"></a> as a link</p> + """) + + def test_django2(self): + self.assertCreole(r""" + ==== Headline 1 + + On {% a tag 1 %} line + line two + + ==== Headline 2 + + {% a tag 2 %} + + Right block with a end tag: + {% block %} + <Foo:> {{ Bar }} + {% endblock %} + end block + + A block without the right end block: + {% block1 %} + not matched + {% endblock2 %} + BBB + + A block without endblock: + {% noblock3 %} + not matched + {% noblock3 %} + CCC + """, """ + <h4>Headline 1</h4> + + <p>On {% a tag 1 %} line<br /> + line two</p> + + <h4>Headline 2</h4> + + {% a tag 2 %} + + <p>Right block with a end tag:</p> + {% block %} + <Foo:> {{ Bar }} + {% endblock %} + <p>end block</p> + + <p>A block without the right end block:<br /> + {% block1 %}<br /> + not matched<br /> + {% endblock2 %}<br /> + BBB</p> + + <p>A block without endblock:<br /> + {% noblock3 %}<br /> + not matched<br /> + {% noblock3 %}<br /> + CCC</p> + """) + + def test_nowiki1(self): + self.assertCreole(r""" + this: + {{{ + //This// does **not** get [[formatted]] + }}} + and this: {{{** <i>this</i> ** }}} + + === Closing braces in nowiki: + {{{ + if (x != NULL) { + for (i = 0; i < size; i++) { + if (x[i] > 0) { + x[i]--; + }}} + }}} + """, """ + <p>this:</p> + <pre> + //This// does **not** get [[formatted]] + </pre> + <p>and this: <tt>** <i>this</i> **</tt></p> + + <h3>Closing braces in nowiki:</h3> + <pre> + if (x != NULL) { + for (i = 0; i < size; i++) { + if (x[i] > 0) { + x[i]--; + }}} + </pre> + """) + + def test_nowiki2(self): + self.assertCreole(r""" + 111 + 222 + {{{ + 333 + }}} + 444 + + one + {{{ + foobar + }}} + + two + """, """ + <p>111<br /> + 222</p> + <pre> + 333 + </pre> + <p>444</p> + + <p>one</p> + <pre> + foobar + </pre> + <p>two</p> + """) + + def test_escape_char(self): + self.assertCreole(r""" + ~#1 + http://domain.tld/~bar/ + ~http://domain.tld/ + [[Link]] + ~[[Link]] + """, """ + <p>#1<br /> + <a href="http://domain.tld/~bar/">http://domain.tld/~bar/</a><br /> + http://domain.tld/<br /> + <a href="Link">Link</a><br /> + [[Link]]</p> + """) + + def test_cross_paragraphs(self): + self.assertCreole(r""" + Bold and italics should //be + able// to cross lines. + + But, should //not be... + + ...able// to cross paragraphs. + """, """ + <p>Bold and italics should <i>be<br /> + able</i> to cross lines.</p> + + <p>But, should <i>not be...</i></p> + + <p>...able<i> to cross paragraphs.</i></p> + """) + + def test_headlines(self): + self.assertCreole(r""" + = Level 1 (largest) = + == Level 2 == + === Level 3 === + ==== Level 4 ==== + ===== Level 5 ===== + ====== Level 6 ====== + === Also level 3 + === Also level 3 = + === Also level 3 == + === **not** \\ //parsed// === + No == headline == or? + """, r""" + <h1>Level 1 (largest)</h1> + <h2>Level 2</h2> + <h3>Level 3</h3> + <h4>Level 4</h4> + <h5>Level 5</h5> + <h6>Level 6</h6> + <h3>Also level 3</h3> + <h3>Also level 3</h3> + <h3>Also level 3</h3> + <h3>**not** \\ //parsed//</h3> + <p>No == headline == or?</p> + """) + + def test_horizontal_rule(self): + self.assertCreole(r""" + one + ---- + two + """, """ + <p>one</p> + <hr /> + <p>two</p> + """) + + def test_bullet_list(self): + self.assertCreole(r""" + * Item 1 + ** Item 1.1 + ** a **bold** Item 1.2 + * Item 2 + ** Item 2.1 + *** [[a link Item 3.1]] + *** Force\\linebreak 3.2 + *** item 3.3 + *** item 3.4 + + up to five levels + * 1 + ** 2 + *** 3 + **** 4 + ***** 5 + """, """ + <ul> + \t<li>Item 1 + \t<ul> + \t\t<li>Item 1.1</li> + \t\t<li>a <strong>bold</strong> Item 1.2</li> + \t</ul></li> + \t<li>Item 2 + \t<ul> + \t\t<li>Item 2.1 + \t\t<ul> + \t\t\t<li><a href="a link Item 3.1">a link Item 3.1</a></li> + \t\t\t<li>Force<br /> + \t\t\tlinebreak 3.2</li> + \t\t\t<li>item 3.3</li> + \t\t\t<li>item 3.4</li> + \t\t</ul></li> + \t</ul></li> + </ul> + <p>up to five levels</p> + <ul> + \t<li>1 + \t<ul> + \t\t<li>2 + \t\t<ul> + \t\t\t<li>3 + \t\t\t<ul> + \t\t\t\t<li>4 + \t\t\t\t<ul> + \t\t\t\t\t<li>5</li> + \t\t\t\t</ul></li> + \t\t\t</ul></li> + \t\t</ul></li> + \t</ul></li> + </ul> + """) + + def test_number_list(self): + self.assertCreole(r""" + # Item 1 + ## Item 1.1 + ## a **bold** Item 1.2 + # Item 2 + ## Item 2.1 + ### [[a link Item 3.1]] + ### Force\\linebreak 3.2 + ### item 3.3 + ### item 3.4 + + up to five levels + # 1 + ## 2 + ### 3 + #### 4 + ##### 5 + """, """ + <ol> + \t<li>Item 1 + \t<ol> + \t\t<li>Item 1.1</li> + \t\t<li>a <strong>bold</strong> Item 1.2</li> + \t</ol></li> + \t<li>Item 2 + \t<ol> + \t\t<li>Item 2.1 + \t\t<ol> + \t\t\t<li><a href="a link Item 3.1">a link Item 3.1</a></li> + \t\t\t<li>Force<br /> + \t\t\tlinebreak 3.2</li> + \t\t\t<li>item 3.3</li> + \t\t\t<li>item 3.4</li> + \t\t</ol></li> + \t</ol></li> + </ol> + <p>up to five levels</p> + <ol> + \t<li>1 + \t<ol> + \t\t<li>2 + \t\t<ol> + \t\t\t<li>3 + \t\t\t<ol> + \t\t\t\t<li>4 + \t\t\t\t<ol> + \t\t\t\t\t<li>5</li> + \t\t\t\t</ol></li> + \t\t\t</ol></li> + \t\t</ol></li> + \t</ol></li> + </ol> + """) + + def test_list(self): + """ Bold, Italics, Links, Pre in Lists """ + self.assertCreole(r""" + * **bold** item + * //italic// item + + # item about a [[certain_page]] + # {{{ //this// is **not** [[processed]] }}} + """, """ + <ul> + \t<li><strong>bold</strong> item</li> + \t<li><i>italic</i> item</li> + </ul> + <ol> + \t<li>item about a <a href="certain_page">certain_page</a></li> + \t<li><tt>//this// is **not** [[processed]]</tt></li> + </ol> + """) + + def test_table(self): + self.assertCreole(r""" + A Table... + |= Headline |= a other\\headline |= the **big end | + | a cell | a **big** cell |**//bold italics//** | + | next\\line | No == headline == or? | | + | | | open end + ...end + """, """ + <p>A Table...</p> + <table> + <tr> + \t<th>Headline</th> + \t<th>a other<br /> + \t\theadline</th> + \t<th>the <strong>big end</strong></th> + </tr> + <tr> + \t<td>a cell</td> + \t<td>a <strong>big</strong> cell</td> + \t<td><strong><i>bold italics</i></strong></td> + </tr> + <tr> + \t<td>next<br /> + \t\tline</td> + \t<td>No == headline == or?</td> + \t<td></td> + </tr> + <tr> + \t<td></td> + \t<td></td> + \t<td>open end</td> + </tr> + </table> + <p>...end</p> + """) + + def test_html_lines(self): + self.assertCreole(r""" + This is a normal Text block witch would + escape html chars like < and > ;) + + html code must start and end with a tag: + <p>this <strong class="my">html code</strong> line pass-through</p> + this works. + + this: + <p>didn't<br /> + match</p> + + <p> + didn't match + </p> + + <p>didn't match,too.< p > + """, """ + <p>This is a normal Text block witch would<br /> + escape html chars like < and > ;)</p> + + <p>html code must start and end with a tag:</p> + <p>this <strong class="my">html code</strong> line pass-through</p> + <p>this works.</p> + + <p>this:<br /> + <p>didn\'t<br /><br /> + match</p></p> + + <p><p><br /> + didn\'t match<br /> + </p></p> + + <p><p>didn\'t match,too.< p ></p> + """) + + def test_macro_html1(self): + self.assertCreole(r""" + <<a_not_existing_macro>> + + <<code>> + some code + <</code>> + + a macro: + <<code>> + <<code>> + the sourcecode + <</code>> + """, r""" + <p>[Error: Macro 'a_not_existing_macro' doesn't exist]</p> + <fieldset class="pygments_code"> + <legend class="pygments_code"><small title="no lexer matching the text found">unknown type</small></legend> + <pre><code>some code</code></pre> + </fieldset> + <p>a macro:</p> + <fieldset class="pygments_code"> + <legend class="pygments_code"><small title="no lexer matching the text found">unknown type</small></legend> + <pre><code><<code>> + the sourcecode</code></pre> + </fieldset> + """) + + def test_macro_html2(self): + self.assertCreole(r""" + html macro: + <<html>> + <p><<this is 'html'>></p> + <</html>> + """, r""" + <p>html macro:</p> + <p><<this is 'html'>></p> + """) + + def test_macro_pygments_code(self): + self.assertCreole(r""" + a macro: + <<code ext=.css>> + /* Stylesheet */ + form * { + vertical-align:middle; + } + <</code>> + the end + """, """ + <p>a macro:</p> + <fieldset class="pygments_code"> + <legend class="pygments_code">CSS</legend><table class="pygmentstable"><tr><td class="linenos"><pre>1 + 2 + 3 + 4</pre></td><td class="code"><div class="pygments"><pre><span class="c">/* Stylesheet */</span> + <span class="nt">form</span> <span class="o">*</span> <span class="p">{</span> + <span class="k">vertical-align</span><span class="o">:</span><span class="k">middle</span><span class="p">;</span> + <span class="p">}</span> + </pre></div> + </td></tr></table></fieldset> + <p>the end</p> + """) + + + + +if __name__ == "__main__": + # Run this unitest directly + import os + os.chdir("../") + filename = os.path.splitext(os.path.basename(__file__))[0] + tests.run_tests(test_labels=[filename])
\ No newline at end of file |