summary | refs | log | tree | commit | diff
path: root/creole
diff options
context:
space:
mode:
author: Jens Diemer <github.com@jensdiemer.de> 2009-02-25 16:19:57 +0000
committer: Jens Diemer <github.com@jensdiemer.de> 2009-02-25 16:19:57 +0000
commit: 04d5a18abfb268b1c9267e61aa291c537a0c84e2 (patch)
tree: 2c1b87291d8d8e2f635ced0a40619e9cfb7a8472 /creole
parent: d58cfa6b1f68be9cfe398070f1ae74e906cf7c04 (diff)
download: creole-04d5a18abfb268b1c9267e61aa291c537a0c84e2.tar.gz
* distinguish between inline and block macro/preformatted areas, fix for issue 1: http://code.google.com/p/python-creole/issues/detail?id=1
* add a test macro
* add different handlers for unknown tags in html2creole
* add unittests
Diffstat (limited to 'creole')
-rw-r--r-- creole/__init__.py 7
-rw-r--r-- creole/creole.py 187
-rw-r--r-- creole/creole2html.py 14
-rw-r--r-- creole/default_macros.py 10
-rw-r--r-- creole/html2creole.py 165
5 files changed, 256 insertions, 127 deletions
diff --git a/creole/__init__.py b/creole/__init__.py
index 8014515..9e3fed0 100644
--- a/creole/__init__.py
+++ b/creole/__init__.py
@@ -8,14 +8,13 @@ from html2creole import Html2CreoleParser, Html2CreoleEmitter
-def creole2html(markup_string, **kwargs):
+def creole2html(markup_string, debug=False, **kwargs):
"""
convert creole markup into html code
>>> creole2html(u'This is **creole //markup//**!')
u'<p>This is <strong>creole <i>markup</i></strong>!</p>\\n'
"""
- debug = kwargs.pop("debug", False)
# Create document tree from creole markup
document = Parser(markup_string).parse()
if debug:
@@ -26,7 +25,7 @@ def creole2html(markup_string, **kwargs):
-def html2creole(html_string, debug=False):
+def html2creole(html_string, debug=False, **kwargs):
"""
convert html code into creole markup
@@ -40,7 +39,7 @@ def html2creole(html_string, debug=False):
h2c.debug()
# create creole markup from the document tree
- emitter = Html2CreoleEmitter(document_tree, debug)
+ emitter = Html2CreoleEmitter(document_tree, debug=debug, **kwargs)
return emitter.emit()
diff --git a/creole/creole.py b/creole/creole.py
index 02eb656..33c5011 100644
--- a/creole/creole.py
+++ b/creole/creole.py
@@ -80,19 +80,20 @@ class InlineRules:
)(?i)'''
#--------------------------------------------------------------------------
- macro = r'''(?P<macro>
- <<(?P<macro_name> \w+) (?P<macro_args>.*?)>>
- )'''
-
+ # a macro like: <<macro>>text<</macro>>
inline_macro = r'''
(?P<inline_macro>
- << \s* (?P<inline_macro_start>\w+) \s* (?P<inline_macro_args>.*?) \s* >>
- (?P<inline_macro_text>(.|\n)*)
- <</ \s* (?P=inline_macro_start) \s* >>
+ << \s* (?P<macro_inline_start>\w+) \s* (?P<macro_inline_args>.*?) \s* >>
+ (?P<macro_inline_text>(.|\n)*?)
+ <</ \s* (?P=macro_inline_start) \s* >>
)
'''
+ # A single macro tag, like <<macro-a foo="bar">> or <<macro />>
+ macro_tag = r'''(?P<macro_tag>
+ <<(?P<macro_tag_name> \w+) (?P<macro_tag_args>.*?) \s* /*>>
+ )'''
- preformatted = r'(?P<preformatted> {{{ (?P<preformatted_text>.*?) }}} )'
+ pre_inline = r'(?P<pre_inline> {{{ (?P<pre_inline_text>.*?) }}} )'
# Basic text typefaces:
emph = r'(?P<emph> (?<!:)// )' # there must be no : in front of the //
@@ -164,7 +165,7 @@ class BlockRules:
macro_block = r'''
(?P<macro_block>
<< \s* (?P<macro_block_start>\w+) \s* (?P<macro_block_args>.*?) \s* >>
- (?P<macro_block_text>(.|\n)*)
+ (?P<macro_block_text>(.|\n)*?)
<</ \s* (?P=macro_block_start) \s* >>
)
'''
@@ -183,16 +184,15 @@ class BlockRules:
=*$
)'''
separator = r'(?P<separator> ^ \s* ---- \s* $ )' # horizontal line
- pre = r'''(?P<pre>
+
+ pre_block = r'''(?P<pre_block>
^{{{ \s* $
- (\n)?
- (?P<pre_text>
- ([\#]!(?P<pre_kind>\w*?)(\s+.*)?$)?
+ (?P<pre_block_text>
+ ([\#]!(?P<pre_block_kind>\w*?)(\s+.*)?$)?
(.|\n)+?
)
- (\n)?
- ^}}} \s*$
- )'''
+ ^}}})
+ '''
list = r'''(?P<list>
^ [ \t]* ([*][^*\#]|[\#][^\#*]).* $
( \n[ \t]* [*\#]+.* $ )*
@@ -227,8 +227,10 @@ class SpecialRules:
(?P<cell> ( %s | [^|])+ )
) \s*
''' % '|'.join([
- InlineRules.link, InlineRules.macro, InlineRules.image,
- InlineRules.preformatted
+ InlineRules.link,
+ InlineRules.inline_macro, InlineRules.macro_tag,
+ InlineRules.image,
+ InlineRules.pre_inline
])
# For pre escaping, in creole 1.0 done with ~:
@@ -242,16 +244,16 @@ BLOCK_RULES = (
BlockRules.pass_line,
BlockRules.macro_block,
BlockRules.html,
- BlockRules.line, BlockRules.head, BlockRules.separator, BlockRules.pre, BlockRules.list,
+ BlockRules.line, BlockRules.head, BlockRules.separator,
+ BlockRules.pre_block, BlockRules.list,
BlockRules.table, BlockRules.text,
)
INLINE_FLAGS = re.VERBOSE | re.UNICODE
INLINE_RULES = (
InlineRules.link, InlineRules.url,
- InlineRules.macro,
- InlineRules.inline_macro,
- InlineRules.preformatted, InlineRules.image,
+ InlineRules.inline_macro, InlineRules.macro_tag,
+ InlineRules.pre_inline, InlineRules.image,
InlineRules.pass_inline,
InlineRules.strong, InlineRules.emph,
@@ -405,7 +407,7 @@ class Parser:
self.parse_inline(groups.get('text', u""))
if groups.get('break') and self.cur.kind in ('paragraph',
- 'emphasis', 'strong', 'preformatted'):
+ 'emphasis', 'strong', 'pre_inline'):
self.last_text_break = DocNode('break', self.cur, u"")
self.text = None
@@ -442,44 +444,80 @@ class Parser:
self.text = None
_link_target_repl = _link_repl
_link_text_repl = _link_repl
+
+ #--------------------------------------------------------------------------
+
+ def _add_macro(self, groups, macro_type, name_key, args_key, text_key=None):
+ """
+ generic mathod to handle the macro, used for all variants:
+ inline, inline-tag, block
+ """
+ #self.debug_groups(groups)
+ assert macro_type in ("macro_inline", "macro_block")
+
+ if text_key:
+ macro_text = groups.get(text_key, u"").strip()
+ else:
+ macro_text = None
+
+ node = DocNode(macro_type, self.cur, macro_text)
+ node.macro_name = groups[name_key]
+ node.macro_args = groups.get(args_key, u"").strip()
- def _add_macro(self, macro_name, macro_args, macro_text=u""):
-# self._upto_block()
- node = DocNode("macro", self.cur, macro_text.strip())
- node.macro_name = macro_name
- node.macro_args = macro_args.strip()
self.text = None
def _macro_block_repl(self, groups):
- """Handles macros using the placeholder syntax."""
- #self.debug_groups(groups)
+ """
+ block macro, e.g:
+ <<macro args="foo">>
+ some
+ lines
+ <</macro>>
+ """
self._upto_block()
self.cur = self.root
self._add_macro(
- macro_name = groups['macro_block_start'],
- macro_text = groups.get('macro_block_text', u""),
- macro_args = groups.get('macro_block_args', u""),
+ groups,
+ macro_type = "macro_block",
+ name_key = "macro_block_start",
+ args_key = "macro_block_args",
+ text_key = "macro_block_text",
)
- self.text = None
_macro_block_start_repl = _macro_block_repl
_macro_block_args_repl = _macro_block_repl
_macro_block_text_repl = _macro_block_repl
- def _macro_repl(self, groups):
- """Handles macros using the placeholder syntax."""
- macro_name = groups.get('macro_name', u"")
- macro_args = groups.get('macro_args', u"")
- self._add_macro(macro_name, macro_args)
- self.text = None
+ def _macro_tag_repl(self, groups):
+ """
+ A single macro tag, e.g.: <<macro-a foo="bar">> or <<macro />>
+ """
+ self._add_macro(
+ groups,
+ macro_type = "macro_inline",
+ name_key = "macro_tag_name",
+ args_key = "macro_tag_args",
+ text_key = None,
+ )
+ _macro_tag_name_repl = _macro_tag_repl
+ _macro_tag_args_repl = _macro_tag_repl
+
+
+ def _macro_inline_repl(self, groups):
+ """
+ inline macro tag with data, e.g.: <<macro>>text<</macro>>
+ """
+ self._add_macro(
+ groups,
+ macro_type = "macro_inline",
+ name_key = "macro_inline_start",
+ args_key = "macro_inline_args",
+ text_key = "macro_inline_text",
+ )
+ _macro_inline_start_repl = _macro_inline_repl
+ _macro_inline_args_repl = _macro_inline_repl
+ _macro_inline_text_repl = _macro_inline_repl
-# text = (groups.get('macro_text', u"") or u"").strip()
-# node = DocNode('macro', self.cur, name)
-# node.args = groups.get('macro_args', u"") or ''
-# DocNode('text', node, text or name)
-# self.text = None
- _macro_name_repl = _macro_repl
- _macro_args_repl = _macro_repl
-# _macro_text_repl = _macro_repl
+ #--------------------------------------------------------------------------
def _image_repl(self, groups):
"""Handles images and attachemnts included in the page."""
@@ -560,31 +598,31 @@ class Parser:
self.cur = tb
self.text = None
- def _pre_repl(self, groups):
+ def _pre_block_repl(self, groups):
self._upto_block()
- kind = groups.get('pre_kind', None)
- text = groups.get('pre_text', u"")
+ kind = groups.get('pre_block_kind', None)
+ text = groups.get('pre_block_text', u"")
def remove_tilde(m):
return m.group('indent') + m.group('rest')
text = self.pre_escape_re.sub(remove_tilde, text)
- node = DocNode('preformatted', self.cur, text)
+ node = DocNode('pre_block', self.cur, text)
node.sect = kind or ''
self.text = None
- _pre_text_repl = _pre_repl
- _pre_head_repl = _pre_repl
- _pre_kind_repl = _pre_repl
+ _pre_block_text_repl = _pre_block_repl
+ _pre_block_head_repl = _pre_block_repl
+ _pre_block_kind_repl = _pre_block_repl
def _line_repl(self, groups):
""" Transfer newline from the original markup into the html code """
self._upto_block()
DocNode('line', self.cur, u"")
- def _preformatted_repl(self, groups):
- text = groups.get('preformatted_text', u"")
- DocNode('preformatted', self.cur, text)
+ def _pre_inline_repl(self, groups):
+ text = groups.get('pre_inline_text', u"")
+ DocNode('pre_inline', self.cur, text)
self.text = None
- _preformatted_text_repl = _preformatted_repl
- _preformatted_head_repl = _preformatted_repl
+ _pre_inline_text_repl = _pre_inline_repl
+ _pre_inline_head_repl = _pre_inline_repl
#--------------------------------------------------------------------------
@@ -721,16 +759,15 @@ class DocNode:
def __repr__(self):
return u"<DocNode %s: %r>" % (self.kind, self.content)
-# def debug(self):
-# raise
-# print "_"*80
-# print "\tDocNode - debug:"
-# print "str(): %s" % self
-# print "attributes:"
-# for i in dir(self):
-# if i.startswith("_"):
-# continue
-# print "%20s: %r" % (i, getattr(self, i, "---"))
+ def debug(self):
+ print "_"*80
+ print "\tDocNode - debug:"
+ print "str(): %s" % self
+ print "attributes:"
+ for i in dir(self):
+ if i.startswith("_"):
+ continue
+ print "%20s: %r" % (i, getattr(self, i, "---"))
#------------------------------------------------------------------------------
@@ -741,12 +778,10 @@ if __name__=="__main__":
doctest.testmod()
print "doc test done."
- txt = r"""Creole **<<html>>&#x7B;...&#x7D;<</html>>** code"""
- txt = r"""foo
-Y<<html>>the
-code X<</html>>bar
-Creole <<html>>&#x7B;...&#x7D;<</html>> code
- """
+ txt = r"""111 <<html>><X><</html>>foo<<html>></X><</html>> 222
+ 333<<html>><X foo1="bar1"><</html>>foobar<<html>></X><</html>>444
+
+ 555<<html>><X /><</html>>666"""
print "-"*80
p = Parser(txt)
diff --git a/creole/creole2html.py b/creole/creole2html.py
index 9a17642..07eac09 100644
--- a/creole/creole2html.py
+++ b/creole/creole2html.py
@@ -238,7 +238,12 @@ class HtmlEmitter:
msg += " - returns: %r, type %r" % (result, type(result))
return self.error(msg)
+ if node.kind == "macro_block":
+ result += "\n"
+
return result
+ macro_inline_emit = macro_emit
+ macro_block_emit = macro_emit
def break_emit(self, node):
if node.parent.kind == "list_item":
@@ -251,7 +256,11 @@ class HtmlEmitter:
def line_emit(self, node):
return u"\n"
- def preformatted_emit(self, node):
+ def pre_block_emit(self, node):
+ """ pre block, with newline at the end """
+ return u"<pre>%s</pre>\n" % self.html_escape(node.content)
+ def pre_inline_emit(self, node):
+ """ pre without newline at the end """
return u"<pre>%s</pre>" % self.html_escape(node.content)
def pass_block_emit(self, node):
@@ -301,8 +310,7 @@ class HtmlEmitter:
return u""
if __name__=="__main__":
- txt = r"""Creole **<<html>>&#x7B;...&#x7D;<</html>>** code"""
- txt = r"""Creole {{{preprepre}}} c **od** e"""
+ txt = r"""<<html>>1<</html><<html>>2<</html>>"""
print "-"*80
# from creole_alt.creole import Parser
diff --git a/creole/default_macros.py b/creole/default_macros.py
index 4e38a3a..07d8fb9 100644
--- a/creole/default_macros.py
+++ b/creole/default_macros.py
@@ -5,4 +5,12 @@ def html(args, text):
Macro tag <<html>>...<</html>>
Pass-trought for html code (or other stuff)
"""
- return text \ No newline at end of file
+ return text
+
+
+
+def test_macro(args, text):
+ """
+ a macro only for testing
+ """
+ return u"[%s text: %s]" % (args, text)
diff --git a/creole/html2creole.py b/creole/html2creole.py
index 0bb3c8a..1d5517f 100644
--- a/creole/html2creole.py
+++ b/creole/html2creole.py
@@ -3,11 +3,13 @@
import re
import inspect
from HTMLParser import HTMLParser
+from xml.sax.saxutils import escape
from htmlentitydefs import entitydefs
+
BLOCK_TAGS = (
- "address", "blockquote", "center", "del", "dir", "div", "dl", "fieldset",
+ "address", "blockquote", "center", "dir", "div", "dl", "fieldset",
"form",
"h1", "h2", "h3", "h4", "h5", "h6",
"hr", "ins", "isindex", "menu", "noframes", "noscript",
@@ -28,12 +30,11 @@ pass_block_re = r'''
[\s\n]*
)'''
pre_block_re = r'''
- [\s\n]*
- <pre>
+ ^<pre> \s* $
(?P<pre_block>
(\n|.)*?
)
- </pre>
+ ^</pre> \s* $
[\s\n]*
'''
block_re = re.compile(
@@ -46,13 +47,6 @@ block_re = re.compile(
#------------------------------------------------------------------------------
-#tt_block_re = r'''
-# <tt>
-# (?P<tt_block>
-# (\n|.)*?
-# )
-# </tt>
-#'''
inline_django_re = r'''
(?P<django_tag>
[\s\n]*
@@ -60,9 +54,16 @@ inline_django_re = r'''
[\s\n]*
)
'''
+pre_inline_re = r'''
+ <pre>
+ (?P<pre_inline>
+ (\n|.)*?
+ )
+ </pre>
+'''
inline_re = re.compile(
'|'.join([
-# tt_block_re,
+ pre_inline_re,
inline_django_re,
]),
re.VERBOSE | re.UNICODE
@@ -74,6 +75,12 @@ headline_tag_re = re.compile(r"h(\d)", re.UNICODE)
+
+
+
+
+
+
class DocNode:
"""
A node in the document.
@@ -91,6 +98,17 @@ class DocNode:
self.content = content
self.level = level
+ def get_attrs_as_string(self):
+ """
+ FIXME!
+ """
+ attr_list = []
+ for key, value in self.attrs.iteritems():
+ if isinstance(value, unicode):
+ value = value.encode("utf-8")
+ attr_list.append("%s=%r" % (key, value))
+ return " ".join(attr_list).replace("'", '"')
+
def __str__(self):
return "<DocNode %s: %r>" % (self.kind, self.content)
@@ -159,6 +177,9 @@ def strip_html(html_code):
>>> strip_html(u'<p>a <unknown tag /> foobar </p>')
u'<p>a <unknown tag /> foobar</p>'
+
+ >>> strip_html(u'<p>a <pre> preformated area </pre> foo </p>')
+ u'<p>a<pre>preformated area</pre>foo</p>'
"""
def strip_tag(match):
block = match.group(0)
@@ -275,8 +296,8 @@ class Html2CreoleParser(HTMLParser):
id = len(self.blockdata)-1
return '<%s type="%s" id="%s" />' % (placeholder, type, id)
- def _pre_tt_block_cut(self, groups):
- return self._pre_cut(groups["tt_block"], "tt", self._inline_placeholder)
+ def _pre_pre_inline_cut(self, groups):
+ return self._pre_cut(groups["pre_inline"], "pre", self._inline_placeholder)
def _pre_pre_block_cut(self, groups):
return self._pre_cut(groups["pre_block"], "pre", self._block_placeholder)
@@ -479,14 +500,90 @@ def deentitfy(text):
+#------------------------------------------------------------------------------
+
+RAISE_UNKNOWN_NODES = 1
+HTML_MACRO_UNKNOWN_NODES = 2
+ESCAPE_UNKNOWN_NODES = 3
+
class Html2CreoleEmitter(object):
- def __init__(self, document_tree, debug=False):
+
+ def __init__(self, document_tree, unknown_emit=ESCAPE_UNKNOWN_NODES,
+ debug=False):
self.root = document_tree
+
+ if unknown_emit == RAISE_UNKNOWN_NODES:
+ self.unknown_emit = self.raise_unknown_node
+ elif unknown_emit == HTML_MACRO_UNKNOWN_NODES:
+ self.unknown_emit = self.use_html_macro
+ elif unknown_emit == ESCAPE_UNKNOWN_NODES:
+ self.unknown_emit = self.escape_unknown_nodes
+ else:
+ raise AssertionError("wrong keyword argument 'unknown_emit'!")
+
self.debugging = debug
self.__inner_list = ""
self.__mask_linebreak = False
#--------------------------------------------------------------------------
+
+ def raise_unknown_node(self, node):
+ """
+ Raise NotImplementedError on unknown tags.
+ """
+ raise NotImplementedError(
+ "Node from type '%s' is not implemented!" % node.kind
+ )
+
+ def use_html_macro(self, node):
+ """
+ Use the <<html>> macro to mask unknown tags.
+ """
+ #node.debug()
+ attrs = node.get_attrs_as_string()
+ if attrs:
+ attrs = " "+attrs
+
+ tag_data = {
+ "tag": node.kind,
+ "attrs": attrs,
+ }
+
+ content = self.emit_children(node)
+ if not content:
+ # single tag
+ return u"<<html>><%(tag)s%(attrs)s /><</html>>" % tag_data
+
+ start_tag = u"<<html>><%(tag)s%(attrs)s><</html>>" % tag_data
+ end_tag = u"<<html>></%(tag)s><</html>>" % tag_data
+
+ return start_tag + content + end_tag
+
+ def escape_unknown_nodes(self, node):
+ """
+ All unknown tags should be escaped.
+ """
+ #node.debug()
+ attrs = node.get_attrs_as_string()
+ if attrs:
+ attrs = " "+attrs
+
+ tag_data = {
+ "tag": node.kind,
+ "attrs": attrs,
+ }
+
+ content = self.emit_children(node)
+ if not content:
+ # single tag
+ return escape(u"<%(tag)s%(attrs)s />" % tag_data)
+
+ start_tag = escape(u"<%(tag)s%(attrs)s>" % tag_data)
+ end_tag = escape(u"</%(tag)s>" % tag_data)
+
+ return start_tag + content + end_tag
+
+ #--------------------------------------------------------------------------
def _escape_linebreaks(self, text):
text = text.split("\n")
@@ -496,15 +593,16 @@ class Html2CreoleEmitter(object):
#--------------------------------------------------------------------------
def blockdata_pre_emit(self, node):
+ """ pre block -> with newline at the end """
return u"{{{%s}}}\n" % deentitfy(node.content)
+ def inlinedata_pre_emit(self, node):
+ """ a pre inline block -> no newline at the end """
+ return u"{{{%s}}}" % deentitfy(node.content)
def blockdata_pass_emit(self, node):
return u"%s\n\n" % node.content
return node.content
- def inlinedata_tt_emit(self, node):
- return u"{{{ %s }}}" % deentitfy(node.content)
-
def inlinedata_django_tag_emit(self, node):
return node.content
@@ -661,14 +759,6 @@ class Html2CreoleEmitter(object):
def document_emit(self, node):
return self.emit_children(node)
- def default_emit(self, node):
- """
- Fallback function for emit unknown nodes.
- """
- raise NotImplementedError(
- "Node from type '%s' is not implemented!" % node.kind
- )
-
def emit_children(self, node):
"""Emit all the children of a node."""
result = []
@@ -684,7 +774,7 @@ class Html2CreoleEmitter(object):
self.debug_msg("emit_node", "%s: %r" % (node.kind, node.content))
method_name = "%s_emit" % node.kind
- emit_method = getattr(self, method_name, self.default_emit)
+ emit_method = getattr(self, method_name, self.unknown_emit)
content = emit_method(node)
if not isinstance(content, unicode):
@@ -717,24 +807,13 @@ if __name__ == '__main__':
doctest.testmod()
print "doc test done."
-# import sys
-# sys.exit()
+# import sys;sys.exit()
data = """
-<pre> jojo </pre>
-<p>basics:<br />
-<strong><i>bold italics</i></strong><br />
-<i><strong>bold italics</strong></i><br />
-<i>This is <strong>also</strong> good.</i></p>
-
-<p>Creole 1.0 optional:<br />
-This is <tt>monospace</tt> text.<br />
-This is <sup>superscripted</sup> text.<br />
-This is <sub>subscripted</sub> text.<br />
-This is <u>underlined</u> text.</p>
-
-<p>own additions:<br />
-This is <small>small</small> and this <del>strikeout</del> ;)</p>"""
+<p>111 <x>foo</x> 222<br />
+333<x foo1="bar1">foobar</x>444</p>
+
+<p>555<x />666</p>"""
# print data.strip()
h2c = Html2CreoleParser(