"""
html -> creole Emitter
~~~~~~~~~~~~~~~~~~~~~~
:copyleft: 2008-2011 by python-creole team, see AUTHORS for more details.
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
import posixpath
from creole.shared.base_emitter import BaseEmitter
class CreoleEmitter(BaseEmitter):
    """
    Build from a document_tree (html2creole.parser.HtmlParser instance) a
    creole markup text.

    Every ``<tag>_emit`` method receives one node of the document tree and
    returns the creole markup for it as a string. Helpers such as
    ``emit_children``, ``emit_node``, ``_typeface``, ``_list_emit``,
    ``_emit_content`` and the attributes ``root``, ``deentity`` and
    ``_inner_list`` are not defined here; they are expected to come from
    ``BaseEmitter``.
    """

    def __init__(self, document_tree, strict=False, *args, **kwargs):
        """
        :param document_tree: root node of the parsed html document
        :param strict: if true, ``img_emit`` does not append the
            ``|<width>x<height>`` size suffix to image markup
        :param args: passed through to ``BaseEmitter.__init__``
        :param kwargs: passed through to ``BaseEmitter.__init__``
        """
        self.strict = strict
        super().__init__(document_tree, *args, **kwargs)

        # NOTE(review): not read in this class; presumably consumed by the
        # table handling in BaseEmitter -- confirm there.
        self.table_head_prefix = "= "
        self.table_auto_width = True

    def emit(self):
        """Emit the document represented by self.root DOM tree."""
        return self.emit_node(self.root).strip()  # FIXME

    # --------------------------------------------------------------------------

    def blockdata_pre_emit(self, node):
        """ pre block -> with newline at the end """
        return "{{{%s}}}\n" % self.deentity.replace_all(node.content)

    def inlinedata_pre_emit(self, node):
        """ a pre inline block -> no newline at the end """
        return "{{{%s}}}" % self.deentity.replace_all(node.content)

    def blockdata_pass_emit(self, node):
        """ pass-through block -> content unchanged, followed by a blank line """
        return f"{node.content}\n\n"

    # --------------------------------------------------------------------------

    def p_emit(self, node):
        """ paragraph -> children, plus a blank line when not inside a list """
        result = self.emit_children(node)
        if self._inner_list == "":
            result += "\n\n"
        return result

    def br_emit(self, node):
        """
        line break -> two backslashes inside a list (where a real newline
        would end the list item), otherwise a plain newline
        """
        if self._inner_list != "":
            return "\\\\"
        else:
            return "\n"

    def headline_emit(self, node):
        """ headline -> ``node.level`` equal signs, the text and a blank line """
        return f"{'=' * node.level} {self.emit_children(node)}\n\n"

    # --------------------------------------------------------------------------

    def strong_emit(self, node):
        return self._typeface(node, key="**")
    b_emit = strong_emit
    big_emit = strong_emit

    def i_emit(self, node):
        return self._typeface(node, key="//")
    em_emit = i_emit

    def tt_emit(self, node):
        return self._typeface(node, key="##")

    def sup_emit(self, node):
        return self._typeface(node, key="^^")

    def sub_emit(self, node):
        return self._typeface(node, key=",,")

    def u_emit(self, node):
        return self._typeface(node, key="__")

    def small_emit(self, node):
        return self._typeface(node, key="--")

    def del_emit(self, node):
        return self._typeface(node, key="~~")
    strike_emit = del_emit

    # --------------------------------------------------------------------------

    def hr_emit(self, node):
        """ horizontal rule -> four dashes and a blank line """
        return "----\n\n"

    def a_emit(self, node):
        """
        link -> ``[[url]]`` if the link text equals the url, otherwise
        ``[[url|text]]``. An anchor without ``href`` yields only its text.
        """
        link_text = self.emit_children(node)
        try:
            url = node.attrs["href"]
        except KeyError:
            # No link target (e.g. a named anchor): keep just the text.
            return link_text
        if link_text == url:
            return f"[[{url}]]"
        else:
            return f"[[{url}|{link_text}]]"

    def img_emit(self, node):
        """
        image -> ``{{src|text}}``, or ``{{src|text|<width>x<height>}}`` when
        not in strict mode and both size attributes are set.

        The text is the longer one of the ``alt`` and ``title`` attributes
        (``title`` on equal length); if both are empty the file name of
        ``src`` is used. Images with a ``data:`` source are dropped.

        :raises KeyError: if the node has no ``src`` attribute
        """
        src = node.attrs["src"]

        # Inline "data:..." sources are not emitted at all.
        if src.split(':')[0] == 'data':
            return ""

        title = node.attrs.get("title", "")
        alt = node.attrs.get("alt", "")
        # Each variable reads the attribute of the same name, so that the
        # suffix below really is <width>x<height>.
        width = node.attrs.get("width", None)
        height = node.attrs.get("height", None)

        if len(alt) > len(title):  # Use the longest one
            text = alt
        else:
            text = title

        if text == "":  # Use filename as picture text
            text = posixpath.basename(src)

        # The size suffix is only written outside of strict mode.
        if not self.strict and width and height:
            return f"{{{{{src}|{text}|{width}x{height}}}}}"

        return f"{{{{{src}|{text}}}}}"

    # --------------------------------------------------------------------------

    def ul_emit(self, node):
        return self._list_emit(node, list_type="*")

    def ol_emit(self, node):
        return self._list_emit(node, list_type="#")

    # --------------------------------------------------------------------------

    def div_emit(self, node):
        return self._emit_content(node)

    def span_emit(self, node):
        return self._emit_content(node)
if __name__ == "__main__":
    # Manual smoke test: run the module doctests, then push one html
    # snippet through HtmlParser and CreoleEmitter and print the result.
    import doctest

    doctest_summary = doctest.testmod()
    print(doctest_summary)

    from creole.parser.html_parser import HtmlParser

    sample_html = "A <>bar1<> in a line..."

    html_parser = HtmlParser(debug=True)
    tree = html_parser.feed(sample_html)
    html_parser.debug()

    from creole.shared.unknown_tags import escape_unknown_nodes

    emitter = CreoleEmitter(
        tree,
        debug=True,
        unknown_emit=escape_unknown_nodes,
    )
    creole_text = emitter.emit()

    ruler = "*" * 79
    print(ruler)
    print(creole_text)
    print(ruler)

    # Same text again with whitespace made visible: spaces become dots and
    # every newline is additionally shown as a literal "\n".
    visible = creole_text.replace(" ", ".")
    visible = visible.replace("\n", "\\n\n")
    print(visible)