diff options
author | Jens Diemer <github.com@jensdiemer.de> | 2020-01-19 11:17:46 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-19 11:17:46 +0100 |
commit | 4e74f29daaf5026a3d4d6dae9f2e74f5f3655439 (patch) | |
tree | c98259013096e40799b9a7908c83e371cc8e6e37 | |
parent | f352a1393d789326d1cbd4f941c7a791c604aa30 (diff) | |
parent | 51a92cd75319bdef124d802cd23da0d32b11528b (diff) | |
download | creole-4e74f29daaf5026a3d4d6dae9f2e74f5f3655439.tar.gz |
Merge pull request #38 from jedie/cleanup
Cleanup
47 files changed, 625 insertions, 1437 deletions
diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml index a244f20..620ca27 100644 --- a/.github/workflows/pythonapp.yml +++ b/.github/workflows/pythonapp.yml @@ -28,10 +28,10 @@ jobs: source $HOME/.poetry/env make tox-listenvs -# - name: Run linters -# run: | -# source $HOME/.poetry/env -# make lint + - name: Run linters + run: | + source $HOME/.poetry/env + make lint - name: Run tests with Python v3.8 run: | diff --git a/.travis.yml b/.travis.yml index c2d158e..5ff3ee0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,7 @@ install: script: - make pytest + - make lint after_success: - coveralls @@ -29,13 +29,13 @@ install: check-poetry ## install python-creole via poetry poetry install lint: ## Run code formatters and linter -# poetry run isort --check-only --recursive creole -# poetry run black --line-length=119 --check creole + poetry run flynt --fail-on-change --line_length=${MAX_LINE_LENGTH} creole + poetry run isort --check-only --recursive creole poetry run flake8 creole fix-code-style: ## Fix code formatting -# poetry run flynt --line_length=119 creole -# poetry run isort --apply --recursive creole + poetry run flynt --line_length=${MAX_LINE_LENGTH} creole + poetry run isort --apply --recursive creole poetry run autopep8 --ignore-local-config --max-line-length=${MAX_LINE_LENGTH} --aggressive --aggressive --in-place --recursive creole tox-listenvs: check-poetry ## List all tox test environments diff --git a/README.creole b/README.creole index bf02c02..f656531 100644 --- a/README.creole +++ b/README.creole @@ -207,7 +207,17 @@ Note: In this case you must install **docutils**! See above. = history = -* *dev* - [[https://github.com/jedie/python-creole/compare/v1.3.2...master|compare v1.3.2...master]] +* *dev* - [[https://github.com/jedie/python-creole/compare/v1.4.1...master|compare v1.4.1...master]] +** TBC +* v1.4.1 - 2020-01-19 - [[https://github.com/jedie/python-creole/compare/v1.4.0...master|compare v1.4.0...master]] +** Remove Python v2 support code +** [[https://github.com/jedie/python-creole/issues/26|Fix "Undefined substitution referenced" error]] contributed by dforsi +** [[https://github.com/jedie/python-creole/pull/37|Fix regression in tests for setup_utils]] contributed by jugmac00 +** Fix code style with: autopep8 +** sort imports with isort +** change old {{{%-formatted}}} and {{{.format(...)}}} strings into Python 3.6+'s {{{f-strings}}} with flynt +** Activate linting in CI pipeline +* v1.4.0 - 2020-01-19 - [[https://github.com/jedie/python-creole/compare/v1.3.2...master|compare v1.3.2...master]] ** modernize project: *** use poetry *** Add a {{{Makefile}}} @@ -237,7 +247,7 @@ Note: In this case you must install **docutils**! See above. ** NEW: Add {{{<<toc>>}}} macro to create a table of contents list ** Bugfix for: AttributeError: 'CreoleParser' object has no attribute '_escaped_char_repl' ** Bugfix for: AttributeError: 'CreoleParser' object has no attribute '_escaped_url_repl' -** API Change: Callable macros will raise a TypeError instead of create a DeprecationWarning (Was removed in v0.5) +** API Change: Callable macros will raise a TypeError instead of create a DeprecationWarning (Was removed in v0.5) * v1.1.1 - 2013-11-08 ** Bugfix: Setup script exited with error: can't copy 'README.creole': doesn't exist or not a regular file * v1.1.0 - 2013-10-28 diff --git a/creole/__init__.py b/creole/__init__.py index 3c6a113..25dd09c 100644 --- a/creole/__init__.py +++ b/creole/__init__.py @@ -19,31 +19,28 @@ """ - import warnings from creole.emitter.creol2html_emitter import HtmlEmitter -from creole.parser.creol2html_parser import CreoleParser from creole.emitter.html2creole_emitter import CreoleEmitter from creole.emitter.html2rest_emitter import ReStructuredTextEmitter from creole.emitter.html2textile_emitter import TextileEmitter +from creole.parser.creol2html_parser import CreoleParser from creole.parser.html_parser import HtmlParser -from creole.py3compat import TEXT_TYPE - -__version__ = "1.4.0" -__api__ = "1.0" # Creole 1.0 spec - http://wikicreole.org/ +__version__ = "1.4.1" +__api__ = "1.0" # Creole 1.0 spec - http://wikicreole.org/ -VERSION_STRING = __version__ # remove in future -API_STRING = __api__ # remove in future +VERSION_STRING = __version__ # remove in future +API_STRING = __api__ # remove in future def creole2html(markup_string, debug=False, - parser_kwargs=None, emitter_kwargs=None, - block_rules=None, blog_line_breaks=True, - macros=None, verbose=None, stderr=None, - strict=False, - ): + parser_kwargs=None, emitter_kwargs=None, + block_rules=None, blog_line_breaks=True, + macros=None, verbose=None, stderr=None, + strict=False, + ): """ convert creole markup into html code @@ -52,7 +49,7 @@ def creole2html(markup_string, debug=False, Info: parser_kwargs and emitter_kwargs are deprecated """ - assert isinstance(markup_string, TEXT_TYPE), "given markup_string must be unicode!" + assert isinstance(markup_string, str), "given markup_string must be unicode!" parser_kwargs2 = { "block_rules": block_rules, @@ -60,7 +57,9 @@ def creole2html(markup_string, debug=False, "debug": debug, } if parser_kwargs is not None: - warnings.warn("parser_kwargs argument in creole2html would be removed in the future!", PendingDeprecationWarning) + warnings.warn( + "parser_kwargs argument in creole2html would be removed in the future!", + PendingDeprecationWarning) parser_kwargs2.update(parser_kwargs) # Create document tree from creole markup @@ -75,7 +74,9 @@ def creole2html(markup_string, debug=False, "strict": strict, } if emitter_kwargs is not None: - warnings.warn("emitter_kwargs argument in creole2html would be removed in the future!", PendingDeprecationWarning) + warnings.warn( + "emitter_kwargs argument in creole2html would be removed in the future!", + PendingDeprecationWarning) emitter_kwargs2.update(emitter_kwargs) # Build html code from document tree @@ -84,7 +85,7 @@ def creole2html(markup_string, debug=False, def parse_html(html_string, debug=False): """ create the document tree from html code """ - assert isinstance(html_string, TEXT_TYPE), "given html_string must be unicode!" + assert isinstance(html_string, str), "given html_string must be unicode!" h2c = HtmlParser(debug=debug) document_tree = h2c.feed(html_string) @@ -94,13 +95,13 @@ def parse_html(html_string, debug=False): def html2creole( - html_string, - debug=False, - parser_kwargs=None, - emitter_kwargs=None, - unknown_emit=None, - strict=False, - ): + html_string, + debug=False, + parser_kwargs=None, + emitter_kwargs=None, + unknown_emit=None, + strict=False, +): """ convert html code into creole markup @@ -108,7 +109,9 @@ def html2creole( 'This is **creole //markup//**!' """ if parser_kwargs is not None: - warnings.warn("parser_kwargs argument in html2creole would be removed in the future!", PendingDeprecationWarning) + warnings.warn( + "parser_kwargs argument in html2creole would be removed in the future!", + PendingDeprecationWarning) document_tree = parse_html(html_string, debug=debug) @@ -117,7 +120,9 @@ def html2creole( "strict": strict, } if emitter_kwargs is not None: - warnings.warn("emitter_kwargs argument in html2creole would be removed in the future!", PendingDeprecationWarning) + warnings.warn( + "emitter_kwargs argument in html2creole would be removed in the future!", + PendingDeprecationWarning) emitter_kwargs2.update(emitter_kwargs) # create creole markup from the document tree @@ -126,9 +131,9 @@ def html2creole( def html2textile(html_string, debug=False, - parser_kwargs=None, emitter_kwargs=None, - unknown_emit=None - ): + parser_kwargs=None, emitter_kwargs=None, + unknown_emit=None + ): """ convert html code into textile markup @@ -136,7 +141,9 @@ def html2textile(html_string, debug=False, 'This is *textile __markup__*!' """ if parser_kwargs is not None: - warnings.warn("parser_kwargs argument in html2textile would be removed in the future!", PendingDeprecationWarning) + warnings.warn( + "parser_kwargs argument in html2textile would be removed in the future!", + PendingDeprecationWarning) document_tree = parse_html(html_string, debug=debug) @@ -144,7 +151,9 @@ def html2textile(html_string, debug=False, "unknown_emit": unknown_emit, } if emitter_kwargs is not None: - warnings.warn("emitter_kwargs argument in html2textile would be removed in the future!", PendingDeprecationWarning) + warnings.warn( + "emitter_kwargs argument in html2textile would be removed in the future!", + PendingDeprecationWarning) emitter_kwargs2.update(emitter_kwargs) # create textile markup from the document tree @@ -153,9 +162,9 @@ def html2textile(html_string, debug=False, def html2rest(html_string, debug=False, - parser_kwargs=None, emitter_kwargs=None, - unknown_emit=None - ): + parser_kwargs=None, emitter_kwargs=None, + unknown_emit=None + ): """ convert html code into ReStructuredText markup @@ -171,18 +180,11 @@ def html2rest(html_string, debug=False, "unknown_emit": unknown_emit, } if emitter_kwargs is not None: - warnings.warn("emitter_kwargs argument in html2rest would be removed in the future!", PendingDeprecationWarning) + warnings.warn( + "emitter_kwargs argument in html2rest would be removed in the future!", + PendingDeprecationWarning) emitter_kwargs2.update(emitter_kwargs) # create ReStructuredText markup from the document tree emitter = ReStructuredTextEmitter(document_tree, debug=debug, **emitter_kwargs2) return emitter.emit() - - - -if __name__ == '__main__': - print("runing local doctest...") - import doctest - print(doctest.testmod( -# verbose=True - )) diff --git a/creole/cmdline.py b/creole/cmdline.py index 78f4eb4..3fcdfe6 100644 --- a/creole/cmdline.py +++ b/creole/cmdline.py @@ -1,11 +1,10 @@ -#!/usr/bin/env python -# coding: utf-8 +#!/usr/bin/env python3 """ python-creole commandline interface ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - :copyleft: 2013 by the python-creole team, see AUTHORS for more details. + :copyleft: 2013-2020 by the python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ @@ -13,8 +12,7 @@ import argparse import codecs -from creole import creole2html, html2creole, html2rest, html2textile -from creole import VERSION_STRING +from creole import VERSION_STRING, creole2html, html2creole, html2rest, html2textile class CreoleCLI(object): @@ -27,15 +25,16 @@ class CreoleCLI(object): " creole2html, html2creole, html2ReSt, html2textile" ), ) - self.parser.add_argument('--version', action='version', - version='%%(prog)s from python-creole v%s' % VERSION_STRING + self.parser.add_argument( + '--version', action='version', + version='%%(prog)s from python-creole v%s' % VERSION_STRING # noqa flynt ) self.parser.add_argument("sourcefile", help="source file to convert") self.parser.add_argument("destination", help="Output filename") self.parser.add_argument("--encoding", - default="utf-8", - help="Codec for read/write file (default encoding: utf-8)" - ) + default="utf-8", + help="Codec for read/write file (default encoding: utf-8)" + ) args = self.parser.parse_args() @@ -46,27 +45,28 @@ class CreoleCLI(object): self.convert(sourcefile, destination, encoding) def convert(self, sourcefile, destination, encoding): - print("Convert %r to %r with %s (codec: %s)" % ( - sourcefile, destination, self.convert_func.__name__, encoding - )) + print(f"Convert {sourcefile!r} to {destination!r} with {self.convert_func.__name__} (codec: {encoding})") with codecs.open(sourcefile, "r", encoding=encoding) as infile: with codecs.open(destination, "w", encoding=encoding) as outfile: content = infile.read() converted = self.convert_func(content) outfile.write(converted) - print("done. %r created." % destination) + print(f"done. {destination!r} created.") def cli_creole2html(): CreoleCLI(creole2html) + def cli_html2creole(): CreoleCLI(html2creole) + def cli_html2rest(): CreoleCLI(html2rest) + def cli_html2textile(): CreoleCLI(html2textile) diff --git a/creole/emitter/creol2html_emitter.py b/creole/emitter/creol2html_emitter.py index 0fa0def..f946c88 100644 --- a/creole/emitter/creol2html_emitter.py +++ b/creole/emitter/creol2html_emitter.py @@ -1,23 +1,18 @@ -# coding: utf-8 - """ WikiCreole to HTML converter - :copyleft: 2008-2015 by python-creole team, see AUTHORS for more details. + :copyleft: 2008-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - - import json -from xml.sax.saxutils import escape import sys import traceback +from xml.sax.saxutils import escape from creole.parser.creol2html_parser import CreoleParser -from creole.py3compat import TEXT_TYPE from creole.shared.utils import string2dict @@ -57,9 +52,9 @@ class TableOfContent(object): if index > stack_length: for _ in range(stack_length, index): - l = [] - stack[-1].append(l) - stack.append(l) + nest_list = [] + stack[-1].append(nest_list) + stack.append(nest_list) elif index < stack_length: stack = stack[:index] @@ -69,14 +64,14 @@ class TableOfContent(object): def nested_headlines2html(self, nested_headlines, level=0): """Convert a python nested list like the one representing the toc to an html equivalent.""" - indent = "\t"*level - if isinstance(nested_headlines, TEXT_TYPE): - return '%s<li><a href="#%s">%s</a></li>\n' % (indent, nested_headlines, nested_headlines) + indent = "\t" * level + if isinstance(nested_headlines, str): + return f'{indent}<li><a href="#{nested_headlines}">{nested_headlines}</a></li>\n' elif isinstance(nested_headlines, list): - html = '%s<ul>\n' % indent + html = f'{indent}<ul>\n' for elt in nested_headlines: html += self.nested_headlines2html(elt, level + 1) - html += '%s</ul>' % indent + html += f'{indent}</ul>' if level > 0: html += "\n" return html @@ -96,17 +91,17 @@ class TableOfContent(object): return document - class HtmlEmitter(object): """ Generate HTML output for the document tree consisting of DocNodes. """ + def __init__(self, root, macros=None, verbose=None, stderr=None, strict=False): self.root = root - if callable(macros) == True: + if callable(macros): # was a DeprecationWarning in the past raise TypeError("Callable macros are not supported anymore!") @@ -115,7 +110,7 @@ class HtmlEmitter(object): else: self.macros = macros - if not "toc" in root.used_macros: + if "toc" not in root.used_macros: # The document has no <<toc>> self.toc = None else: @@ -132,7 +127,6 @@ class HtmlEmitter(object): self.toc = TableOfContent() self.macros.toc = self.toc - if verbose is None: self.verbose = 1 else: @@ -149,7 +143,7 @@ class HtmlEmitter(object): """Try to emit whatever text is in the node.""" try: return node.children[0].content or '' - except: + except BaseException: return node.content or '' def html_escape(self, text): @@ -215,7 +209,7 @@ class HtmlEmitter(object): def table_head_emit(self, node): return '\t<th>%s</th>\n' % self.emit_children(node) - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def _typeface(self, node, tag): return '<%(tag)s>%(data)s</%(tag)s>' % { @@ -226,33 +220,36 @@ class HtmlEmitter(object): # TODO: How can we generalize that: def emphasis_emit(self, node): return self._typeface(node, tag="i") + def strong_emit(self, node): return self._typeface(node, tag="strong") + def monospace_emit(self, node): return self._typeface(node, tag="tt") + def superscript_emit(self, node): return self._typeface(node, tag="sup") + def subscript_emit(self, node): return self._typeface(node, tag="sub") + def underline_emit(self, node): return self._typeface(node, tag="u") + def small_emit(self, node): return self._typeface(node, tag="small") + def delete_emit(self, node): return self._typeface(node, tag="del") - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def header_emit(self, node): - header = '<h%d>%s</h%d>' % ( - node.level, self.html_escape(node.content), node.level - ) + header = f'<h{node.level:d}>{self.html_escape(node.content)}</h{node.level:d}>' if self.toc is not None: self.toc.add_headline(node.level, node.content) # add link attribute for toc navigation - header = '<a name="%s">%s</a>' % ( - self.html_escape(node.content), header - ) + header = f'<a name="{self.html_escape(node.content)}">{header}</a>' header += "\n" return header @@ -267,8 +264,7 @@ class HtmlEmitter(object): else: inside = self.html_escape(target) - return '<a href="%s">%s</a>' % ( - self.attr_escape(target), inside) + return f'<a href="{self.attr_escape(target)}">{inside}</a>' def image_emit(self, node): target = node.content @@ -284,13 +280,12 @@ class HtmlEmitter(object): height = int(h_str.strip()) return '<img src="%s" title="%s" alt="%s" width="%s" height="%s" />' % ( self.attr_escape(target), title, title, width, height) - except: + except BaseException: pass - return '<img src="%s" title="%s" alt="%s" />' % ( - self.attr_escape(target), text, text) + return f'<img src="{self.attr_escape(target)}" title="{text}" alt="{text}" />' def macro_emit(self, node): - #print(node.debug()) + # print(node.debug()) macro_name = node.macro_name text = node.content macro = None @@ -298,12 +293,10 @@ class HtmlEmitter(object): args = node.macro_args try: macro_kwargs = string2dict(args) - except ValueError as e: + except ValueError: exc_info = sys.exc_info() return self.error( - "Wrong macro arguments: %s for macro '%s' (maybe wrong macro tag syntax?)" % ( - json.dumps(args), macro_name - ), + f"Wrong macro arguments: {json.dumps(args)} for macro '{macro_name}' (maybe wrong macro tag syntax?)", exc_info ) @@ -313,24 +306,24 @@ class HtmlEmitter(object): if isinstance(self.macros, dict): try: macro = self.macros[macro_name] - except KeyError as e: + except KeyError: exc_info = sys.exc_info() else: try: macro = getattr(self.macros, macro_name) - except AttributeError as e: + except AttributeError: exc_info = sys.exc_info() - if macro == None: + if macro is None: return self.error( - "Macro '%s' doesn't exist" % macro_name, + f"Macro '{macro_name}' doesn't exist", exc_info ) try: result = macro(**macro_kwargs) except TypeError as err: - msg = "Macro '%s' error: %s" % (macro_name, err) + msg = f"Macro '{macro_name}' error: {err}" exc_info = sys.exc_info() if self.verbose > 1: if self.verbose > 2: @@ -347,22 +340,22 @@ class HtmlEmitter(object): try: sourceline = inspect.getsourcelines(macro)[0][0].strip() except IOError as err: - evalue = etype("%s (error getting sourceline: %s from %s)" % (evalue, err, filename)) + evalue = etype(f"{evalue} (error getting sourceline: {err} from {filename})") else: - evalue = etype("%s (sourceline: %r from %s)" % (evalue, sourceline, filename)) + evalue = etype(f"{evalue} (sourceline: {sourceline!r} from {filename})") exc_info = etype, evalue, etb return self.error(msg, exc_info) except Exception as err: return self.error( - "Macro '%s' error: %s" % (macro_name, err), + f"Macro '{macro_name}' error: {err}", exc_info=sys.exc_info() ) - if not isinstance(result, TEXT_TYPE): - msg = "Macro '%s' doesn't return a unicode string!" % macro_name + if not isinstance(result, str): + msg = f"Macro '{macro_name}' doesn't return a unicode string!" if self.verbose > 1: - msg += " - returns: %r, type %r" % (result, type(result)) + msg += f" - returns: {result!r}, type {type(result)!r}" return self.error(msg) if node.kind == "macro_block": @@ -393,7 +386,7 @@ class HtmlEmitter(object): def default_emit(self, node): """Fallback function for emitting unknown nodes.""" - raise NotImplementedError("Node '%s' unknown" % node.kind) + raise NotImplementedError(f"Node '{node.kind}' unknown") def emit_children(self, node): """Emit all the children of a node.""" @@ -402,7 +395,7 @@ class HtmlEmitter(object): def emit_node(self, node): """Emit a single node.""" #print("%s_emit: %r" % (node.kind, node.content)) - emit = getattr(self, '%s_emit' % node.kind, self.default_emit) + emit = getattr(self, f'{node.kind}_emit', self.default_emit) return emit(node) def emit(self): @@ -423,7 +416,7 @@ class HtmlEmitter(object): self.stderr.write(exception) if self.verbose > 0: - return "[Error: %s]\n" % text + return f"[Error: {text}]\n" else: # No error output return "" diff --git a/creole/emitter/html2creole_emitter.py b/creole/emitter/html2creole_emitter.py index 00fd142..7c6421e 100644 --- a/creole/emitter/html2creole_emitter.py +++ b/creole/emitter/html2creole_emitter.py @@ -16,12 +16,12 @@ import posixpath from creole.shared.base_emitter import BaseEmitter - class CreoleEmitter(BaseEmitter): """ Build from a document_tree (html2creole.parser.HtmlParser instance) a creole markup text. """ + def __init__(self, document_tree, strict=False, *args, **kwargs): self.strict = strict super(CreoleEmitter, self).__init__(document_tree, *args, **kwargs) @@ -31,22 +31,23 @@ class CreoleEmitter(BaseEmitter): def emit(self): """Emit the document represented by self.root DOM tree.""" - return self.emit_node(self.root).strip() # FIXME + return self.emit_node(self.root).strip() # FIXME - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def blockdata_pre_emit(self, node): """ pre block -> with newline at the end """ return "{{{%s}}}\n" % self.deentity.replace_all(node.content) + def inlinedata_pre_emit(self, node): """ a pre inline block -> no newline at the end """ return "{{{%s}}}" % self.deentity.replace_all(node.content) def blockdata_pass_emit(self, node): - return "%s\n\n" % node.content + return f"{node.content}\n\n" return node.content - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def p_emit(self, node): result = self.emit_children(node) @@ -63,7 +64,7 @@ class CreoleEmitter(BaseEmitter): def headline_emit(self, node): return "%s %s\n\n" % ("=" * node.level, self.emit_children(node)) - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def strong_emit(self, node): return self._typeface(node, key="**") @@ -76,19 +77,24 @@ class CreoleEmitter(BaseEmitter): def tt_emit(self, node): return self._typeface(node, key="##") + def sup_emit(self, node): return self._typeface(node, key="^^") + def sub_emit(self, node): return self._typeface(node, key=",,") + def u_emit(self, node): return self._typeface(node, key="__") + def small_emit(self, node): return self._typeface(node, key="--") + def del_emit(self, node): return self._typeface(node, key="~~") strike_emit = del_emit - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def hr_emit(self, node): return "----\n\n" @@ -101,9 +107,9 @@ class CreoleEmitter(BaseEmitter): # e.g.: <a name="anchor-one">foo</a> return link_text if link_text == url: - return "[[%s]]" % url + return f"[[{url}]]" else: - return "[[%s|%s]]" % (url, link_text) + return f"[[{url}|{link_text}]]" def img_emit(self, node): src = node.attrs["src"] @@ -115,12 +121,12 @@ class CreoleEmitter(BaseEmitter): alt = node.attrs.get("alt", "") width = node.attrs.get("height", None) height = node.attrs.get("width", None) - if len(alt) > len(title): # Use the longest one + if len(alt) > len(title): # Use the longest one text = alt else: text = title - if text == "": # Use filename as picture text + if text == "": # Use filename as picture text text = posixpath.basename(src) if not self.strict: @@ -129,7 +135,7 @@ class CreoleEmitter(BaseEmitter): return "{{%s|%s}}" % (src, text) - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def ul_emit(self, node): return self._list_emit(node, list_type="*") @@ -137,7 +143,7 @@ class CreoleEmitter(BaseEmitter): def ol_emit(self, node): return self._list_emit(node, list_type="#") - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def div_emit(self, node): return self._emit_content(node) @@ -146,10 +152,6 @@ class CreoleEmitter(BaseEmitter): return self._emit_content(node) - - - - if __name__ == '__main__': import doctest print(doctest.testmod()) @@ -169,9 +171,9 @@ if __name__ == '__main__': from creole.shared.unknown_tags import escape_unknown_nodes e = CreoleEmitter(document_tree, - debug=True, - unknown_emit=escape_unknown_nodes - ) + debug=True, + unknown_emit=escape_unknown_nodes + ) content = e.emit() print("*" * 79) print(content) diff --git a/creole/emitter/html2rest_emitter.py b/creole/emitter/html2rest_emitter.py index 0e0b663..23de5ce 100644 --- a/creole/emitter/html2rest_emitter.py +++ b/creole/emitter/html2rest_emitter.py @@ -19,9 +19,8 @@ import posixpath from creole.shared.base_emitter import BaseEmitter from creole.shared.markup_table import MarkupTable - # Kink of nodes in which hyperlinks are stored in references intead of embedded urls. -DO_SUBSTITUTION = ("th", "td",) # TODO: In witch kind of node must we also substitude links? +DO_SUBSTITUTION = ("th", "td",) # TODO: In witch kind of node must we also substitude links? class Html2restException(Exception): @@ -33,6 +32,7 @@ class ReStructuredTextEmitter(BaseEmitter): Build from a document_tree (html2creole.parser.HtmlParser instance) a creole markup text. """ + def __init__(self, *args, **kwargs): super(ReStructuredTextEmitter, self).__init__(*args, **kwargs) @@ -55,23 +55,23 @@ class ReStructuredTextEmitter(BaseEmitter): self._substitution_data = [] return content - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def blockdata_pre_emit(self, node): """ pre block -> with newline at the end """ pre_block = self.deentity.replace_all(node.content).strip() pre_block = "\n".join([" %s" % line for line in pre_block.splitlines()]) - return "::\n\n%s\n\n" % pre_block + return f"::\n\n{pre_block}\n\n" def inlinedata_pre_emit(self, node): """ a pre inline block -> no newline at the end """ return "<pre>%s</pre>" % self.deentity.replace_all(node.content) def blockdata_pass_emit(self, node): - return "%s\n\n" % node.content + return f"{node.content}\n\n" return node.content - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def emit_children(self, node): """Emit all the children of a node.""" @@ -87,7 +87,7 @@ class ReStructuredTextEmitter(BaseEmitter): if self._substitution_data: # add rest at the end if not result.endswith("\n\n"): - result += "\n\n" + result += "\n\n" result += "%s\n\n" % self._get_block_data() return result @@ -103,13 +103,14 @@ class ReStructuredTextEmitter(BaseEmitter): return "%s\n\n" % self.emit_children(node) HEADLINE_DATA = { - 1:("=", True), - 2:("-", True), - 3:("=", False), - 4:("-", False), - 5:('`', False), - 6:("'", False), + 1: ("=", True), + 2: ("-", True), + 3: ("=", False), + 4: ("-", False), + 5: ('`', False), + 6: ("'", False), } + def headline_emit(self, node): text = self.emit_children(node) @@ -125,21 +126,23 @@ class ReStructuredTextEmitter(BaseEmitter): else: format = "%(t)s\n%(m)s\n\n" - return format % {"m":markup, "t":text} + return format % {"m": markup, "t": text} - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def _typeface(self, node, key): return key + self.emit_children(node) + key def strong_emit(self, node): return self._typeface(node, key="**") + def b_emit(self, node): return self._typeface(node, key="**") big_emit = strong_emit def i_emit(self, node): return self._typeface(node, key="*") + def em_emit(self, node): return self._typeface(node, key="*") @@ -167,7 +170,7 @@ class ReStructuredTextEmitter(BaseEmitter): # def code_emit(self, node): # return self._typeface(node, key="@") - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def hr_emit(self, node): return "----\n\n" @@ -212,16 +215,16 @@ class ReStructuredTextEmitter(BaseEmitter): if not old_url: # new substitution self._substitution_data.append( - ".. _%s: %s" % (link_text, url) + f".. _{link_text}: {url}" ) - return "`%s`_" % link_text + return f"`{link_text}`_" if old_url: # reuse a existing substitution - return "`%s`_" % link_text + return f"`{link_text}`_" else: # create a inline hyperlink - return "`%s <%s>`_" % (link_text, url) + return f"`{link_text} <{url}>`_" def img_emit(self, node): src = node.attrs["src"] @@ -231,12 +234,12 @@ class ReStructuredTextEmitter(BaseEmitter): title = node.attrs.get("title", "") alt = node.attrs.get("alt", "") - if len(alt) > len(title): # Use the longest one + if len(alt) > len(title): # Use the longest one substitution_text = alt else: substitution_text = title - if substitution_text == "": # Use filename as picture text + if substitution_text == "": # Use filename as picture text substitution_text = posixpath.basename(src) old_src = self._get_old_substitution( @@ -244,17 +247,17 @@ class ReStructuredTextEmitter(BaseEmitter): ) if not old_src: self._substitution_data.append( - ".. |%s| image:: %s" % (substitution_text, src) + f".. |{substitution_text}| image:: {src}" ) - return "|%s|" % substitution_text + return f"|{substitution_text}|" - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def code_emit(self, node): return "``%s``" % self._emit_content(node) - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def li_emit(self, node): content = self.emit_children(node).strip("\n") @@ -270,7 +273,7 @@ class ReStructuredTextEmitter(BaseEmitter): if node.level == 1: # FIXME: This should be made easier and better complete_list = "\n\n".join([i.strip("\n") for i in content.split("\n") if i]) - content = "%s\n\n" % complete_list + content = f"{complete_list}\n\n" return content @@ -291,7 +294,7 @@ class ReStructuredTextEmitter(BaseEmitter): ) self.emit_children(node) content = self._table.get_rest_table() - return "%s\n\n" % content + return f"{content}\n\n" if __name__ == '__main__': @@ -328,17 +331,16 @@ if __name__ == '__main__': print(data) h2c = HtmlParser( -# debug=True + # debug=True ) document_tree = h2c.feed(data) h2c.debug() e = ReStructuredTextEmitter(document_tree, - debug=True - ) + debug=True + ) content = e.emit() print("*" * 79) print(content) print("*" * 79) print(content.replace(" ", ".").replace("\n", "\\n\n")) - diff --git a/creole/emitter/html2textile_emitter.py b/creole/emitter/html2textile_emitter.py index e43b0df..3cbbd02 100644 --- a/creole/emitter/html2textile_emitter.py +++ b/creole/emitter/html2textile_emitter.py @@ -16,7 +16,6 @@ import posixpath from creole.shared.base_emitter import BaseEmitter - class TextileEmitter(BaseEmitter): """ Build from a document_tree (html2creole.parser.HtmlParser instance) a @@ -31,64 +30,70 @@ class TextileEmitter(BaseEmitter): def emit(self): """Emit the document represented by self.root DOM tree.""" - return self.emit_node(self.root).strip() # FIXME + return self.emit_node(self.root).strip() # FIXME - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def blockdata_pre_emit(self, node): """ pre block -> with newline at the end """ return "<pre>%s</pre>\n" % self.deentity.replace_all(node.content) + def inlinedata_pre_emit(self, node): """ a pre inline block -> no newline at the end """ return "<pre>%s</pre>" % self.deentity.replace_all(node.content) def blockdata_pass_emit(self, node): - return "%s\n\n" % node.content + return f"{node.content}\n\n" return node.content - - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def p_emit(self, node): return "%s\n\n" % self.emit_children(node) def headline_emit(self, node): - return "h%i. %s\n\n" % (node.level, self.emit_children(node)) + return f"h{node.level:d}. {self.emit_children(node)}\n\n" - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def _typeface(self, node, key): return key + self.emit_children(node) + key def strong_emit(self, node): return self._typeface(node, key="*") + def b_emit(self, node): return self._typeface(node, key="**") big_emit = strong_emit def i_emit(self, node): return self._typeface(node, key="__") + def em_emit(self, node): return self._typeface(node, key="_") def sup_emit(self, node): return self._typeface(node, key="^") + def sub_emit(self, node): return self._typeface(node, key="~") + def del_emit(self, node): return self._typeface(node, key="-") def cite_emit(self, node): return self._typeface(node, key="??") + def ins_emit(self, node): return self._typeface(node, key="+") def span_emit(self, node): return self._typeface(node, key="%") + def code_emit(self, node): return self._typeface(node, key="@") - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def hr_emit(self, node): return "----\n\n" @@ -96,7 +101,7 @@ class TextileEmitter(BaseEmitter): def a_emit(self, node): link_text = self.emit_children(node) url = node.attrs["href"] - return '"%s":%s' % (link_text, url) + return f'"{link_text}":{url}' def img_emit(self, node): src = node.attrs["src"] @@ -106,17 +111,17 @@ class TextileEmitter(BaseEmitter): title = node.attrs.get("title", "") alt = node.attrs.get("alt", "") - if len(alt) > len(title): # Use the longest one + if len(alt) > len(title): # Use the longest one text = alt else: text = title - if text == "": # Use filename as picture text + if text == "": # Use filename as picture text text = posixpath.basename(src) - return "!%s(%s)!" % (src, text) + return f"!{src}({text})!" - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def ul_emit(self, node): return self._list_emit(node, list_type="*") @@ -125,12 +130,6 @@ class TextileEmitter(BaseEmitter): return self._list_emit(node, list_type="#") - - - - - - if __name__ == '__main__': import doctest print(doctest.testmod()) @@ -160,8 +159,8 @@ if __name__ == '__main__': h2c.debug() e = TextileEmitter(document_tree, - debug=True - ) + debug=True + ) content = e.emit() print("*" * 79) print(content) diff --git a/creole/exceptions.py b/creole/exceptions.py index 91f9042..d52e6b4 100644 --- a/creole/exceptions.py +++ b/creole/exceptions.py @@ -10,6 +10,5 @@ """ - class DocutilsImportError(ImportError): pass diff --git a/creole/html_tools/deentity.py b/creole/html_tools/deentity.py index 2f6104a..23a6190 100644 --- a/creole/html_tools/deentity.py +++ b/creole/html_tools/deentity.py @@ -1,32 +1,23 @@ -#!/usr/bin/env python -# coding: utf-8 """ python-creole utils ~~~~~~~~~~~~~~~~~~~ - :copyleft: 2008-2011 by python-creole team, see AUTHORS for more details. + :copyleft: 2008-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - import re -try: - import htmlentitydefs as entities -except ImportError: - from html import entities # python 3 - -from creole.py3compat import PY3 - +from html import entities entities_rules = '|'.join([ r"(&\#(?P<number>\d+);)", r"(&\#x(?P<hex>[a-fA-F0-9]+);)", r"(&(?P<named>[a-zA-Z]+);)", ]) -#print(entities_rules) +# print(entities_rules) entities_regex = re.compile( entities_rules, re.VERBOSE | re.UNICODE | re.MULTILINE ) @@ -50,21 +41,16 @@ class Deentity(object): >>> d.replace_named("amp") '&' """ + def replace_number(self, text): """ unicode number entity """ unicode_no = int(text) - if PY3: - return chr(unicode_no) - else: - return unichr(unicode_no) + return chr(unicode_no) def replace_hex(self, text): """ hex entity """ unicode_no = int(text, 16) - if PY3: - return chr(unicode_no) - else: - return unichr(unicode_no) + return chr(unicode_no) def replace_named(self, text): """ named entity """ @@ -73,10 +59,7 @@ class Deentity(object): return " " else: codepoint = entities.name2codepoint[text] - if PY3: - return chr(codepoint) - else: - return unichr(codepoint) + return chr(codepoint) def replace_all(self, content): """ replace all html entities form the given text. """ @@ -84,7 +67,7 @@ class Deentity(object): groups = match.groupdict() for name, text in groups.items(): if text is not None: - replace_method = getattr(self, 'replace_%s' % name) + replace_method = getattr(self, f'replace_{name}') return replace_method(text) # Should never happen: diff --git a/creole/html_tools/strip_html.py b/creole/html_tools/strip_html.py index 10534ad..11a2f91 100644 --- a/creole/html_tools/strip_html.py +++ b/creole/html_tools/strip_html.py @@ -12,12 +12,10 @@ """ - import re from creole.parser.html_parser_config import BLOCK_TAGS - strip_html_regex = re.compile( r""" \s* @@ -33,7 +31,6 @@ strip_html_regex = re.compile( ) - def strip_html(html_code): """ Delete whitespace from html code. Doesn't recordnize preformatted blocks! @@ -58,8 +55,6 @@ def strip_html(html_code): >>> strip_html('<p>a <img src="/image.jpg" /> image.</p>') '<p>a <img src="/image.jpg" /> image.</p>' - - """ def strip_tag(match): @@ -90,10 +85,10 @@ def strip_html(html_code): elif startend_tag: # It's a closed start tag e.g.: <br /> - if space_start: # there was space before the tag + if space_start: # there was space before the tag result = " " + result - if space_end: # there was space after the tag + if space_end: # there was space after the tag result += " " else: # a start tag e.g.: <strong> diff --git a/creole/html_tools/text_tools.py b/creole/html_tools/text_tools.py index 5843cf6..16487a5 100644 --- a/creole/html_tools/text_tools.py +++ b/creole/html_tools/text_tools.py @@ -12,11 +12,11 @@ """ - import re - space_re = re.compile(r"^(\s*)(.*?)(\s*)$", re.DOTALL) + + def clean_whitespace(txt): """ Special whitespaces cleanup diff --git a/creole/parser/creol2html_parser.py b/creole/parser/creol2html_parser.py index 63f3b14..ff440e2 100644 --- a/creole/parser/creol2html_parser.py +++ b/creole/parser/creol2html_parser.py @@ -1,6 +1,3 @@ -# coding: utf-8 - - """ Creole wiki markup parser @@ -18,18 +15,14 @@ unrecognized schemes (like wtf://server/path) triggering italic rendering for the rest of the paragraph. - :copyleft: 2008-2011 by python-creole team, see AUTHORS for more details. + :copyleft: 2008-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - - import re from pprint import pformat -from creole.parser.creol2html_rules import BlockRules, INLINE_FLAGS, INLINE_RULES, \ - SpecialRules, InlineRules -from creole.py3compat import TEXT_TYPE +from creole.parser.creol2html_rules import INLINE_FLAGS, INLINE_RULES, BlockRules, InlineRules, SpecialRules from creole.shared.document_tree import DocNode @@ -59,9 +52,8 @@ class CreoleParser(object): # For inline elements: inline_re = re.compile('|'.join(INLINE_RULES), INLINE_FLAGS) - def __init__(self, raw, block_rules=None, blog_line_breaks=True, debug=False): - assert isinstance(raw, TEXT_TYPE) + assert isinstance(raw, str) self.raw = raw if block_rules is None: @@ -76,12 +68,12 @@ class CreoleParser(object): self.root = DocNode('document', None) self.cur = self.root # The most recent document node self.text = None # The node to add inline characters to - self.last_text_break = None # Last break node, inserted by _text_repl() + self.last_text_break = None # Last break node, inserted by _text_repl() # Filled with all macros that's in the text self.root.used_macros = set() - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def cleanup_break(self, old_cur): """ @@ -103,22 +95,22 @@ class CreoleParser(object): of one of the listed kinds of nodes or root. Start at the node node. """ - self.cleanup_break(node) # remove unused end line breaks. - while node.parent is not None and not node.kind in kinds: + self.cleanup_break(node) # remove unused end line breaks. + while node.parent is not None and node.kind not in kinds: node = node.parent return node def _upto_block(self): - self.cur = self._upto(self.cur, ('document',))# 'section', 'blockquote')) + self.cur = self._upto(self.cur, ('document',)) # 'section', 'blockquote')) - #__________________________________________________________________________ + # __________________________________________________________________________ # The _*_repl methods called for matches in regexps. Sometimes the # same method needs several names, because of group names in regexps. def _text_repl(self, groups): -# print("_text_repl()", self.cur.kind) -# self.debug_groups(groups) + # print("_text_repl()", self.cur.kind) + # self.debug_groups(groups) if self.cur.kind in ('table', 'table_row', 'bullet_list', 'number_list'): self._upto_block() @@ -135,7 +127,7 @@ class CreoleParser(object): self.parse_inline(text) if groups.get('break') and self.cur.kind in ('paragraph', - 'emphasis', 'strong', 'pre_inline'): + 'emphasis', 'strong', 'pre_inline'): self.last_text_break = DocNode('break', self.cur, "") self.text = None @@ -174,14 +166,14 @@ class CreoleParser(object): _link_target_repl = _link_repl _link_text_repl = _link_repl - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def _add_macro(self, groups, macro_type, name_key, args_key, text_key=None): """ generic method to handle the macro, used for all variants: inline, inline-tag, block """ - #self.debug_groups(groups) + # self.debug_groups(groups) assert macro_type in ("macro_inline", "macro_block") if text_key: @@ -232,7 +224,6 @@ class CreoleParser(object): _macro_tag_name_repl = _macro_tag_repl _macro_tag_args_repl = _macro_tag_repl - def _macro_inline_repl(self, groups): """ inline macro tag with data, e.g.: <<macro>>text<</macro>> @@ -248,7 +239,7 @@ class CreoleParser(object): _macro_inline_args_repl = _macro_inline_repl _macro_inline_text_repl = _macro_inline_repl - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def _image_repl(self, groups): """Handles images and attachemnts included in the page.""" @@ -275,17 +266,21 @@ class CreoleParser(object): level = len(bullet) - 1 lst = self.cur # Find a list of the same kind and level up the tree - while (lst and - not (lst.kind in ('number_list', 'bullet_list') and - lst.level == level) and - not lst.kind in ('document', 'section', 'blockquote')): + while ( + lst and not ( + lst.kind in ( + 'number_list', + 'bullet_list') and lst.level == level) and lst.kind not in ( + 'document', + 'section', + 'blockquote')): lst = lst.parent if lst and lst.kind == kind: self.cur = lst else: # Create a new level of list self.cur = self._upto(self.cur, - ('list_item', 'document', 'section', 'blockquote')) + ('list_item', 'document', 'section', 'blockquote')) self.cur = DocNode(kind, self.cur) self.cur.level = level self.cur = DocNode('list_item', self.cur) @@ -335,6 +330,7 @@ class CreoleParser(object): self._upto_block() kind = groups.get('pre_block_kind', None) text = groups.get('pre_block_text', "") + def remove_tilde(m): return m.group('indent') + m.group('rest') text = self.pre_escape_re.sub(remove_tilde, text) @@ -357,20 +353,20 @@ class CreoleParser(object): _pre_inline_text_repl = _pre_inline_repl _pre_inline_head_repl = _pre_inline_repl - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def _inline_mark(self, groups, key): self.cur = DocNode(key, self.cur) self.text = None - text = groups["%s_text" % key] + text = groups[f"{key}_text"] self.parse_inline(text) self.cur = self._upto(self.cur, (key,)).parent self.text = None - # TODO: How can we generalize that: + def _emphasis_repl(self, groups): self._inline_mark(groups, key='emphasis') _emphasis_text_repl = _emphasis_repl @@ -403,7 +399,7 @@ class CreoleParser(object): self._inline_mark(groups, key='delete') _delete_text_repl = _delete_repl - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def _linebreak_repl(self, groups): DocNode('break', self.cur, None) @@ -420,7 +416,7 @@ class CreoleParser(object): self.text = DocNode('text', self.cur, "") self.text.content += groups.get('char', "") - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def _replace(self, match): """Invoke appropriate _*_repl method. Called for every matched group.""" @@ -437,7 +433,7 @@ class CreoleParser(object): if self.debug and name != "char": # TODO: use logging debug(groups) - replace_method = getattr(self, '_%s_repl' % name) + replace_method = getattr(self, f'_{name}_repl') replace_method(groups) return @@ -459,21 +455,22 @@ class CreoleParser(object): self.parse_block(text) return self.root + # -------------------------------------------------------------------------- - #-------------------------------------------------------------------------- def debug_tree(self, start_node=None): """ Display the current document tree """ print("_" * 80) - if start_node == None: + if start_node is None: start_node = self.root print(" document tree:") else: - print(" tree from %s:" % start_node) + print(f" tree from {start_node}:") print("=" * 80) + def emit(node, ident=0): for child in node.children: print("%s%s: %r" % (" " * ident, child.kind, child.content)) @@ -490,9 +487,6 @@ class CreoleParser(object): print("-" * 80) - - - if __name__ == "__main__": import doctest print(doctest.testmod()) @@ -513,10 +507,9 @@ if __name__ == "__main__": def display_match(match): groups = match.groupdict() for name, text in groups.items(): - if name != "char" and text != None: + if name != "char" and text is not None: print("%20s: %r" % (name, text)) - parser = CreoleParser("", blog_line_breaks=blog_line_breaks) print("_" * 80) @@ -527,7 +520,6 @@ if __name__ == "__main__": print("merged inline rules test:") re.sub(parser.inline_re, display_match, txt) - def test_single(rules, flags, txt): for rule in rules: rexp = re.compile(rule, flags) @@ -542,5 +534,4 @@ if __name__ == "__main__": print("single inline rules match test:") test_single(INLINE_RULES, INLINE_FLAGS, txt) - print("---END---") diff --git a/creole/parser/creol2html_rules.py b/creole/parser/creol2html_rules.py index 0a9f059..cbe0451 100644 --- a/creole/parser/creol2html_rules.py +++ b/creole/parser/creol2html_rules.py @@ -10,7 +10,6 @@ """ - import re @@ -26,12 +25,12 @@ class InlineRules(object): (?P<url_target> (?P<url_proto> %s )://[^$\s]+ ) )''' % proto # Original uri matching regex inherited from MoinMoin code. - #url = r'''(?P<url> - #(^ | (?<=\s | [.,:;!?()/=])) - #(?P<escaped_url>~)? - #(?P<url_target> (?P<url_proto> %s ):\S+? ) - #($ | (?=\s | [,.:;!?()] (\s | $))) - #)''' % proto + # url = r'''(?P<url> + # (^ | (?<=\s | [.,:;!?()/=])) + # (?P<escaped_url>~)? + # (?P<url_target> (?P<url_proto> %s ):\S+? ) + # ($ | (?=\s | [,.:;!?()] (\s | $))) + # )''' % proto link = r'''(?P<link> \[\[ (?P<link_target>.+?) \s* @@ -58,7 +57,7 @@ class InlineRules(object): (\| \s* (?P<image_text>.+?) \s*)? }} )''' - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- # a macro like: <<macro>>text<</macro>> macro_inline = r''' @@ -98,10 +97,6 @@ class InlineRules(object): char = r'(?P<char> . )' - - - - class BlockRules(object): """ All used block rules. @@ -125,7 +120,7 @@ class BlockRules(object): ) ''' - line = r'''(?P<line> ^\s*$ )''' # empty line that separates paragraphs + line = r'''(?P<line> ^\s*$ )''' # empty line that separates paragraphs head = r'''(?P<head> ^ @@ -133,7 +128,7 @@ class BlockRules(object): (?P<head_text> .*? ) (=|\s)*?$ )''' - separator = r'(?P<separator> ^ \s* ----) [ \t]* $' # horizontal line + separator = r'(?P<separator> ^ \s* ----) [ \t]* $' # horizontal line pre_block = r'''(?P<pre_block> ^{{{ \s* $ @@ -174,9 +169,6 @@ class BlockRules(object): ) - - - class SpecialRules(object): """ re rules witch not directly used as inline/block rules. @@ -195,11 +187,11 @@ class SpecialRules(object): (?P<cell> ( %s | [^|])+ ) ) \s* ''' % '|'.join([ - InlineRules.link, - InlineRules.macro_inline, InlineRules.macro_tag, - InlineRules.image, - InlineRules.pre_inline - ]) + InlineRules.link, + InlineRules.macro_inline, InlineRules.macro_tag, + InlineRules.image, + InlineRules.pre_inline + ]) # For pre escaping, in creole 1.0 done with ~: pre_escape = r' ^(?P<indent>\s*) ~ (?P<rest> \}\}\} \s*) $' @@ -236,13 +228,13 @@ def _verify_rules(rules, flags): rule_list = [] for rule in rules: try: -# print(rule) + # print(rule) re.compile(rule, flags) # Try to merge the rules. e.g. Check if group named double used. rule_list.append(rule) re.compile('|'.join(rule_list), flags) - except Exception as err: + except Exception: print(" *** Error with rule:") print(rule) print(" -" * 39) diff --git a/creole/parser/html_parser.py b/creole/parser/html_parser.py index 4380431..d83a936 100644 --- a/creole/parser/html_parser.py +++ b/creole/parser/html_parser.py @@ -1,28 +1,21 @@ -#!/usr/bin/env python -# coding: utf-8 """ python-creole ~~~~~~~~~~~~~ - - :copyleft: 2008-2011 by python-creole team, see AUTHORS for more details. + :copyleft: 2008-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - - import re import warnings +from html.parser import HTMLParser -from creole.parser.html_parser_config import BLOCK_TAGS, IGNORE_TAGS from creole.html_tools.strip_html import strip_html -from creole.py3compat import TEXT_TYPE, BINARY_TYPE -from creole.shared.document_tree import DocNode, DebugList -from creole.shared.html_parser import HTMLParser - +from creole.parser.html_parser_config import BLOCK_TAGS, IGNORE_TAGS +from creole.shared.document_tree import DebugList, DocNode -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ block_re = re.compile(r''' ^<pre> \s* $ @@ -44,7 +37,7 @@ inline_re = re.compile(r''' headline_tag_re = re.compile(r"h(\d)", re.UNICODE) -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ class HtmlParser(HTMLParser): @@ -83,7 +76,7 @@ class HtmlParser(HTMLParser): _inline_placeholder = "inlinedata" def __init__(self, debug=False): - HTMLParser.__init__(self) + super(HtmlParser, self).__init__(convert_charrefs=False) self.debugging = debug if self.debugging: @@ -103,11 +96,11 @@ class HtmlParser(HTMLParser): def _pre_cut(self, data, type, placeholder): if self.debugging: - print("append blockdata: %r" % data) - assert isinstance(data, TEXT_TYPE), "blockdata is not unicode" + print(f"append blockdata: {data!r}") + assert isinstance(data, str), "blockdata is not unicode" self.blockdata.append(data) id = len(self.blockdata) - 1 - return '<%s type="%s" id="%s" />' % (placeholder, type, id) + return f'<{placeholder} type="{type}" id="{id}" />' def _pre_pre_inline_cut(self, groups): return self._pre_cut(groups["pre_inline"], "pre", self._inline_placeholder) @@ -127,13 +120,13 @@ class HtmlParser(HTMLParser): if text is not None: if self.debugging: print("%15s: %r (%r)" % (name, text, match.group(0))) - method = getattr(self, '_pre_%s_cut' % name) + method = getattr(self, f'_pre_{name}_cut') return method(groups) # data = match.group("data") def feed(self, raw_data): - assert isinstance(raw_data, TEXT_TYPE), "feed data must be unicode!" + assert isinstance(raw_data, str), "feed data must be unicode!" data = raw_data.strip() # cut out <pre> and <tt> areas block tag areas @@ -158,8 +151,7 @@ class HtmlParser(HTMLParser): return self.root - - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- def _upto(self, node, kinds): """ @@ -179,10 +171,10 @@ class HtmlParser(HTMLParser): self.cur = self._upto(self.cur, kinds) self.debug_msg("go up to", self.cur) - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- def handle_starttag(self, tag, attrs): - self.debug_msg("starttag", "%r atts: %s" % (tag, attrs)) + self.debug_msg("starttag", f"{tag!r} atts: {attrs}") if tag in IGNORE_TAGS: return @@ -207,30 +199,29 @@ class HtmlParser(HTMLParser): self.cur = DocNode(tag, self.cur, None, attrs) def handle_data(self, data): - self.debug_msg("data", "%r" % data) - if isinstance(data, BINARY_TYPE): - data = unicode(data) + self.debug_msg("data", f"{data!r}") + assert isinstance(data, str) DocNode("data", self.cur, content=data) def handle_charref(self, name): - self.debug_msg("charref", "%r" % name) + self.debug_msg("charref", f"{name!r}") DocNode("charref", self.cur, content=name) def handle_entityref(self, name): - self.debug_msg("entityref", "%r" % name) + self.debug_msg("entityref", f"{name!r}") DocNode("entityref", self.cur, content=name) def handle_startendtag(self, tag, attrs): - self.debug_msg("startendtag", "%r atts: %s" % (tag, attrs)) + self.debug_msg("startendtag", f"{tag!r} atts: {attrs}") attr_dict = dict(attrs) if tag in (self._block_placeholder, self._inline_placeholder): id = int(attr_dict["id"]) # block_type = attr_dict["type"] DocNode( - "%s_%s" % (tag, attr_dict["type"]), + f"{tag}_{attr_dict['type']}", self.cur, content=self.blockdata[id], -# attrs = attr_dict + # attrs = attr_dict ) else: DocNode(tag, self.cur, None, attrs) @@ -239,9 +230,9 @@ class HtmlParser(HTMLParser): if tag in IGNORE_TAGS: return - self.debug_msg("endtag", "%r" % tag) + self.debug_msg("endtag", f"{tag!r}") - if tag == "br": # handled in starttag + if tag == "br": # handled in starttag return self.debug_msg("starttag", "%r" % self.get_starttag_text()) @@ -254,7 +245,7 @@ class HtmlParser(HTMLParser): else: self.cur = self.cur.parent - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- def debug_msg(self, method, txt): if not self.debugging: @@ -267,25 +258,26 @@ class HtmlParser(HTMLParser): """ print("_" * 80) - if start_node == None: + if start_node is None: start_node = self.root print(" document tree:") else: - print(" tree from %s:" % start_node) + print(f" tree from {start_node}:") print("=" * 80) + def emit(node, ident=0): for child in node.children: txt = "%s%s" % (" " * ident, child.kind) if child.content: - txt += ": %r" % child.content + txt += f": {child.content!r}" if child.attrs: - txt += " - attrs: %r" % child.attrs + txt += f" - attrs: {child.attrs!r}" - if child.level != None: - txt += " - level: %r" % child.level + if child.level is not None: + txt += f" - level: {child.level!r}" print(txt) emit(child, ident + 4) @@ -299,7 +291,7 @@ if __name__ == '__main__': # p = HtmlParser(debug=True) # p.feed("""\ -#<p><span>in span</span><br /> -#<code>in code</code></p> -#""") +# <p><span>in span</span><br /> +# <code>in code</code></p> +# """) # p.debug() diff --git a/creole/parser/html_parser_config.py b/creole/parser/html_parser_config.py index ddbda8f..679db4c 100644 --- a/creole/parser/html_parser_config.py +++ b/creole/parser/html_parser_config.py @@ -12,7 +12,6 @@ """ - BLOCK_TAGS = ( "address", "blockquote", "center", "dir", "div", "dl", "fieldset", "form", diff --git a/creole/py3compat.py b/creole/py3compat.py deleted file mode 100644 index 76c55b4..0000000 --- a/creole/py3compat.py +++ /dev/null @@ -1,43 +0,0 @@ -# coding: utf-8 - -""" - Helper to support Python v2 and v3 - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Some ideas borrowed from six - - See also: - http://python3porting.com - https://bitbucket.org/gutworth/six/src/tip/six.py - http://packages.python.org/six/ -""" - - - -import sys -import doctest -import re - -# True if we are running on Python 3. -PY3 = sys.version_info[0] == 3 - - -if PY3: - TEXT_TYPE = str - BINARY_TYPE = bytes -else: - TEXT_TYPE = unicode - BINARY_TYPE = str - - # Simple remove 'u' from python 2 unicode repr string - # See also: - # http://bugs.python.org/issue3955 - # http://www.python-forum.de/viewtopic.php?f=1&t=27509 (de) - origin_OutputChecker = doctest.OutputChecker - class OutputChecker2(origin_OutputChecker): - def check_output(self, want, got, optionflags): - got = got.replace("u'", "'").replace('u"', '"') - return origin_OutputChecker.check_output(self, want, got, optionflags) - doctest.OutputChecker = OutputChecker2 - - diff --git a/creole/rest_tools/clean_writer.py b/creole/rest_tools/clean_writer.py index 2b6ae66..3798c50 100644 --- a/creole/rest_tools/clean_writer.py +++ b/creole/rest_tools/clean_writer.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# coding: utf-8 - """ A clean reStructuredText html writer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -12,17 +9,14 @@ http://www.arnebrodowski.de/blog/write-your-own-restructuredtext-writer.html https://github.com/alex-morega/docutils-plainhtml/blob/master/plain_html_writer.py - :copyleft: 2011-2013 by python-creole team, see AUTHORS for more details. + :copyleft: 2011-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - -#import warnings import sys from creole.exceptions import DocutilsImportError -from creole.py3compat import TEXT_TYPE, PY3 try: import docutils @@ -43,7 +37,6 @@ except ImportError: raise DocutilsImportError(msg) - DEBUG = False #DEBUG = True @@ -59,6 +52,7 @@ class CleanHTMLWriter(html4css1.Writer): """ This docutils writer will use the CleanHTMLTranslator class below. """ + def __init__(self): html4css1.Writer.__init__(self) self.translator_class = CleanHTMLTranslator @@ -68,6 +62,7 @@ class CleanHTMLTranslator(html4css1.HTMLTranslator, object): """ Clean html translator for docutils system. """ + def _do_nothing(self, node, *args, **kwargs): pass @@ -80,7 +75,7 @@ class CleanHTMLTranslator(html4css1.HTMLTranslator, object): if tagname in IGNORE_TAGS: if DEBUG: - print("ignore tag %r" % tagname) + print(f"ignore tag {tagname!r}") return "" parts = [tagname] @@ -95,9 +90,9 @@ class CleanHTMLTranslator(html4css1.HTMLTranslator, object): continue if isinstance(value, list): - value = ' '.join([TEXT_TYPE(x) for x in value]) + value = ' '.join([str(x) for x in value]) - part = '%s="%s"' % (name.lower(), self.attval(TEXT_TYPE(value))) + part = f'{name.lower()}="{self.attval(str(value))}"' parts.append(part) if DEBUG: @@ -111,7 +106,7 @@ class CleanHTMLTranslator(html4css1.HTMLTranslator, object): infix = '' html = '<%s%s>%s' % (' '.join(parts), infix, suffix) if DEBUG: - print("startag html: %r" % html) + print(f"startag html: {html!r}") return html def visit_section(self, node): @@ -129,13 +124,12 @@ class CleanHTMLTranslator(html4css1.HTMLTranslator, object): # set only html_body, we used in rest2html() and don't surround it with <div> def depart_document(self, node): - self.html_body.extend(self.body_prefix[1:] + self.body_pre_docinfo - + self.docinfo + self.body - + self.body_suffix[:-1]) + self.html_body.extend( + self.body_prefix[1:] + self.body_pre_docinfo + self.docinfo + self.body + self.body_suffix[:-1] + ) assert not self.context, 'len(context) = %s' % len(self.context) - - #__________________________________________________________________________ + # __________________________________________________________________________ # Clean table: visit_thead = _do_nothing @@ -167,7 +161,7 @@ class CleanHTMLTranslator(html4css1.HTMLTranslator, object): def depart_docinfo(self, node): self.body.append('</table>\n') - #__________________________________________________________________________ + # __________________________________________________________________________ # Clean image: depart_figure = _do_nothing @@ -186,8 +180,7 @@ class CleanHTMLTranslator(html4css1.HTMLTranslator, object): align = node.parent['align'] if align: - self.body[-1] = self.body[-1].replace(' />', ' align="%s" />' % align) - + self.body[-1] = self.body[-1].replace(' />', f' align="{align}" />') def rest2html(content, enable_exit_status=None, **kwargs): @@ -205,10 +198,7 @@ def rest2html(content, enable_exit_status=None, **kwargs): ... SystemExit: 13 """ - if not PY3: - content = unicode(content) - - assert isinstance(content, TEXT_TYPE), "rest2html content must be %s, but it's %s" % (TEXT_TYPE, type(content)) + assert isinstance(content, str), f"rest2html content must be {str}, but it's {type(content)}" settings_overrides = { "input_encoding": "unicode", @@ -226,7 +216,7 @@ def rest2html(content, enable_exit_status=None, **kwargs): ) # import pprint # pprint.pprint(parts) - return parts["html_body"] # Don't detache the first heading + return parts["html_body"] # Don't detache the first heading if __name__ == '__main__': @@ -234,11 +224,11 @@ if __name__ == '__main__': print(doctest.testmod()) # print(rest2html(""") -#+------------+------------+ -#| Headline 1 | Headline 2 | -#+============+============+ -#| cell one | cell two | -#+------------+------------+ +# +------------+------------+ +# | Headline 1 | Headline 2 | +# +============+============+ +# | cell one | cell two | +# +------------+------------+ # """) # print(rest2html(""") diff --git a/creole/rest_tools/pypi_rest2html.py b/creole/rest_tools/pypi_rest2html.py index c60ae0c..b264514 100644 --- a/creole/rest_tools/pypi_rest2html.py +++ b/creole/rest_tools/pypi_rest2html.py @@ -12,22 +12,15 @@ """ - -try: - # Python 3 - from urllib.parse import urlparse -except ImportError: - # Python 2 - from urlparse import urlparse +from urllib.parse import urlparse from creole.exceptions import DocutilsImportError try: - import docutils + import docutils # noqa flake8 from docutils import io, readers from docutils.core import publish_doctree, Publisher - from docutils.writers import get_writer_class - from docutils.transforms import TransformError, Transform + from docutils.transforms import TransformError except ImportError as err: msg = ( "%s - You can't use rest2html!" @@ -51,11 +44,11 @@ def pypi_rest2html(source, output_encoding='unicode'): 'file_insertion_enabled': 0, # no file/URL access 'halt_level': 2, # at warnings or errors, raise an exception 'report_level': 5, # never report problems with the reST code - } + } # Convert reStructuredText to HTML using Docutils. document = publish_doctree(source=source, - settings_overrides=settings_overrides) + settings_overrides=settings_overrides) for node in document.traverse(): if node.tagname == '#text': @@ -73,7 +66,7 @@ def pypi_rest2html(source, output_encoding='unicode'): # now turn the transformed document into HTML reader = readers.doctree.Reader(parser_name='null') pub = Publisher(reader, source=io.DocTreeInput(document), - destination_class=io.StringOutput) + destination_class=io.StringOutput) pub.set_writer('html') pub.process_programmatic_settings(None, settings_overrides, None) pub.set_destination(None, None) diff --git a/creole/setup_utils.py b/creole/setup_utils.py index 6f6b651..17623f5 100644 --- a/creole/setup_utils.py +++ b/creole/setup_utils.py @@ -38,12 +38,10 @@ ) --------------------------------------------------------------------------- - :copyleft: 2011-2014 by the python-creole team, see AUTHORS for more details. + :copyleft: 2011-2020 by the python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - - import codecs import os import sys @@ -51,8 +49,6 @@ import warnings from creole import creole2html, html2rest from creole.shared.unknown_tags import raise_unknown_node, transparent_unknown_nodes -from creole.py3compat import PY3 - RAISE_ERRORS_ARGS = ( "check", "register", "sdist", "bdist", "upload", @@ -99,34 +95,28 @@ def get_long_description(package_root, filename="README.creole", raise_errors=No long_description_html = creole2html(long_description_origin) # convert html to ReSt - long_description_rest_unicode = html2rest( + long_description_rest = html2rest( long_description_html, - emitter_kwargs={"unknown_emit":unknown_emit} + emitter_kwargs={"unknown_emit": unknown_emit} ) - if PY3: - long_description_rest = long_description_rest_unicode - else: - long_description_rest = long_description_rest_unicode.encode("utf-8") except Exception: if raise_errors: raise # Don't raise the error e.g. in ./setup install process evalue = sys.exc_info()[1] - long_description_rest = "[Error: %s]\n%s" % ( - evalue, long_description_origin - ) + long_description_rest = f"[Error: {evalue}]\n{long_description_origin}" else: if raise_errors: # Test created ReSt code like PyPi does it. from creole.rest_tools.pypi_rest2html import pypi_rest2html try: - pypi_rest2html(long_description_rest_unicode) + pypi_rest2html(long_description_rest) except SystemExit as e: - msg = "Error creole2rest self test failed: rest2html() exist with status code: %s\n" % e.args[0] + msg = f"Error creole2rest self test failed: rest2html() exist with status code: {e.args[0]}\n" sys.stderr.write(msg) sys.exit(msg) except Exception as e: - sys.exit("ReSt2html error: %s" % e) + sys.exit(f"ReSt2html error: {e}") else: if "check" in sys.argv: print("Generating creole to ReSt to html, ok.") @@ -141,11 +131,12 @@ def _get_long_description(*args, **kwargs): else: warnings.warn(msg, DeprecationWarning) return get_long_description(*args, **kwargs) -GetLongDescription = _get_long_description # for backward-compatibility + + +GetLongDescription = _get_long_description # for backward-compatibility if __name__ == "__main__": package_root = os.path.abspath("../") long_description = get_long_description(package_root) print(long_description) - diff --git a/creole/shared/HTMLParsercompat.py b/creole/shared/HTMLParsercompat.py deleted file mode 100644 index 6f61cc5..0000000 --- a/creole/shared/HTMLParsercompat.py +++ /dev/null @@ -1,589 +0,0 @@ -""" -Patched version of the original from: - http://hg.python.org/cpython/file/tip/Lib/html/parser.py - -compare: - http://hg.python.org/cpython/file/2.7/Lib/HTMLParser.py - http://hg.python.org/cpython/file/3.2/Lib/html/parser.py - -e.g.: - cd /tmp/ - wget http://hg.python.org/cpython/raw-file/2.7/Lib/HTMLParser.py - wget http://hg.python.org/cpython/raw-file/3.2/Lib/html/parser.py - meld HTMLParser.py parser.py - -Make it compatible with Python 2.x and 3.x - -More info see html_parser.py ! -""" - -# ------------------------------------------------------------------- add start - -from creole.py3compat import PY3 -# --------------------------------------------------------------------- add end - -"""A parser for HTML and XHTML.""" - -# This file is based on sgmllib.py, but the API is slightly different. - -# XXX There should be a way to distinguish between PCDATA (parsed -# character data -- the normal case), RCDATA (replaceable character -# data -- only char and entity references and end tags are special) -# and CDATA (character data -- only end tags are special). - - -# --------------------------------------------------------------- changes start -try: - import _markupbase # python 3 -except ImportError: - import markupbase as _markupbase # python 2 -# --------------------------------------------------------------- changes end -import re - -# Regular expressions used for parsing - -interesting_normal = re.compile('[&<]') -incomplete = re.compile('&[a-zA-Z#]') - -entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') -charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') - -starttagopen = re.compile('<[a-zA-Z]') -piclose = re.compile('>') -commentclose = re.compile(r'--\s*>') -tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*') -# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state -# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state -tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*') -# Note: -# 1) the strict attrfind isn't really strict, but we can't make it -# correctly strict without breaking backward compatibility; -# 2) if you change attrfind remember to update locatestarttagend too; -# 3) if you change attrfind and/or locatestarttagend the parser will -# explode, so don't do it. -attrfind = re.compile( - r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*' - r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?') -attrfind_tolerant = re.compile( - r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') -locatestarttagend = re.compile(r""" - <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name - (?:\s+ # whitespace before attribute name - (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name - (?:\s*=\s* # value indicator - (?:'[^']*' # LITA-enclosed value - |\"[^\"]*\" # LIT-enclosed value - |[^'\">\s]+ # bare value - ) - )? - ) - )* - \s* # trailing whitespace -""", re.VERBOSE) -locatestarttagend_tolerant = re.compile(r""" - <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name - (?:[\s/]* # optional whitespace before attribute name - (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name - (?:\s*=+\s* # value indicator - (?:'[^']*' # LITA-enclosed value - |"[^"]*" # LIT-enclosed value - |(?!['"])[^>\s]* # bare value - ) - (?:\s*,)* # possibly followed by a comma - )?(?:\s|/(?!>))* - )* - )? - \s* # trailing whitespace -""", re.VERBOSE) -endendtag = re.compile('>') -# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between -# </ and the tag name, so maybe this should be fixed -endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>') - - -class HTMLParseError(Exception): - """Exception raised for all parse errors.""" - - def __init__(self, msg, position=(None, None)): - assert msg - self.msg = msg - self.lineno = position[0] - self.offset = position[1] - - def __str__(self): - result = self.msg - if self.lineno is not None: - result = result + ", at line %d" % self.lineno - if self.offset is not None: - result = result + ", column %d" % (self.offset + 1) - return result - - -class HTMLParser(_markupbase.ParserBase): - """Find tags and other markup and call handler functions. - - Usage: - p = HTMLParser() - p.feed(data) - ... - p.close() - - Start tags are handled by calling self.handle_starttag() or - self.handle_startendtag(); end tags by self.handle_endtag(). The - data between tags is passed from the parser to the derived class - by calling self.handle_data() with the data as argument (the data - may be split up in arbitrary chunks). Entity references are - passed by calling self.handle_entityref() with the entity - reference as the argument. Numeric character references are - passed to self.handle_charref() with the string containing the - reference as the argument. - """ - - CDATA_CONTENT_ELEMENTS = ("script", "style") - - def __init__(self, strict=True): - """Initialize and reset this instance. - - If strict is set to True (the default), errors are raised when invalid - HTML is encountered. If set to False, an attempt is instead made to - continue parsing, making "best guesses" about the intended meaning, in - a fashion similar to what browsers typically do. - """ - self.strict = strict - self.reset() - - def reset(self): - """Reset this instance. Loses all unprocessed data.""" - self.rawdata = '' - self.lasttag = '???' - self.interesting = interesting_normal - self.cdata_elem = None - _markupbase.ParserBase.reset(self) - - def feed(self, data): - r"""Feed data to the parser. - - Call this as often as you want, with as little or as much text - as you want (may include '\n'). - """ - self.rawdata = self.rawdata + data - self.goahead(0) - - def close(self): - """Handle any buffered data.""" - self.goahead(1) - - def error(self, message): - raise HTMLParseError(message, self.getpos()) - - __starttag_text = None - - def get_starttag_text(self): - """Return full source of start tag: '<...>'.""" - return self.__starttag_text - - def set_cdata_mode(self, elem): - self.cdata_elem = elem.lower() - self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I) - - def clear_cdata_mode(self): - self.interesting = interesting_normal - self.cdata_elem = None - - # Internal -- handle data as far as reasonable. May leave state - # and data to be processed by a subsequent call. If 'end' is - # true, force handling all data as if followed by EOF marker. - def goahead(self, end): - rawdata = self.rawdata - i = 0 - n = len(rawdata) - while i < n: - match = self.interesting.search(rawdata, i) # < or & - if match: - j = match.start() - else: - if self.cdata_elem: - break - j = n - if i < j: self.handle_data(rawdata[i:j]) - i = self.updatepos(i, j) - if i == n: break - startswith = rawdata.startswith - if startswith('<', i): - if starttagopen.match(rawdata, i): # < + letter - k = self.parse_starttag(i) - elif startswith("</", i): - k = self.parse_endtag(i) - elif startswith("<!--", i): - k = self.parse_comment(i) - elif startswith("<?", i): - k = self.parse_pi(i) - elif startswith("<!", i): - if self.strict: - k = self.parse_declaration(i) - else: - k = self.parse_html_declaration(i) - elif (i + 1) < n: - self.handle_data("<") - k = i + 1 - else: - break - if k < 0: - if not end: - break - if self.strict: - self.error("EOF in middle of construct") - k = rawdata.find('>', i + 1) - if k < 0: - k = rawdata.find('<', i + 1) - if k < 0: - k = i + 1 - else: - k += 1 - self.handle_data(rawdata[i:k]) - i = self.updatepos(i, k) - elif startswith("&#", i): - match = charref.match(rawdata, i) - if match: - name = match.group()[2:-1] - self.handle_charref(name) - k = match.end() - if not startswith(';', k-1): - k = k - 1 - i = self.updatepos(i, k) - continue - else: - if ";" in rawdata[i:]: #bail by consuming &# - self.handle_data(rawdata[0:2]) - i = self.updatepos(i, 2) - break - elif startswith('&', i): - match = entityref.match(rawdata, i) - if match: - name = match.group(1) - self.handle_entityref(name) - k = match.end() - if not startswith(';', k-1): - k = k - 1 - i = self.updatepos(i, k) - continue - match = incomplete.match(rawdata, i) - if match: - # match.group() will contain at least 2 chars - if end and match.group() == rawdata[i:]: - if self.strict: - self.error("EOF in middle of entity or char ref") - else: - if k <= i: - k = n - i = self.updatepos(i, i + 1) - # incomplete - break - elif (i + 1) < n: - # not the end of the buffer, and can't be confused - # with some other construct - self.handle_data("&") - i = self.updatepos(i, i + 1) - else: - break - else: - assert 0, "interesting.search() lied" - # end while - if end and i < n and not self.cdata_elem: - self.handle_data(rawdata[i:n]) - i = self.updatepos(i, n) - self.rawdata = rawdata[i:] - - # Internal -- parse html declarations, return length or -1 if not terminated - # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state - # See also parse_declaration in _markupbase - def parse_html_declaration(self, i): - rawdata = self.rawdata - if rawdata[i:i+2] != '<!': - self.error('unexpected call to parse_html_declaration()') - if rawdata[i:i+4] == '<!--': - # this case is actually already handled in goahead() - return self.parse_comment(i) - elif rawdata[i:i+3] == '<![': - return self.parse_marked_section(i) - elif rawdata[i:i+9].lower() == '<!doctype': - # find the closing > - gtpos = rawdata.find('>', i+9) - if gtpos == -1: - return -1 - self.handle_decl(rawdata[i+2:gtpos]) - return gtpos+1 - else: - return self.parse_bogus_comment(i) - - # Internal -- parse bogus comment, return length or -1 if not terminated - # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state - def parse_bogus_comment(self, i, report=1): - rawdata = self.rawdata - if rawdata[i:i+2] not in ('<!', '</'): - self.error('unexpected call to parse_comment()') - pos = rawdata.find('>', i+2) - if pos == -1: - return -1 - if report: - self.handle_comment(rawdata[i+2:pos]) - return pos + 1 - - # Internal -- parse processing instr, return end or -1 if not terminated - def parse_pi(self, i): - rawdata = self.rawdata - assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()' - match = piclose.search(rawdata, i+2) # > - if not match: - return -1 - j = match.start() - self.handle_pi(rawdata[i+2: j]) - j = match.end() - return j - - # Internal -- handle starttag, return end or -1 if not terminated - def parse_starttag(self, i): - self.__starttag_text = None - endpos = self.check_for_whole_start_tag(i) - if endpos < 0: - return endpos - rawdata = self.rawdata - self.__starttag_text = rawdata[i:endpos] - - # Now parse the data between i+1 and j into a tag and attrs - attrs = [] - match = tagfind.match(rawdata, i+1) - assert match, 'unexpected call to parse_starttag()' - k = match.end() - self.lasttag = tag = match.group(1).lower() - while k < endpos: - if self.strict: - m = attrfind.match(rawdata, k) - else: - m = attrfind_tolerant.match(rawdata, k) - if not m: - break - attrname, rest, attrvalue = m.group(1, 2, 3) - if not rest: - attrvalue = None - elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ - attrvalue[:1] == '"' == attrvalue[-1:]: - attrvalue = attrvalue[1:-1] - if attrvalue: - attrvalue = self.unescape(attrvalue) - attrs.append((attrname.lower(), attrvalue)) - k = m.end() - - end = rawdata[k:endpos].strip() - if end not in (">", "/>"): - lineno, offset = self.getpos() - if "\n" in self.__starttag_text: - lineno = lineno + self.__starttag_text.count("\n") - offset = len(self.__starttag_text) \ - - self.__starttag_text.rfind("\n") - else: - offset = offset + len(self.__starttag_text) - if self.strict: - self.error("junk characters in start tag: %r" - % (rawdata[k:endpos][:20],)) - self.handle_data(rawdata[i:endpos]) - return endpos - if end.endswith('/>'): - # XHTML-style empty tag: <span attr="value" /> - self.handle_startendtag(tag, attrs) - else: - self.handle_starttag(tag, attrs) - if tag in self.CDATA_CONTENT_ELEMENTS: - self.set_cdata_mode(tag) - return endpos - - # Internal -- check to see if we have a complete starttag; return end - # or -1 if incomplete. - def check_for_whole_start_tag(self, i): - rawdata = self.rawdata - if self.strict: - m = locatestarttagend.match(rawdata, i) - else: - m = locatestarttagend_tolerant.match(rawdata, i) - if m: - j = m.end() - next = rawdata[j:j+1] - if next == ">": - return j + 1 - if next == "/": - if rawdata.startswith("/>", j): - return j + 2 - if rawdata.startswith("/", j): - # buffer boundary - return -1 - # else bogus input - if self.strict: - self.updatepos(i, j + 1) - self.error("malformed empty start tag") - if j > i: - return j - else: - return i + 1 - if next == "": - # end of input - return -1 - if next in ("abcdefghijklmnopqrstuvwxyz=/" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ"): - # end of input in or before attribute value, or we have the - # '/' from a '/>' ending - return -1 - if self.strict: - self.updatepos(i, j) - self.error("malformed start tag") - if j > i: - return j - else: - return i + 1 - raise AssertionError("we should not get here!") - - # Internal -- parse endtag, return end or -1 if incomplete - def parse_endtag(self, i): - rawdata = self.rawdata - assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag" - match = endendtag.search(rawdata, i+1) # > - if not match: - return -1 - gtpos = match.end() - match = endtagfind.match(rawdata, i) # </ + tag + > - if not match: - if self.cdata_elem is not None: - self.handle_data(rawdata[i:gtpos]) - return gtpos - if self.strict: - self.error("bad end tag: %r" % (rawdata[i:gtpos],)) - # find the name: w3.org/TR/html5/tokenization.html#tag-name-state - namematch = tagfind_tolerant.match(rawdata, i+2) - if not namematch: - # w3.org/TR/html5/tokenization.html#end-tag-open-state - if rawdata[i:i+3] == '</>': - return i+3 - else: - return self.parse_bogus_comment(i) - tagname = namematch.group().lower() - # consume and ignore other stuff between the name and the > - # Note: this is not 100% correct, since we might have things like - # </tag attr=">">, but looking for > after tha name should cover - # most of the cases and is much simpler - gtpos = rawdata.find('>', namematch.end()) - self.handle_endtag(tagname) - return gtpos+1 - - elem = match.group(1).lower() # script or style - if self.cdata_elem is not None: - if elem != self.cdata_elem: - self.handle_data(rawdata[i:gtpos]) - return gtpos - - self.handle_endtag(elem.lower()) - self.clear_cdata_mode() - return gtpos - - # Overridable -- finish processing of start+end tag: <tag.../> - def handle_startendtag(self, tag, attrs): - self.handle_starttag(tag, attrs) - self.handle_endtag(tag) - - # Overridable -- handle start tag - def handle_starttag(self, tag, attrs): - pass - - # Overridable -- handle end tag - def handle_endtag(self, tag): - pass - - # Overridable -- handle character reference - def handle_charref(self, name): - pass - - # Overridable -- handle entity reference - def handle_entityref(self, name): - pass - - # Overridable -- handle data - def handle_data(self, data): - pass - - # Overridable -- handle comment - def handle_comment(self, data): - pass - - # Overridable -- handle declaration - def handle_decl(self, decl): - pass - - # Overridable -- handle processing instruction - def handle_pi(self, data): - pass - - def unknown_decl(self, data): - if self.strict: - self.error("unknown declaration: %r" % (data,)) - - # Internal -- helper to remove special character quoting - entitydefs = None - def unescape(self, s): - if '&' not in s: - return s - # -------------------------------------------------------- change start - if PY3: - def replaceEntities(s): - s = s.groups()[0] - try: - if s[0] == "#": - s = s[1:] - if s[0] in ['x','X']: - c = int(s[1:], 16) - else: - c = int(s) - return chr(c) - except ValueError: - return '&#'+ s +';' - else: - # Cannot use name2codepoint directly, because HTMLParser - # supports apos, which is not part of HTML 4 - import html.entities - if HTMLParser.entitydefs is None: - entitydefs = HTMLParser.entitydefs = {'apos':"'"} - for k, v in html.entities.name2codepoint.items(): - entitydefs[k] = chr(v) - try: - return self.entitydefs[s] - except KeyError: - return '&'+s+';' - - return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", - replaceEntities, s, flags=re.ASCII) - else: - def replaceEntities(s): - s = s.groups()[0] - try: - if s[0] == "#": - s = s[1:] - if s[0] in ['x','X']: - c = int(s[1:], 16) - else: - c = int(s) - return unichr(c) - except ValueError: - return '&#'+s+';' - else: - # Cannot use name2codepoint directly, because HTMLParser supports apos, - # which is not part of HTML 4 - import htmlentitydefs - if HTMLParser.entitydefs is None: - entitydefs = HTMLParser.entitydefs = {'apos':"'"} - for k, v in htmlentitydefs.name2codepoint.iteritems(): - entitydefs[k] = unichr(v) - try: - return self.entitydefs[s] - except KeyError: - return '&'+s+';' - - return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s) - # -------------------------------------------------------- change end diff --git a/creole/shared/base_emitter.py b/creole/shared/base_emitter.py index de6fd2f..d0c98f1 100644 --- a/creole/shared/base_emitter.py +++ b/creole/shared/base_emitter.py @@ -1,20 +1,16 @@ -#!/usr/bin/env python -# coding: utf-8 """ Base document tree emitter ~~~~~~~~~~~~~~~~~~~~~~~~~~ - :copyleft: 2008-2011 by python-creole team, see AUTHORS for more details. + :copyleft: 2008-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - -from creole.parser.html_parser_config import BLOCK_TAGS from creole.html_tools.deentity import Deentity -from creole.py3compat import TEXT_TYPE +from creole.parser.html_parser_config import BLOCK_TAGS from creole.shared.markup_table import MarkupTable from creole.shared.unknown_tags import transparent_unknown_nodes @@ -24,6 +20,7 @@ class BaseEmitter(object): Build from a document_tree (html2creole.parser.HtmlParser instance) a creole markup text. """ + def __init__(self, document_tree, unknown_emit=None, debug=False): self.root = document_tree @@ -35,20 +32,20 @@ class BaseEmitter(object): self.last = None self.debugging = debug - self.deentity = Deentity() # for replacing html entities + self.deentity = Deentity() # for replacing html entities self._inner_list = "" self._mask_linebreak = False - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def blockdata_pass_emit(self, node): - return "%s\n\n" % node.content + return f"{node.content}\n\n" return node.content - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def data_emit(self, node): - #node.debug() + # node.debug() return node.content def entityref_emit(self, node): @@ -59,13 +56,13 @@ class BaseEmitter(object): try: return self.deentity.replace_named(entity) - except KeyError as err: + except KeyError: if self.debugging: - print("unknown html entity found: %r" % entity) - return "&%s" % entity # FIXME + print(f"unknown html entity found: {entity!r}") + return f"&{entity}" # FIXME except UnicodeDecodeError as err: raise UnicodeError( - "Error handling entity %r: %s" % (entity, err) + f"Error handling entity {entity!r}: {err}" ) def charref_emit(self, node): @@ -82,7 +79,7 @@ class BaseEmitter(object): # entity as a unicode number return self.deentity.replace_number(entity) - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def p_emit(self, node): return "%s\n\n" % self.emit_children(node) @@ -93,16 +90,16 @@ class BaseEmitter(object): else: return "\n" - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def _typeface(self, node, key): return key + self.emit_children(node) + key - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def li_emit(self, node): content = self.emit_children(node) - return "\n%s %s" % (self._inner_list, content) + return f"\n{self._inner_list} {content}" def _list_emit(self, node, list_type): start_newline = False @@ -110,7 +107,7 @@ class BaseEmitter(object): if not self.last.content or not self.last.content.endswith("\n"): start_newline = True - if self._inner_list == "": # Start a new list + if self._inner_list == "": # Start a new list self._inner_list = list_type else: self._inner_list += list_type @@ -119,7 +116,7 @@ class BaseEmitter(object): self._inner_list = self._inner_list[:-1] - if self._inner_list == "": # Start a new list + if self._inner_list == "": # Start a new list if start_newline: return "\n" + content + "\n\n" else: @@ -127,7 +124,7 @@ class BaseEmitter(object): else: return content - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def table_emit(self, node): self._table = MarkupTable( @@ -137,7 +134,7 @@ class BaseEmitter(object): ) self.emit_children(node) content = self._table.get_table_markup() - return "%s\n" % content + return f"{content}\n" def tr_emit(self, node): self._table.add_tr() @@ -165,13 +162,13 @@ class BaseEmitter(object): self._table.add_td(content) return "" - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def _emit_content(self, node): content = self.emit_children(node) content = self._escape_linebreaks(content) if node.kind in BLOCK_TAGS: - content = "%s\n\n" % content + content = f"{content}\n\n" return content def div_emit(self, node): @@ -180,7 +177,7 @@ class BaseEmitter(object): def span_emit(self, node): return self._emit_content(node) - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def document_emit(self, node): self.last = node @@ -196,7 +193,7 @@ class BaseEmitter(object): result = [] for child in node.children: content = self.emit_node(child) - assert isinstance(content, TEXT_TYPE) + assert isinstance(content, str) result.append(content) return result @@ -205,26 +202,24 @@ class BaseEmitter(object): def unicode_error(method_name, method, node, content): node.debug() raise AssertionError( - "Method '%s' (%s) returns no unicode - returns: %s (%s)" % ( - method_name, method, repr(content), type(content) - ) + f"Method '{method_name}' ({method}) returns no unicode - returns: {repr(content)} ({type(content)})" ) if node.level: - self.debug_msg("emit_node", "%s (level: %i): %r" % (node.kind, node.level, node.content)) + self.debug_msg("emit_node", f"{node.kind} (level: {node.level:d}): {node.content!r}") else: - self.debug_msg("emit_node", "%s: %r" % (node.kind, node.content)) + self.debug_msg("emit_node", f"{node.kind}: {node.content!r}") - method_name = "%s_emit" % node.kind + method_name = f"{node.kind}_emit" emit_method = getattr(self, method_name, None) if emit_method: content = emit_method(node) - if not isinstance(content, TEXT_TYPE): + if not isinstance(content, str): unicode_error(method_name, emit_method, node, content) else: content = self._unknown_emit(self, node) - if not isinstance(content, TEXT_TYPE): + if not isinstance(content, str): unicode_error(method_name, self._unknown_emit, node, content) self.last = node @@ -233,10 +228,10 @@ class BaseEmitter(object): # def emit(self): # """Emit the document represented by self.root DOM tree.""" # result = self.emit_node(self.root) -## return result.strip() # FIXME +# return result.strip() # FIXME # return result.rstrip() # FIXME - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- def debug_msg(self, method, txt): if not self.debugging: diff --git a/creole/shared/document_tree.py b/creole/shared/document_tree.py index 4971953..20c6a44 100644 --- a/creole/shared/document_tree.py +++ b/creole/shared/document_tree.py @@ -6,16 +6,13 @@ ~~~~~~~~~~~~~ - :copyleft: 2008-2011 by python-creole team, see AUTHORS for more details. + :copyleft: 2008-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - - -import warnings import inspect +import warnings -from creole.py3compat import TEXT_TYPE from creole.shared.utils import dict2string @@ -25,6 +22,7 @@ class DocNode: The Document tree would be created in the parser and used in the emitter. """ + def __init__(self, kind='', parent=None, content=None, attrs=[], level=None): self.kind = kind @@ -35,9 +33,7 @@ class DocNode: self.attrs = dict(attrs) if content: - assert isinstance(content, TEXT_TYPE), "Given content %r is not unicode, it's type: %s" % ( - content, type(content) - ) + assert isinstance(content, str), f"Given content {content!r} is not unicode, it's type: {type(content)}" self.content = content self.level = level @@ -60,7 +56,7 @@ class DocNode: return str(self.__repr__()) def __repr__(self): - return "<DocNode %s: %r>" % (self.kind, self.content) + return f"<DocNode {self.kind}: {self.content!r}>" # return "<DocNode %s (parent: %r): %r>" % (self.kind, self.parent, self.content) def debug(self): @@ -80,7 +76,7 @@ class DocNode: """ print("_" * 80) print("\tDocNode - debug:") - print("str(): %s" % self) + print(f"str(): {self}") print("attributes:") for i in dir(self): if i.startswith("_") or i == "debug": @@ -96,7 +92,7 @@ class DebugList(list): super(DebugList, self).__init__() def append(self, item): -# for stack_frame in inspect.stack(): print(stack_frame) + # for stack_frame in inspect.stack(): print(stack_frame) line, method = inspect.stack()[1][2:4] msg = "%-8s append: %-35r (%-15s line:%s)" % ( diff --git a/creole/shared/example_macros.py b/creole/shared/example_macros.py index 428d5d2..354469d 100644 --- a/creole/shared/example_macros.py +++ b/creole/shared/example_macros.py @@ -15,14 +15,14 @@ from xml.sax.saxutils import escape +from creole.shared.utils import get_pygments_formatter, get_pygments_lexer + try: from pygments import highlight PYGMENTS = True except ImportError: PYGMENTS = False -from creole.shared.utils import get_pygments_lexer, get_pygments_formatter - def html(text): """ @@ -62,7 +62,7 @@ def code(ext, text): try: highlighted_text = highlight(text, lexer, formatter).decode('utf-8') - except: + except BaseException: highlighted_text = pre(text) finally: return highlighted_text.replace('\n', '<br />\n') diff --git a/creole/shared/html_parser.py b/creole/shared/html_parser.py deleted file mode 100644 index 0bdb7c4..0000000 --- a/creole/shared/html_parser.py +++ /dev/null @@ -1,32 +0,0 @@ -# coding: utf-8 - -""" - HTMLParser for Python 2.x and 3.x - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - The HTMLParser has problems with the correct handling of <script>...</script> - and <style>...</style> areas. - - It was fixed with v2.7.3 and 3.2.3, see: - http://www.python.org/download/releases/2.7.3/ - http://www.python.org/download/releases/3.2.3/ - see also: - http://bugs.python.org/issue670664#msg146770 - - :copyleft: 2011-2012 by python-creole team, see AUTHORS for more details. - :license: GNU GPL v3 or above, see LICENSE for more details. -""" - - -try: - import HTMLParser as OriginHTMLParser -except ImportError: - from html import parser as OriginHTMLParser # python 3 - - -if hasattr(OriginHTMLParser, "cdata_elem"): - # Current python version is patched -> use the original - HTMLParser = OriginHTMLParser -else: - # Current python version is not patched -> use own patched version - from creole.shared.HTMLParsercompat import HTMLParser diff --git a/creole/shared/markup_table.py b/creole/shared/markup_table.py index ec59e66..acb9b21 100644 --- a/creole/shared/markup_table.py +++ b/creole/shared/markup_table.py @@ -4,6 +4,7 @@ class MarkupTable(object): Container for holding table data and render the data in creole markup. Format every cell width to the same col width. """ + def __init__(self, head_prefix="= ", auto_width=True, debug_msg=None): self.head_prefix = head_prefix self.auto_width = auto_width @@ -30,7 +31,7 @@ class MarkupTable(object): self.add_td(self.head_prefix + text) def add_td(self, text): - if self.row_index == None: + if self.row_index is None: self.add_tr() self.debug_msg("Table.add_td", text) @@ -44,9 +45,9 @@ class MarkupTable(object): cell = cell.strip() if cell != "": if self.head_prefix and cell.startswith(self.head_prefix): - cell += " " # Headline + cell += " " # Headline else: - cell = " %s " % cell # normal cell + cell = f" {cell} " # normal cell line_cells.append(cell) cells.append(line_cells) @@ -81,8 +82,8 @@ class MarkupTable(object): # preformat every table cell cells, widths = self._get_preformat_info() - separator_line = "+%s+" % "+".join(["-"*width for width in widths]) - headline_separator = "+%s+" % "+".join(["="*width for width in widths]) + separator_line = "+%s+" % "+".join(["-" * width for width in widths]) + headline_separator = "+%s+" % "+".join(["=" * width for width in widths]) lines = [] for no, row in enumerate(cells): @@ -99,6 +100,7 @@ class MarkupTable(object): return "\n".join(lines) + if __name__ == '__main__': import doctest print(doctest.testmod()) diff --git a/creole/shared/unknown_tags.py b/creole/shared/unknown_tags.py index fe231f0..26cfd3a 100644 --- a/creole/shared/unknown_tags.py +++ b/creole/shared/unknown_tags.py @@ -12,7 +12,6 @@ """ - from xml.sax.saxutils import escape @@ -30,15 +29,14 @@ def _mask_content(emitter, node, mask_tag): content = emitter.emit_children(node) if not content: # single tag - return "<<%(mask_tag)s>><%(tag)s%(attrs)s /><</%(mask_tag)s>>" % tag_data + return f"<<{tag_data['mask_tag']}>><{tag_data['tag']}{tag_data['attrs']} /><</{tag_data['mask_tag']}>>" - start_tag = "<<%(mask_tag)s>><%(tag)s%(attrs)s><</%(mask_tag)s>>" % tag_data - end_tag = "<<%(mask_tag)s>></%(tag)s><</%(mask_tag)s>>" % tag_data + start_tag = f"<<{tag_data['mask_tag']}>><{tag_data['tag']}{tag_data['attrs']}><</{tag_data['mask_tag']}>>" + end_tag = f"<<{tag_data['mask_tag']}>></{tag_data['tag']}><</{tag_data['mask_tag']}>>" return start_tag + content + end_tag - def raise_unknown_node(emitter, node): """ unknown_emit callable for Html2CreoleEmitter @@ -47,9 +45,7 @@ def raise_unknown_node(emitter, node): """ content = emitter.emit_children(node) raise NotImplementedError( - "Node from type '%s' is not implemented! (child content: %r)" % ( - node.kind, content - ) + f"Node from type '{node.kind}' is not implemented! (child content: {content!r})" ) @@ -89,10 +85,10 @@ def escape_unknown_nodes(emitter, node): content = emitter.emit_children(node) if not content: # single tag - return escape("<%(tag)s%(attrs)s />" % tag_data) + return escape(f"<{tag_data['tag']}{tag_data['attrs']} />") - start_tag = escape("<%(tag)s%(attrs)s>" % tag_data) - end_tag = escape("</%(tag)s>" % tag_data) + start_tag = escape(f"<{tag_data['tag']}{tag_data['attrs']}>") + end_tag = escape(f"</{tag_data['tag']}>") return start_tag + content + end_tag diff --git a/creole/shared/utils.py b/creole/shared/utils.py index f1b981d..9ac82e2 100644 --- a/creole/shared/utils.py +++ b/creole/shared/utils.py @@ -5,16 +5,13 @@ python creole utilities ~~~~~~~~~~~~~~~~~~~~~~~ - :copyleft: 2011-2014 by python-creole team, see AUTHORS for more details. + :copyleft: 2011-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - -import shlex import json - -from creole.py3compat import TEXT_TYPE, PY3 +import shlex try: from pygments import lexers @@ -31,7 +28,8 @@ KEYWORD_MAP = { "None": None, } -def string2dict(raw_content, encoding="utf-8"): + +def string2dict(raw_content): """ convert a string into a dictionary. e.g.: @@ -43,10 +41,6 @@ def string2dict(raw_content, encoding="utf-8"): See test_creole2html.TestString2Dict() """ - if not PY3 and isinstance(raw_content, TEXT_TYPE): - # shlex.split doesn't work with unicode?!? - raw_content = raw_content.encode(encoding) - parts = shlex.split(raw_content) result = {} @@ -83,13 +77,13 @@ def dict2string(d): attr_list = [] for key, value in sorted(d.items()): value_string = json.dumps(value) - attr_list.append("%s=%s" % (key, value_string)) + attr_list.append(f"{key}={value_string}") return " ".join(attr_list) def get_pygments_formatter(): if PYGMENTS: - return HtmlFormatter(lineos = True, encoding='utf-8', + return HtmlFormatter(lineos=True, encoding='utf-8', style='colorful', outencoding='utf-8', cssclass='pygments') @@ -98,7 +92,7 @@ def get_pygments_lexer(source_type, code): if PYGMENTS: try: return lexers.get_lexer_by_name(source_type) - except: + except BaseException: return lexers.guess_lexer(code) else: return None diff --git a/creole/tests/test_TODOs.py b/creole/tests/test_TODOs.py index b396c86..a2b75c9 100644 --- a/creole/tests/test_TODOs.py +++ b/creole/tests/test_TODOs.py @@ -6,8 +6,8 @@ import unittest -from creole.tests.utils.base_unittest import BaseCreoleTest from creole.html_tools.strip_html import strip_html +from creole.tests.utils.base_unittest import BaseCreoleTest class StripHtml(unittest.TestCase): @@ -45,7 +45,7 @@ class CrossCompareCreoleTests(BaseCreoleTest): @unittest.expectedFailure def test_cross_paragraphs(self): """ TODO: bold/italics cross paragraphs in creole2html - see: http://code.google.com/p/python-creole/issues/detail?id=13 + see: http://code.google.com/p/python-creole/issues/detail?id=13 """ self.assert_creole2html(""" But, should //not be... @@ -62,14 +62,14 @@ class CrossCompareCreoleTests(BaseCreoleTest): """ self.cross_compare_creole(r""" this is {{{**escaped** inline}}}, isn't it? - + {{{ a **code** block }}} """, """ <p>this is <tt>**escaped** inline</tt>, isn't it?</p> - + <pre> a **code** block @@ -92,25 +92,25 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): """ TODO: http://code.google.com/p/python-creole/issues/detail?id=16#c5 """ self.assert_html2creole(r""" **foo** - + = one """, """ <b>foo</b> <h1>one</h1> - """)#, debug=True) + """) # , debug=True) @unittest.expectedFailure def test_no_space_before_blocktag(self): """ TODO: Bug in html2creole.strip_html(): Don't add a space before/after block tags """ self.assert_html2creole(r""" **foo** - + * one """, """ <b>foo</b> <ul><li>one</li></ul> - """#, debug=True - ) + """ # , debug=True + ) @unittest.expectedFailure def test_escape_char(self): @@ -144,7 +144,5 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): a <img src="/image.png" alt="PNG pictures" /> !</p> <p>picture <a href="www.domain.tld"><img src="foo.JPG" alt="Foo"></a> as a link</p> - """#, debug=True - ) - - + """ # , debug=True + ) diff --git a/creole/tests/test_cli.py b/creole/tests/test_cli.py index 74fc7d7..cae1e40 100644 --- a/creole/tests/test_cli.py +++ b/creole/tests/test_cli.py @@ -10,16 +10,12 @@ """ - -import subprocess -import unittest import sys -import os import tempfile +import unittest -from creole import cmdline +from creole import VERSION_STRING, cmdline from creole.tests.utils.base_unittest import BaseCreoleTest -from creole import VERSION_STRING from creole.tests.utils.unittest_subprocess import SubprocessMixin CMDS = ("creole2html", "html2creole", "html2rest", "html2textile") @@ -71,7 +67,7 @@ class CreoleCLITests(BaseCreoleTest, SubprocessMixin, CliTestMixins): dest_file = tempfile.NamedTemporaryFile() destfilepath = dest_file.name - stdout=( + stdout = ( "Convert '%(src)s' to '%(dst)s' with %(prog)s (codec: utf-8)\n" "done. '%(dst)s' created." ) % { @@ -93,9 +89,7 @@ class CreoleCLITests(BaseCreoleTest, SubprocessMixin, CliTestMixins): def test_version(self): for cmd in CMDS: - version_info = "%s from python-creole v%s" % ( - cmd, VERSION_STRING - ) + version_info = f"{cmd} from python-creole v{VERSION_STRING}" self.assertSubprocess( popen_args=[cmd, "--version"], retcode=0, @@ -103,7 +97,6 @@ class CreoleCLITests(BaseCreoleTest, SubprocessMixin, CliTestMixins): ) - class CreoleCLITestsDirect(BaseCreoleTest, CliTestMixins): def setUp(self): @@ -125,7 +118,7 @@ class CreoleCLITestsDirect(BaseCreoleTest, CliTestMixins): destfilepath = dest_file.name sys.argv = [cli_str, sourcefilepath, destfilepath] - cli = getattr(cmdline, "cli_%s" % cli_str) + cli = getattr(cmdline, f"cli_{cli_str}") cli() dest_file.seek(0) diff --git a/creole/tests/test_creole2html.py b/creole/tests/test_creole2html.py index b75435d..6f60491 100644 --- a/creole/tests/test_creole2html.py +++ b/creole/tests/test_creole2html.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# coding: utf-8 """ creole2html unittest @@ -12,40 +10,33 @@ Test the creole markup. - :copyleft: 2008-2014 by python-creole team, see AUTHORS for more details. + :copyleft: 2008-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - import sys import unittest -import warnings +from io import StringIO -try: - from StringIO import StringIO -except ImportError: - from io import StringIO # python 3 +from creole import creole2html +from creole.shared import example_macros +from creole.shared.utils import dict2string, string2dict +from creole.tests import test_macros +from creole.tests.utils.base_unittest import BaseCreoleTest try: - from pygments import highlight + import pygments # noqa flake8 PYGMENTS = True except ImportError: PYGMENTS = False -from creole.tests.utils.base_unittest import BaseCreoleTest -from creole.tests import test_macros -from creole.py3compat import PY3 - -from creole import creole2html -from creole.shared import example_macros -from creole.shared.utils import string2dict, dict2string - class TestCreole2html(BaseCreoleTest): """ Tests around creole2html API and macro function. """ + def setUp(self): # For fallback tests example_macros.PYGMENTS = PYGMENTS @@ -58,8 +49,8 @@ class TestCreole2html(BaseCreoleTest): creole2html( markup_string="<<notexist1>><<notexist2>><</notexist2>>", emitter_kwargs={ - "verbose":2, - "stderr":my_stderr, + "verbose": 2, + "stderr": my_stderr, } ) error_msg = my_stderr.getvalue() @@ -72,11 +63,11 @@ class TestCreole2html(BaseCreoleTest): "Traceback", "'notexist1'", "'notexist2'", ) for part in must_have: - tb_lines = [" -"*40] + tb_lines = [" -" * 40] tb_lines += error_msg.splitlines() - tb_lines += [" -"*40] + tb_lines += [" -" * 40] tb = "\n".join([" >>> %s" % l for l in tb_lines]) - msg = "%r not found in:\n%s" % (part, tb) + msg = f"{part!r} not found in:\n{tb}" # TODO: use assertIn if python 2.6 will be not support anymore. if part not in error_msg: raise self.failureException(msg) @@ -88,9 +79,9 @@ class TestCreole2html(BaseCreoleTest): html = creole2html( markup_string="<<html>><p>foo</p><</html>><bar?>", emitter_kwargs={ - "verbose":1, - "macros":example_macros, - "stderr":sys.stderr, + "verbose": 1, + "macros": example_macros, + "stderr": sys.stderr, } ) self.assertEqual(html, '<p>foo</p>\n<p><bar?></p>') @@ -99,9 +90,9 @@ class TestCreole2html(BaseCreoleTest): html = creole2html( markup_string="<<html>>{{{<nocode>}}}<</html>>", emitter_kwargs={ - "verbose":1, - "macros":example_macros, - "stderr":sys.stderr, + "verbose": 1, + "macros": example_macros, + "stderr": sys.stderr, } ) self.assertEqual(html, '{{{<nocode>}}}') @@ -110,9 +101,9 @@ class TestCreole2html(BaseCreoleTest): html = creole2html( markup_string="<<html>>1<</html>><<html>>2<</html>>", emitter_kwargs={ - "verbose":1, - "macros":example_macros, - "stderr":sys.stderr, + "verbose": 1, + "macros": example_macros, + "stderr": sys.stderr, } ) self.assertEqual(html, '1\n2') @@ -127,9 +118,9 @@ class TestCreole2html(BaseCreoleTest): html = creole2html( markup_string="<<test bar='b' foo='a'>>c<</test>>", emitter_kwargs={ - "verbose":1, - "macros":{"test":test}, - "stderr":sys.stderr, + "verbose": 1, + "macros": {"test": test}, + "stderr": sys.stderr, } ) self.assertEqual(html, 'a|b|c') @@ -142,14 +133,14 @@ class TestCreole2html(BaseCreoleTest): pass self.assertRaises(TypeError, - creole2html, - markup_string="<<test no=1 arg2='foo'>>bar<</test>>", - emitter_kwargs={ - "verbose":1, - "macros":testmacro, - "stderr":sys.stderr, - } - ) + creole2html, + markup_string="<<test no=1 arg2='foo'>>bar<</test>>", + emitter_kwargs={ + "verbose": 1, + "macros": testmacro, + "stderr": sys.stderr, + } + ) def test_macro_wrong_arguments_with_error_report(self): """ @@ -162,14 +153,14 @@ class TestCreole2html(BaseCreoleTest): html = creole2html( markup_string="<<test bar='foo'>>c<</test>>", emitter_kwargs={ - "verbose":2, - "macros":{"test":test}, - "stderr":my_stderr, + "verbose": 2, + "macros": {"test": test}, + "stderr": my_stderr, } ) self.assertEqual(html, - "[Error: Macro 'test' error: test() got an unexpected keyword argument 'bar']" - ) + "[Error: Macro 'test' error: test() got an unexpected keyword argument 'bar']" + ) error_msg = my_stderr.getvalue() # Check traceback information into our stderr handler @@ -181,7 +172,6 @@ class TestCreole2html(BaseCreoleTest): for part in must_have: self.assertIn(part, error_msg) - def test_macro_wrong_arguments_quite(self): """ simple test for the "macro API" @@ -193,14 +183,14 @@ class TestCreole2html(BaseCreoleTest): html = creole2html( markup_string="<<test bar='foo'>>c<</test>>", emitter_kwargs={ - "verbose":1, - "macros":{"test":test}, - "stderr":my_stderr, + "verbose": 1, + "macros": {"test": test}, + "stderr": my_stderr, } ) self.assertEqual(html, - "[Error: Macro 'test' error: test() got an unexpected keyword argument 'bar']" - ) + "[Error: Macro 'test' error: test() got an unexpected keyword argument 'bar']" + ) error_msg = my_stderr.getvalue() self.assertEqual(error_msg, "") @@ -220,8 +210,8 @@ class TestCreole2html(BaseCreoleTest): <span class="nb">print</span><span class="p">(</span><span class="s1">'hello world'</span><span class="p">)</span><br /> </pre></div><br /> """, - macros={'code': example_macros.code} - ) + macros={'code': example_macros.code} + ) def test_code_macro_fallback(self): # force to use fallback. Will be reset in self.setUp() @@ -258,8 +248,6 @@ class TestCreole2html(BaseCreoleTest): ) - - class TestCreole2htmlMarkup(BaseCreoleTest): def test_creole_basic(self): @@ -277,7 +265,7 @@ class TestCreole2htmlMarkup(BaseCreoleTest): out_string = creole2html("first\r\nsecond") self.assertEqual(out_string, "<p>first<br />\nsecond</p>") - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def test_creole_linebreak(self): self.assert_creole2html(r""" @@ -391,8 +379,8 @@ class TestCreole2htmlMarkup(BaseCreoleTest): the|text <p>the end</p> """, - macros=test_macros, - ) + macros=test_macros, + ) def test_macro_html1(self): self.assert_creole2html(r""" @@ -408,8 +396,8 @@ class TestCreole2htmlMarkup(BaseCreoleTest): <p>inline: {...} code</p> """, - macros=example_macros, - ) + macros=example_macros, + ) def test_macro_not_exist1(self): """ @@ -439,7 +427,7 @@ class TestCreole2htmlMarkup(BaseCreoleTest): self.assert_creole2html(source_string, should_string, verbose=1) - #---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # Test with verbose=2 ans a StringIO stderr handler def test_wrong_macro_syntax(self): @@ -450,8 +438,8 @@ class TestCreole2htmlMarkup(BaseCreoleTest): <p>wrong macro line:<br /> [Error: Wrong macro arguments: ">Some funky page summary.<</summary" for macro 'summary' (maybe wrong macro tag syntax?)] </p> - """, # verbose=True - ) + """, # verbose=True + ) def test_macro_not_exist2(self): """ @@ -473,8 +461,7 @@ class TestCreole2htmlMarkup(BaseCreoleTest): <p>inline macro:<br /> </p> """, verbose=False - ) - + ) def test_toc_simple(self): """ @@ -799,7 +786,7 @@ class TestCreole2htmlMarkup(BaseCreoleTest): """, """ <p>a link to the <a href="http://www.pylucid.org">http://www.pylucid.org</a> page.</p> """ - ) + ) def test_wiki_style_line_breaks1(self): html = creole2html( @@ -870,7 +857,6 @@ class TestCreole2htmlMarkup(BaseCreoleTest): <p>end</p> """)) - def test_headline_spaces(self): """ https://code.google.com/p/python-creole/issues/detail?id=15 @@ -957,24 +943,27 @@ class TestStr2Dict(unittest.TestCase): {'key3': 3, 'key2': 2, 'key1': 1} ) + class TestDict2String(unittest.TestCase): def test_basic(self): self.assertEqual( - dict2string({'key':'value'}), + dict2string({'key': 'value'}), 'key="value"' ) def test_basic2(self): self.assertEqual( - dict2string({'foo':"bar", "no":123}), + dict2string({'foo': "bar", "no": 123}), 'foo="bar" no=123' ) + def test_basic3(self): self.assertEqual( - dict2string({"foo":'bar', "no":"ABC"}), + dict2string({"foo": 'bar', "no": "ABC"}), 'foo="bar" no="ABC"' ) + if __name__ == '__main__': unittest.main( verbosity=2 diff --git a/creole/tests/test_cross_compare_all.py b/creole/tests/test_cross_compare_all.py index b10f59a..c069fa9 100644 --- a/creole/tests/test_cross_compare_all.py +++ b/creole/tests/test_cross_compare_all.py @@ -19,11 +19,8 @@ """ - import unittest - - from creole.tests.utils.base_unittest import BaseCreoleTest @@ -32,6 +29,7 @@ class CrossCompareTests(BaseCreoleTest): Cross compare tests for creol2html _and_ html2creole with the same test strings. Used BaseCreoleTest.assertCreole() """ + def test_bold_italics(self): self.cross_compare( creole_string=r""" @@ -391,7 +389,7 @@ class CrossCompareTests(BaseCreoleTest): * no list </pre> """) - self.cross_compare(# FIXME: Not the best html2rest output + self.cross_compare( # FIXME: Not the best html2rest output rest_string=""" Preformatting text: diff --git a/creole/tests/test_cross_compare_creole.py b/creole/tests/test_cross_compare_creole.py index ea48603..7c4d893 100644 --- a/creole/tests/test_cross_compare_creole.py +++ b/creole/tests/test_cross_compare_creole.py @@ -17,7 +17,6 @@ """ - import unittest from creole.shared import example_macros @@ -75,7 +74,6 @@ class CrossCompareCreoleTests(BaseCreoleTest): lines.</strong></p> """) - def test_small(self): """ http://code.google.com/p/python-creole/issues/detail?id=12#c0 @@ -214,7 +212,6 @@ class CrossCompareCreoleTests(BaseCreoleTest): <p>this is <tt><strong>strong</strong> Teletyper</tt> ;)</p> """) - def test_no_inline_headline(self): self.cross_compare_creole( creole_string=r""" @@ -361,8 +358,8 @@ class CrossCompareCreoleTests(BaseCreoleTest): </ol></li> </ol> """, -# debug = True - ) + # debug = True + ) def test_big_table(self): self.cross_compare_creole(r""" @@ -408,8 +405,8 @@ class CrossCompareCreoleTests(BaseCreoleTest): </table> <p>...end</p> """, -# debug = True - ) + # debug = True + ) def test_html_macro_unknown_nodes(self): """ @@ -428,11 +425,11 @@ class CrossCompareCreoleTests(BaseCreoleTest): <p>555<x />666</p> """, - # use macro in creole2html emitter: - macros=example_macros, - # escape unknown tags with <<html>> in html2creole emitter: - unknown_emit=use_html_macro, - ) + # use macro in creole2html emitter: + macros=example_macros, + # escape unknown tags with <<html>> in html2creole emitter: + unknown_emit=use_html_macro, + ) def test_entities(self): self.cross_compare_creole(""" @@ -471,7 +468,6 @@ class CrossCompareCreoleTests(BaseCreoleTest): # """) - # def test_macro_pygments_code(self): # self.cross_compare_creole(r""" # a macro: @@ -498,7 +494,5 @@ class CrossCompareCreoleTests(BaseCreoleTest): # """) - - if __name__ == '__main__': unittest.main() diff --git a/creole/tests/test_cross_compare_rest.py b/creole/tests/test_cross_compare_rest.py index 454d6e5..ea02e19 100644 --- a/creole/tests/test_cross_compare_rest.py +++ b/creole/tests/test_cross_compare_rest.py @@ -14,7 +14,6 @@ """ - import unittest from creole.tests.utils.base_unittest import BaseCreoleTest @@ -32,7 +31,7 @@ class CrossCompareReStTests(BaseCreoleTest): <p>less-than sign: <</p> <p>greater-than sign: ></p> """, -# debug=True + # debug=True ) def test_bullet_lists_basic(self): @@ -51,7 +50,7 @@ class CrossCompareReStTests(BaseCreoleTest): <li>item 3</li> </ul> """, -# debug=True + # debug=True ) def test_numbered_lists(self): @@ -81,7 +80,7 @@ class CrossCompareReStTests(BaseCreoleTest): </li> </ol> """, -# debug=True + # debug=True ) def test_bullet_lists_nested(self): @@ -133,7 +132,7 @@ class CrossCompareReStTests(BaseCreoleTest): </ul> <p>Text under list.</p> """, -# debug=True + # debug=True ) def test_typeface_basic(self): @@ -285,7 +284,7 @@ class CrossCompareReStTests(BaseCreoleTest): </table> <p>Text after table.</p> """, -# debug=True + # debug=True ) def test_reuse_link_substitution1(self): @@ -313,7 +312,7 @@ class CrossCompareReStTests(BaseCreoleTest): </table> <p>Text after table.</p> """, -# debug=True + # debug=True ) def test_reuse_link_substitution2(self): @@ -337,7 +336,7 @@ class CrossCompareReStTests(BaseCreoleTest): </table> <p>and here <a href="foo/bar/">foo bar</a> again, after table.</p> """, -# debug=True + # debug=True ) def test_reuse_image_substitution(self): @@ -362,7 +361,7 @@ class CrossCompareReStTests(BaseCreoleTest): </tr> </table> """, -# debug=True + # debug=True ) def test_duplicate_image_substitution(self): @@ -391,12 +390,10 @@ class CrossCompareReStTests(BaseCreoleTest): </table> <p>again: the <img alt="same" src="/image.png" /> image and <a href="/url/foo/">same</a> link!</p> """, -# debug=True + # debug=True ) - - # def test_inline_literal(self): # """ TODO # http://docutils.sourceforge.net/docs/user/rst/quickref.html#inline-markup @@ -422,9 +419,7 @@ class CrossCompareReStTests(BaseCreoleTest): # <html escaped> # </pre> # """) - - if __name__ == '__main__': unittest.main( -# defaultTest="CrossCompareReStTests.test_paragraph_bwlow_table_links", + # defaultTest="CrossCompareReStTests.test_paragraph_bwlow_table_links", ) diff --git a/creole/tests/test_cross_compare_textile.py b/creole/tests/test_cross_compare_textile.py index fbbd871..63ce6c7 100644 --- a/creole/tests/test_cross_compare_textile.py +++ b/creole/tests/test_cross_compare_textile.py @@ -17,7 +17,6 @@ """ - import unittest from creole.tests.utils.base_unittest import BaseCreoleTest diff --git a/creole/tests/test_html2creole.py b/creole/tests/test_html2creole.py index df7c951..60b35a3 100644 --- a/creole/tests/test_html2creole.py +++ b/creole/tests/test_html2creole.py @@ -14,14 +14,12 @@ """ - import unittest -from creole.tests.utils.base_unittest import BaseCreoleTest - from creole import html2creole -from creole.shared.unknown_tags import raise_unknown_node, use_html_macro, \ - escape_unknown_nodes, transparent_unknown_nodes +from creole.shared.unknown_tags import (escape_unknown_nodes, raise_unknown_node, transparent_unknown_nodes, + use_html_macro) +from creole.tests.utils.base_unittest import BaseCreoleTest class TestHtml2Creole(unittest.TestCase): @@ -31,14 +29,12 @@ class TestHtml2Creole(unittest.TestCase): pass - - class TestHtml2CreoleMarkup(BaseCreoleTest): -# def assertCreole(self, raw_markup, raw_html, debug=False, **kwargs): -# self.assert_html2creole(raw_markup, raw_html, debug=debug, **kwargs) + # def assertCreole(self, raw_markup, raw_html, debug=False, **kwargs): + # self.assert_html2creole(raw_markup, raw_html, debug=debug, **kwargs) - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def test_not_used(self): """ @@ -56,7 +52,7 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): <i>italic</i></p> """) - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def test_raise_unknown_node(self): """ @@ -64,10 +60,10 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): Raise NotImplementedError on unknown tags. """ self.assertRaises(NotImplementedError, - html2creole, - html_string="<unknwon>", - unknown_emit=raise_unknown_node - ) + html2creole, + html_string="<unknwon>", + unknown_emit=raise_unknown_node + ) def test_use_html_macro(self): """ @@ -85,8 +81,8 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): <p>555<unknown />666</p> """, - unknown_emit=use_html_macro - ) + unknown_emit=use_html_macro + ) def test_escape_unknown_nodes(self): """ @@ -104,8 +100,8 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): <p>555<unknown />666</p> """, - unknown_emit=escape_unknown_nodes - ) + unknown_emit=escape_unknown_nodes + ) def test_escape_unknown_nodes2(self): """ @@ -119,8 +115,8 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): var js_sha_link='<p>***</p>'; </script> """, - unknown_emit=escape_unknown_nodes - ) + unknown_emit=escape_unknown_nodes + ) def test_transparent_unknown_nodes(self): """ @@ -133,7 +129,7 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): """, """ <form class="foo" id="bar"><label><em>baz</em></label>, <strong>quux</strong></form> """, unknown_emit=transparent_unknown_nodes - ) + ) def test_transparent_unknown_nodes2(self): """ @@ -145,7 +141,7 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): """, """ <p>FOO <script>var a='<em>STRONG</em>';</script> BAR</p> """, unknown_emit=transparent_unknown_nodes - ) + ) def test_transparent_unknown_nodes_block_elements(self): """ @@ -161,9 +157,9 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): <div><em>baz</em>,</div> <fieldset><strong>quux</strong></fieldset> <span>spam, </span><label>ham, </label>and eggs """, unknown_emit=transparent_unknown_nodes - ) + ) - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- def test_entities(self): """ @@ -376,7 +372,7 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): # foo """, """ <ol class=gbtc><li>foo</li></ol> - """)#, debug=True) + """) # , debug=True) def test_ignore_links_without_href(self): """https://code.google.com/p/python-creole/issues/detail?id=19#c4""" @@ -384,7 +380,7 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): bar """, """ <a class="foo">bar</a> - """)#, debug=True) + """) # , debug=True) def test_newlines_after_headlines(self): self.assert_html2creole(r""" @@ -518,7 +514,7 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): # </ol> # <p>list 'b' end</p> # """, -## debug=True +# debug=True # ) # # def test_list2(self): @@ -539,12 +535,11 @@ class TestHtml2CreoleMarkup(BaseCreoleTest): # <li><tt>//this// is **not** [[processed]]</tt></li> # </ol> # """, -## debug=True +# debug=True # ) if __name__ == '__main__': unittest.main( -# defaultTest="TestHtml2CreoleMarkup.test_nested_listsitems_with_paragraph" + # defaultTest="TestHtml2CreoleMarkup.test_nested_listsitems_with_paragraph" ) - diff --git a/creole/tests/test_html2rest.py b/creole/tests/test_html2rest.py index 29d197a..a10ae2a 100644 --- a/creole/tests/test_html2rest.py +++ b/creole/tests/test_html2rest.py @@ -14,7 +14,6 @@ """ - import unittest from creole.emitter.html2rest_emitter import Html2restException @@ -38,7 +37,7 @@ class ReStTests(BaseCreoleTest): <p>second block, line 1 and line 2</p> """, -# debug=True + # debug=True ) def test_substitution_image_without_alt_or_title(self): @@ -213,7 +212,7 @@ class ReStTests(BaseCreoleTest): def test_duplicate_substitution1(self): self.assertRaises(Html2restException, self.assert_html2rest, - rest_string=""" + rest_string=""" +-----------------------------+ | this is `same`_ first time. | +-----------------------------+ @@ -222,20 +221,20 @@ class ReStTests(BaseCreoleTest): the `same </other/>`_ link? """, - html_string=""" + html_string=""" <table> <tr><td>the <a href="/first/">same</a> first time.</td> </tr> </table> <p>the <a href="/other/">same</a> link?</p> """, -# debug=True - ) + # debug=True + ) def test_duplicate_link_substitution(self): self.assertRaises(Html2restException, self.assert_html2rest, -# self.cross_compare( - rest_string=""" + # self.cross_compare( + rest_string=""" +-----------------------------+ | this is `same`_ first time. | +-----------------------------+ @@ -244,33 +243,32 @@ class ReStTests(BaseCreoleTest): the `same </other/>`_ link? """, - html_string=""" + html_string=""" <table> <tr><td>the <a href="/first/">same</a> first time.</td> </tr> </table> <p>the <a href="/other/">same</a> link?</p> """, -# debug=True - ) + # debug=True + ) def test_duplicate_image_substitution(self): self.assertRaises(Html2restException, self.assert_html2rest, -# self.cross_compare( - rest_string=""" + # self.cross_compare( + rest_string=""" a |image|... and a other |image|! .. |image| image:: /image.png .. |image| image:: /other.png """, - html_string=""" + html_string=""" <p>a <img src="/image.png" title="image" alt="image" />...<br /> and a other <img src="/other.png" title="image" alt="image" />!</p> """, -# debug=True - ) - + # debug=True + ) # def test_preformat_unknown_nodes(self): @@ -315,6 +313,5 @@ class ReStTests(BaseCreoleTest): # """, # ) - if __name__ == '__main__': unittest.main() diff --git a/creole/tests/test_html2textile.py b/creole/tests/test_html2textile.py index b26b3e9..cd03b88 100644 --- a/creole/tests/test_html2textile.py +++ b/creole/tests/test_html2textile.py @@ -14,7 +14,6 @@ """ - import unittest from creole.shared.unknown_tags import preformat_unknown_nodes @@ -35,7 +34,7 @@ class TextileTests(BaseCreoleTest): <p>less-than sign: <<br /> greater-than sign: ></p> """, -# debug=True + # debug=True ) def test_preformat_unknown_nodes(self): @@ -55,7 +54,7 @@ class TextileTests(BaseCreoleTest): <p>555<x />666</p> """, - emitter_kwargs={"unknown_emit":preformat_unknown_nodes} + emitter_kwargs={"unknown_emit": preformat_unknown_nodes} ) def test_transparent_unknown_nodes(self): diff --git a/creole/tests/test_macros.py b/creole/tests/test_macros.py index 2a036be..c596a81 100644 --- a/creole/tests/test_macros.py +++ b/creole/tests/test_macros.py @@ -12,10 +12,9 @@ """ - - import json + def unittest_macro1(**kwargs): """ >>> unittest_macro1(foo="bar") @@ -28,7 +27,7 @@ def unittest_macro1(**kwargs): '[test macro1 - kwargs: a=1,b=2]' """ kwargs = ','.join(['%s=%s' % (k, json.dumps(v)) for k, v in sorted(kwargs.items())]) - return "[test macro1 - kwargs: %s]" % kwargs + return f"[test macro1 - kwargs: {kwargs}]" def unittest_macro2(char, text): @@ -38,6 +37,7 @@ def unittest_macro2(char, text): """ return char.join(text.split()) + if __name__ == '__main__': import doctest print(doctest.testmod()) diff --git a/creole/tests/test_rest2html.py b/creole/tests/test_rest2html.py index 23261d9..dee787c 100644 --- a/creole/tests/test_rest2html.py +++ b/creole/tests/test_rest2html.py @@ -12,7 +12,6 @@ """ - import tempfile import unittest @@ -112,7 +111,7 @@ class ReSt2HtmlTests(BaseCreoleTest): .. include:: doesntexist.txt """, """ <p>Include should be disabled by default.</p> - """, report_level=3) # Set log level to "error" to suppress the waring output + """, report_level=3) # Set log level to "error" to suppress the waring output def test_include_enabled(self): test_content = "Content from include file." @@ -138,7 +137,7 @@ class ReSt2HtmlTests(BaseCreoleTest): <hr width=50 size=10> """, """ <p>Raw directive should be disabled by default.</p> - """, report_level=3) # Set log level to "error" to suppress the waring output + """, report_level=3) # Set log level to "error" to suppress the waring output def test_raw_enabled(self): self.assert_rest2html(""" diff --git a/creole/tests/test_setup_utils.py b/creole/tests/test_setup_utils.py index 462351e..33d998c 100644 --- a/creole/tests/test_setup_utils.py +++ b/creole/tests/test_setup_utils.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# coding: utf-8 """ unittest for setup_utils @@ -7,28 +5,25 @@ https://code.google.com/p/python-creole/wiki/UseInSetup - :copyleft: 2011-2014 by python-creole team, see AUTHORS for more details. + :copyleft: 2011-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - -import unittest import os +import tempfile import warnings +import creole +from creole.setup_utils import get_long_description +from creole.tests.utils.base_unittest import BaseCreoleTest + try: - import docutils + import docutils # noqa flake8 DOCUTILS = True except ImportError: DOCUTILS = False -import creole -from creole.setup_utils import get_long_description -from creole.tests.utils.base_unittest import BaseCreoleTest -from creole.py3compat import BINARY_TYPE, PY3, TEXT_TYPE -import tempfile - CREOLE_PACKAGE_ROOT = os.path.abspath(os.path.join(os.path.dirname(creole.__file__), "..")) TEST_README_DIR = os.path.abspath(os.path.dirname(__file__)) @@ -40,7 +35,7 @@ TEST_README_FILENAME = "test_README.creole" class SetupUtilsTests(BaseCreoleTest): def run(self, *args, **kwargs): # TODO: Remove if python 2.6 will be not support anymore. - if DOCUTILS == False: + if not DOCUTILS: warnings.warn("Skip SetupUtilsTests, because 'docutils' not installed.") return return super(SetupUtilsTests, self).run(*args, **kwargs) @@ -48,12 +43,12 @@ class SetupUtilsTests(BaseCreoleTest): def test_creole_package_path(self): self.assertTrue( os.path.isdir(CREOLE_PACKAGE_ROOT), - "CREOLE_PACKAGE_ROOT %r is not a existing direcotry!" % CREOLE_PACKAGE_ROOT + f"CREOLE_PACKAGE_ROOT {CREOLE_PACKAGE_ROOT!r} is not a existing direcotry!" ) filepath = os.path.join(CREOLE_PACKAGE_ROOT, "README.creole") self.assertTrue( os.path.isfile(filepath), - "README file %r not found!" % filepath + f"README file {filepath!r} not found!" ) def test_get_long_description_without_raise_errors(self): @@ -124,16 +119,7 @@ class SetupUtilsTests(BaseCreoleTest): def test_readme_encoding(self): long_description = get_long_description(TEST_README_DIR, filename=TEST_README_FILENAME, raise_errors=True) - - if PY3: - self.assertTrue(isinstance(long_description, TEXT_TYPE)) - else: - self.assertTrue(isinstance(long_description, BINARY_TYPE)) + self.assertTrue(isinstance(long_description, str)) txt = "German Umlaute: ä ö ü ß Ä Ö Ü" - if not PY3: - txt = txt.encode("utf-8") self.assertIn(txt, long_description) - -if __name__ == '__main__': - unittest.main() diff --git a/creole/tests/test_subprocess.py b/creole/tests/test_subprocess.py index fcce3bc..4a8b02e 100644 --- a/creole/tests/test_subprocess.py +++ b/creole/tests/test_subprocess.py @@ -9,7 +9,6 @@ """ -import os import sys import unittest diff --git a/creole/tests/utils/base_unittest.py b/creole/tests/utils/base_unittest.py index fb21245..3d28c94 100644 --- a/creole/tests/utils/base_unittest.py +++ b/creole/tests/utils/base_unittest.py @@ -1,5 +1,3 @@ -# coding: utf-8 - """ unitest base class @@ -7,18 +5,17 @@ Basic unittest class for all python-creole tests. - :copyleft: 2008-2014 by python-creole team, see AUTHORS for more details. + :copyleft: 2008-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - import re import warnings +from creole import creole2html, html2creole, html2rest, html2textile +from creole.exceptions import DocutilsImportError from creole.tests.utils.utils import MarkupTest -from creole.py3compat import TEXT_TYPE - try: import textile @@ -32,21 +29,17 @@ else: test_textile = True -from creole.exceptions import DocutilsImportError -from creole import creole2html, html2creole, html2textile, html2rest - try: from creole.rest_tools.clean_writer import rest2html except DocutilsImportError as err: REST_INSTALLED = False - warnings.warn("Can't run all ReSt unittests: %s" % err) + warnings.warn(f"Can't run all ReSt unittests: {err}") else: REST_INSTALLED = True tabs2spaces_re = re.compile(r"^(\t*)(.*?)$", re.M) - def tabs2spaces(html): """ form reformating textile html code >>> tabs2spaces("\\t<p>one<br />\\n\\t\\ttwo<br />\\n\\t\\t\\ttree</p>") @@ -76,11 +69,11 @@ def strip_html_lines(html, strip_lines=False): return html - class BaseCreoleTest(MarkupTest): """ Basic unittest class for all python-creole unittest classes. """ + def _debug_text(self, msg, raw_text): text = raw_text.replace(" ", ".") text = text.replace("\n", "\\n\n") @@ -88,17 +81,17 @@ class BaseCreoleTest(MarkupTest): print() print("_" * 79) - print(" Debug Text: %s:" % msg) + print(f" Debug Text: {msg}:") print(text) print("-" * 79) def assert_creole2html( - self, raw_creole, raw_html, - strip_lines=False, debug=True, - parser_kwargs={}, emitter_kwargs={}, - block_rules=None, blog_line_breaks=True, macros=None, verbose=True, stderr=None, - strict=False, - ): + self, raw_creole, raw_html, + strip_lines=False, debug=True, + parser_kwargs={}, emitter_kwargs={}, + block_rules=None, blog_line_breaks=True, macros=None, verbose=True, stderr=None, + strict=False, + ): """ compare the generated html code from the markup string >creole_string< with the >html_string< reference. @@ -109,10 +102,10 @@ class BaseCreoleTest(MarkupTest): # prepare whitespace on test strings markup_string = self._prepare_text(raw_creole) - assert isinstance(markup_string, TEXT_TYPE) + assert isinstance(markup_string, str) html_string = self._prepare_text(raw_html) - assert isinstance(html_string, TEXT_TYPE) + assert isinstance(html_string, str) if strip_lines: html_string = strip_html_lines(html_string, strip_lines) self._debug_text("assert_creole2html() html_string reference", html_string) @@ -135,10 +128,10 @@ class BaseCreoleTest(MarkupTest): self.assertEqual(out_string, html_string, msg="creole2html") def assert_html2creole2(self, creole, html, - debug=True, - unknown_emit=None, - strict=False, - ): + debug=True, + unknown_emit=None, + strict=False, + ): # convert html code into creole markup out_string = html2creole( html, debug, unknown_emit=unknown_emit, strict=strict @@ -148,21 +141,21 @@ class BaseCreoleTest(MarkupTest): # compare self.assertEqual(out_string, creole, msg="html2creole") - def assert_html2creole(self, raw_creole, raw_html, \ - strip_lines=False, debug=False, - # OLD API: - parser_kwargs={}, emitter_kwargs={}, - # html2creole: - unknown_emit=None, - strict=False, - ): + def assert_html2creole(self, raw_creole, raw_html, + strip_lines=False, debug=True, + # OLD API: + parser_kwargs={}, emitter_kwargs={}, + # html2creole: + unknown_emit=None, + strict=False, + ): """ Compare the genereted markup from the given >raw_html< html code, with the given >creole_string< reference string. """ self.assertEqual(parser_kwargs, {}, "parser_kwargs is deprecated!") self.assertEqual(emitter_kwargs, {}, "parser_kwargs is deprecated!") -# assert isinstance(raw_html, TEXT_TYPE) +# assert isinstance(raw_html, str) # creole_string = unicode(creole_string, encoding="utf8") # raw_html = unicode(raw_html, "utf8") @@ -170,27 +163,27 @@ class BaseCreoleTest(MarkupTest): # prepare whitespace on test strings creole = self._prepare_text(raw_creole) - assert isinstance(creole, TEXT_TYPE) + assert isinstance(creole, str) if debug: self._debug_text("assert_creole2html() markup", creole) html = self._prepare_text(raw_html) - assert isinstance(html, TEXT_TYPE) + assert isinstance(html, str) self.assert_html2creole2(creole, html, debug, unknown_emit, strict) def cross_compare_creole(self, creole_string, html_string, - strip_lines=False, debug=True, - # creole2html old API: - creole_parser_kwargs={}, html_emitter_kwargs={}, - # html2creole old API: - html_parser_kwargs={}, creole_emitter_kwargs={}, - - # creole2html new API: - block_rules=None, blog_line_breaks=True, macros=None, stderr=None, - # html2creole: - unknown_emit=None - ): + strip_lines=False, debug=True, + # creole2html old API: + creole_parser_kwargs={}, html_emitter_kwargs={}, + # html2creole old API: + html_parser_kwargs={}, creole_emitter_kwargs={}, + + # creole2html new API: + block_rules=None, blog_line_breaks=True, macros=None, stderr=None, + # html2creole: + unknown_emit=None + ): """ Cross compare with: * creole2html @@ -201,8 +194,8 @@ class BaseCreoleTest(MarkupTest): self.assertEqual(html_parser_kwargs, {}, "html_parser_kwargs is deprecated!") self.assertEqual(creole_emitter_kwargs, {}, "creole_emitter_kwargs is deprecated!") - assert isinstance(creole_string, TEXT_TYPE) - assert isinstance(html_string, TEXT_TYPE) + assert isinstance(creole_string, str) + assert isinstance(html_string, str) self.assertNotEqual(creole_string, html_string) self.assert_creole2html( @@ -217,8 +210,8 @@ class BaseCreoleTest(MarkupTest): unknown_emit=unknown_emit, ) - def assert_html2textile(self, textile_string, html_string, \ - strip_lines=False, debug=False, parser_kwargs={}, emitter_kwargs={}): + def assert_html2textile(self, textile_string, html_string, + strip_lines=False, debug=False, parser_kwargs={}, emitter_kwargs={}): """ Check html2textile """ @@ -241,15 +234,15 @@ class BaseCreoleTest(MarkupTest): return textile_string, html_string - def cross_compare_textile(self, textile_string, html_string, \ - strip_lines=False, debug=False, parser_kwargs={}, emitter_kwargs={}): + def cross_compare_textile(self, textile_string, html_string, + strip_lines=False, debug=False, parser_kwargs={}, emitter_kwargs={}): """ Checks: * html2textile * textile2html """ -# assert isinstance(textile_string, TEXT_TYPE) -# assert isinstance(html_string, TEXT_TYPE) +# assert isinstance(textile_string, str) +# assert isinstance(html_string, str) self.assertNotEqual(textile_string, html_string) # compare html -> textile @@ -272,8 +265,8 @@ class BaseCreoleTest(MarkupTest): self.assertEqual(html_string, html, msg="textile2html") - def assert_html2rest(self, rest_string, html_string, \ - strip_lines=False, debug=False, parser_kwargs={}, emitter_kwargs={}): + def assert_html2rest(self, rest_string, html_string, + strip_lines=False, debug=False, parser_kwargs={}, emitter_kwargs={}): """ Check html to reStructuredText converter """ @@ -298,8 +291,8 @@ class BaseCreoleTest(MarkupTest): return rest_string, html_string - def assert_rest2html(self, rest_string, html_string, \ - strip_lines=False, debug=False, prepare_strings=True, **kwargs): + def assert_rest2html(self, rest_string, html_string, + strip_lines=False, debug=False, prepare_strings=True, **kwargs): # compare rest -> html if not REST_INSTALLED: @@ -325,10 +318,10 @@ class BaseCreoleTest(MarkupTest): self.assertEqual(html, html_string, msg="rest2html") - def cross_compare_rest(self, rest_string, html_string, \ - strip_lines=False, debug=False, parser_kwargs={}, emitter_kwargs={}): -# assert isinstance(textile_string, TEXT_TYPE) -# assert isinstance(html_string, TEXT_TYPE) + def cross_compare_rest(self, rest_string, html_string, + strip_lines=False, debug=False, parser_kwargs={}, emitter_kwargs={}): + # assert isinstance(textile_string, str) + # assert isinstance(html_string, str) self.assertNotEqual(rest_string, html_string) rest_string, html_string = self.assert_html2rest( @@ -344,11 +337,11 @@ class BaseCreoleTest(MarkupTest): ) def cross_compare(self, - html_string, - creole_string=None, - textile_string=None, - rest_string=None, - strip_lines=False, debug=False, parser_kwargs={}, emitter_kwargs={}): + html_string, + creole_string=None, + textile_string=None, + rest_string=None, + strip_lines=False, debug=False, parser_kwargs={}, emitter_kwargs={}): """ Cross compare with: * creole2html @@ -371,6 +364,7 @@ class BaseCreoleTest(MarkupTest): rest_string, html_string, strip_lines, debug, parser_kwargs, emitter_kwargs ) + if __name__ == '__main__': import doctest print(doctest.testmod()) diff --git a/creole/tests/utils/unittest_subprocess.py b/creole/tests/utils/unittest_subprocess.py index 16ef649..126a6bc 100644 --- a/creole/tests/utils/unittest_subprocess.py +++ b/creole/tests/utils/unittest_subprocess.py @@ -51,11 +51,11 @@ class SubprocessMixin: print(f"stderr: {stderr!r}") stdout = stdout.strip() - print("="*100) + print("=" * 100) print("stdout:") - print("-"*100) + print("-" * 100) print(stdout) - print("-"*100) + print("-" * 100) return popen_args, retcode, stdout def assertSubprocess(self, popen_args, retcode, stdout): diff --git a/pyproject.toml b/pyproject.toml index 56386ae..b821124 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = 'python-creole' -version = '1.4.0' +version = '1.4.1' description = 'python-creole is an open-source (GPL) markup converter in pure Python for: creole2html, html2creole, html2ReSt, html2textile' # TODO: convert it: |