diff options
author | JensDiemer <git@jensdiemer.de> | 2015-06-02 18:02:29 +0200 |
---|---|---|
committer | JensDiemer <git@jensdiemer.de> | 2015-06-02 18:02:29 +0200 |
commit | 0bd2955957227ba611954d180cc7c0a6c3e62cf7 (patch) | |
tree | 27d8e27f334df26263ca78b4681f4b14ec80a5c5 /creole/rest_tools | |
parent | aa309e7c4eee478386a7a38cdbbc3adb2fc8e1c7 (diff) | |
download | creole-0bd2955957227ba611954d180cc7c0a6c3e62cf7.tar.gz |
Squashed commit of the following:
commit 04b2b00e51e3a60369de4a2ecde07c4cd5f67c57
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 17:58:34 2015 +0200
--verbose not needed
commit 1206d8fd18f368e0cacb54f76788c5eb855f72c5
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 17:49:13 2015 +0200
Strange: argparse uses stderr in Py2 and stdout in Py3 for --version
;)
commit 82140fcec5c1994081d31b288a28f924a7723ed8
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 17:47:39 2015 +0200
Bugfix: get rid of the u'' with Py2
commit cdbb3ff08848698ad9e0f7df5bfa7486e60cb7da
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 17:41:03 2015 +0200
fixup! WIP: refactor CLI tests
commit 6d959e3a9d793eed76cbe606f4cf43843ddfc281
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 17:29:28 2015 +0200
WIP: refactor CLI tests
commit 4ff6eb4f3454e92b50408541d3ed72215cfc4682
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 16:39:44 2015 +0200
bugfix travis config?
commit ecc22075a5433e91075700e1e928a63b96090de7
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 16:38:57 2015 +0200
new v1.3.0
commit 16f4244224856defa711d90b02b0e9498c55b220
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 16:38:46 2015 +0200
update README
commit ba59454aae040486d77843c9c41f1873fabf7f87
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 16:38:40 2015 +0200
+"pip install ." makes the CLI available on Travis?!?
commit 75a782dcd679f8c9aea6a5ef43f22245559f32c2
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 16:27:08 2015 +0200
run nosetests on travis with doctests
commit d3de2ebb979c1d08cc4bda1db439aeac98117500
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 15:49:22 2015 +0200
use 'json.dumps()' instead of own repr2()
commit 1ec9d9eacbaa1ec475ea2573f0a8744c713073da
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 14:13:50 2015 +0200
search in sys.path, too.
commit 1a101ec988b3c365d592e5e471f16d2a00700e04
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 14:07:10 2015 +0200
-"2.6"
commit 514d7b8f28708ad2c81e163e444cdd1901f9a992
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 14:07:00 2015 +0200
more info
commit 30c67fe9ef1af41a87fa676e6c345ec358a1928d
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 13:52:44 2015 +0200
sys.exit() if not importable
commit 5bf5c0829ea550609ff160d5240609d6948035d9
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 13:48:46 2015 +0200
run cli tests via subprocess
commit f9eac72fa1b9f7cfd60f1242887caa86c3d23422
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 13:48:19 2015 +0200
rename test macros, so that nose will not interpret them as a test
;)
commit 8e7f236490a7374e79d3a342b6446c79b645a484
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 13:47:39 2015 +0200
use "with"
commit a7376c023f0ef8886bdc8aff05e2eb4db82cb84b
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 13:47:29 2015 +0200
use nose and add a simple test, if nose is installed
commit 932aacb70ca387302fd6242aa9094df3d80322db
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 13:45:26 2015 +0200
Bugfix: --version
commit d884abc68a06961dcc0ae99f5642dcfdb560bd90
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 11:00:46 2015 +0200
use @unittest.expectedFailure
commit 75b21bd449e969bd05d5dcd41c74e2909172fb3f
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 10:54:09 2015 +0200
WIP: refactor
commit 17041df8d8cf1de91295da50b06de1c7f3bf7c62
Merge: 4285bbf e82d365
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 10:51:55 2015 +0200
Merge branch 'nose' into refactor
commit 4285bbf7707542772a70b3718d03394cd3537a43
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 10:51:15 2015 +0200
WIP: refactor
commit e82d365885bd5f23c3ad144d464a11ccb007fdae
Author: JensDiemer <git@jensdiemer.de>
Date: Tue Jun 2 10:38:24 2015 +0200
WIP: use nose
Diffstat (limited to 'creole/rest_tools')
-rw-r--r-- | creole/rest_tools/__init__.py | 0 | ||||
-rw-r--r-- | creole/rest_tools/clean_writer.py | 272 | ||||
-rw-r--r-- | creole/rest_tools/pypi_rest2html.py | 93 |
3 files changed, 365 insertions, 0 deletions
diff --git a/creole/rest_tools/__init__.py b/creole/rest_tools/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/creole/rest_tools/__init__.py
diff --git a/creole/rest_tools/clean_writer.py b/creole/rest_tools/clean_writer.py
new file mode 100644
index 0000000..5378bd7
--- /dev/null
+++ b/creole/rest_tools/clean_writer.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+"""
+    A clean reStructuredText html writer
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    It will produce a minimal set of html output.
+    (No extra divs, classes or ids.)
+
+    Some code stolen from:
+    http://www.arnebrodowski.de/blog/write-your-own-restructuredtext-writer.html
+    https://github.com/alex-morega/docutils-plainhtml/blob/master/plain_html_writer.py
+
+    :copyleft: 2011-2013 by python-creole team, see AUTHORS for more details.
+    :license: GNU GPL v3 or above, see LICENSE for more details.
+"""
+
+from __future__ import division, absolute_import, print_function, unicode_literals
+
+import re
+
+from creole.exceptions import DocutilsImportError
+from creole.py3compat import TEXT_TYPE, PY3
+
+try:
+    import docutils
+    from docutils.core import publish_parts
+    from docutils.writers import html4css1
+except ImportError as err:
+    msg = (
+        "%s - You can't use rest2html!"
+        " Please install: http://pypi.python.org/pypi/docutils"
+    ) % err
+    raise DocutilsImportError(msg)
+
+
+DEBUG = False
+
+# docutils version as a tuple of ints, e.g. "0.12" -> (0, 12).  Comparing the
+# raw version strings would be lexicographic and rank "0.9" above "0.10", so
+# visit_table() compares this tuple instead.
+DOCUTILS_VERSION = tuple(
+    int(number) for number in re.findall(r"\d+", docutils.__version__)[:2]
+)
+
+# Attributes and tags that starttag() drops from the generated html.
+IGNORE_ATTR = (
+    "start", "class", "frame", "rules",
+)
+IGNORE_TAGS = (
+    "div",
+)
+
+
+class CleanHTMLWriter(html4css1.Writer):
+    """
+    This docutils writer will use the CleanHTMLTranslator class below.
+    """
+    def __init__(self):
+        html4css1.Writer.__init__(self)
+        self.translator_class = CleanHTMLTranslator
+
+
+class CleanHTMLTranslator(html4css1.HTMLTranslator, object):
+    """
+    Clean html translator for docutils system.
+
+    starttag() leaves out the tags in IGNORE_TAGS and the attributes in
+    IGNORE_ATTR; several visit/depart hooks are replaced by no-ops.
+    """
+    def _do_nothing(self, node, *args, **kwargs):
+        """Replacement for visit/depart hooks that should emit nothing."""
+        pass
+
+    def starttag(self, node, tagname, suffix='\n', empty=0, **attributes):
+        """
+        Create a start tag, filtered by IGNORE_TAGS and IGNORE_ATTR.
+
+        Returns "" for an ignored tag; otherwise the tag with its
+        remaining attributes, closed with " /" if `empty` is true and
+        followed by `suffix`.
+        """
+        if tagname in IGNORE_TAGS:
+            if DEBUG:
+                print("ignore tag %r" % tagname)
+            return ""
+
+        parts = [tagname]
+        for name, value in sorted(attributes.items()):
+            # value=None was used for boolean attributes without
+            # value, but this isn't supported by XHTML.
+            assert value is not None
+
+            name = name.lower()
+
+            if name in IGNORE_ATTR:
+                continue
+
+            if isinstance(value, list):
+                value = ' '.join([TEXT_TYPE(x) for x in value])
+
+            part = '%s="%s"' % (name, self.attval(TEXT_TYPE(value)))
+            parts.append(part)
+
+        if DEBUG:
+            print("Tag %r - ids: %r - attributes: %r - parts: %r" % (
+                tagname, getattr(node, "ids", "-"), attributes, parts
+            ))
+
+        infix = ' /' if empty else ''
+        html = '<%s%s>%s' % (' '.join(parts), infix, suffix)
+        if DEBUG:
+            print("startag html: %r" % html)
+        return html
+
+    def visit_section(self, node):
+        self.section_level += 1
+
+    def depart_section(self, node):
+        self.section_level -= 1
+
+    set_class_on_child = _do_nothing
+    set_first_last = _do_nothing
+
+    # remove <blockquote> (e.g. in nested lists)
+    visit_block_quote = _do_nothing
+    depart_block_quote = _do_nothing
+
+    def depart_document(self, node):
+        """
+        Set only html_body (the part used by rest2html()) and don't
+        surround it with a <div>.
+        """
+        self.html_body.extend(self.body_prefix[1:] + self.body_pre_docinfo
+                              + self.docinfo + self.body
+                              + self.body_suffix[:-1])
+        assert not self.context, 'len(context) = %s' % len(self.context)
+
+
+    #__________________________________________________________________________
+    # Clean table:
+
+    visit_thead = _do_nothing
+    depart_thead = _do_nothing
+    visit_tbody = _do_nothing
+    depart_tbody = _do_nothing
+
+    def visit_table(self, node):
+        # Only docutils > 0.10 gets compact_p pushed here; presumably the
+        # inherited depart_table() of those releases pops it again.
+        if DOCUTILS_VERSION > (0, 10):
+            self.context.append(self.compact_p)
+            self.compact_p = True
+        self.body.append(self.starttag(node, 'table'))
+
+    def visit_tgroup(self, node):
+        node.stubs = []
+
+    def visit_field_list(self, node):
+        super(CleanHTMLTranslator, self).visit_field_list(node)
+        if "<col" in self.body[-1]:
+            # drop the last body chunk when it holds <col ...> markup
+            del self.body[-1]
+
+    def depart_field_list(self, node):
+        self.body.append('</table>\n')
+        self.compact_field_list, self.compact_p = self.context.pop()
+
+    def visit_docinfo(self, node):
+        self.body.append(self.starttag(node, 'table'))
+
+    def depart_docinfo(self, node):
+        self.body.append('</table>\n')
+
+    #__________________________________________________________________________
+    # Clean image:
+
+    depart_figure = _do_nothing
+
+    def visit_image(self, node):
+        """
+        Render the image via the base class, then add the alignment of the
+        image (or of its parent figure) as an "align" attribute of <img>.
+        """
+        super(CleanHTMLTranslator, self).visit_image(node)
+        if self.body[-1].startswith('<img'):
+            align = None
+
+            if 'align' in node:
+                # image with alignment
+                align = node['align']
+
+            elif node.parent.tagname == 'figure' and 'align' in node.parent:
+                # figure with alignment
+                align = node.parent['align']
+
+            if align:
+                self.body[-1] = self.body[-1].replace(' />', ' align="%s" />' % align)
+
+
+
+def rest2html(content, enable_exit_status=None, **kwargs):
+    """
+    Convert reStructuredText markup to clean html code: No extra div, class or ids.
+
+    :param content: the reStructuredText source as text (on Python 2 it is
+        converted with unicode() first)
+    :param enable_exit_status: passed through to docutils' publish_parts()
+    :param kwargs: extra docutils settings; they are merged into (and can
+        override) the defaults set below: file insertion and raw content
+        disabled, no doctitle transform
+    :return: the "html_body" part of the published document
+
+    >>> rest2html("- bullet list")
+    '<ul>\\n<li>bullet list</li>\\n</ul>\\n'
+
+    >>> rest2html("A ReSt link to `PyLucid CMS <http://www.pylucid.org>`_ :)")
+    '<p>A ReSt link to <a href="http://www.pylucid.org">PyLucid CMS</a> :)</p>\\n'
+
+    >>> rest2html("========", enable_exit_status=1, traceback=False, exit_status_level=2)
+    Traceback (most recent call last):
+    ...
+    SystemExit: 13
+    """
+    if not PY3:
+        content = unicode(content)
+
+    assert isinstance(content, TEXT_TYPE), "rest2html content must be %s, but it's %s" % (TEXT_TYPE, type(content))
+
+    settings_overrides = {
+        "input_encoding": "unicode",
+        "doctitle_xform": False,
+        "file_insertion_enabled": False,
+        "raw_enabled": False,
+    }
+    settings_overrides.update(kwargs)
+
+    parts = publish_parts(
+        source=content,
+        writer=CleanHTMLWriter(),
+        settings_overrides=settings_overrides,
+        enable_exit_status=enable_exit_status,
+    )
+    return parts["html_body"]  # Don't detach the first heading
+
+
+if __name__ == '__main__':
+    import doctest
+    print(doctest.testmod())
+
+    print(rest2html("""
+===============
+Section Title 1
+===============
+
+---------------
+Section Title 2
+---------------
+
+Section Title 3
+===============
+
+Section Title 4
+---------------
+
+Section Title 5
+```````````````
+
+Section Title 6
+'''''''''''''''
+    """))
diff --git a/creole/rest_tools/pypi_rest2html.py b/creole/rest_tools/pypi_rest2html.py
new file mode 100644
index 0000000..fd7d16d
--- /dev/null
+++ b/creole/rest_tools/pypi_rest2html.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+"""
+    Try to be as strict as PyPI.
+
+    Code based on:
+    https://bitbucket.org/pypa/pypi/src/tip/description_utils.py
+
+    see also:
+    https://bitbucket.org/pypa/pypi/issue/161/rest-formatting-fails-and-there-is-no-way
+"""
+
+from __future__ import division, absolute_import, print_function
+
+try:
+    # Python 3
+    from urllib.parse import urlparse
+except ImportError:
+    # Python 2
+    from urlparse import urlparse
+
+from creole.exceptions import DocutilsImportError
+
+try:
+    import docutils
+    from docutils import io, readers
+    from docutils.core import publish_doctree, Publisher
+    from docutils.writers import get_writer_class
+    from docutils.transforms import TransformError, Transform
+except ImportError as err:
+    msg = (
+        "%s - You can't use rest2html!"
+        " Please install: http://pypi.python.org/pypi/docutils"
+    ) % err
+    raise DocutilsImportError(msg)
+
+
+ALLOWED_SCHEMES = '''file ftp gopher hdl http https imap mailto mms news nntp
+prospero rsync rtsp rtspu sftp shttp sip sips snews svn svn+ssh telnet
+wais irc'''.split()
+
+
+def pypi_rest2html(source, output_encoding='unicode'):
+    """
+    Convert reST to html like PyPI: docutils halts on warnings/errors and
+    any link whose scheme is not in ALLOWED_SCHEMES raises TransformError.
+    With an `output_encoding` other than 'unicode' the html is returned
+    encoded in that encoding.
+
+    >>> pypi_rest2html("test!")
+    '<p>test!</p>\\n'
+    """
+    settings_overrides = {
+        'raw_enabled': 0, # no raw HTML code
+        'file_insertion_enabled': 0, # no file/URL access
+        'halt_level': 2, # at warnings or errors, raise an exception
+        'report_level': 5, # never report problems with the reST code
+    }
+
+    # Convert reStructuredText to HTML using Docutils.
+    document = publish_doctree(source=source, settings_overrides=settings_overrides)
+
+    for node in document.traverse():
+        if node.tagname == '#text':
+            continue
+        if node.hasattr('refuri'):
+            uri = node['refuri']
+        elif node.hasattr('uri'):
+            uri = node['uri']
+        else:
+            continue
+        if urlparse(uri).scheme not in ALLOWED_SCHEMES:
+            raise TransformError('link scheme not allowed')
+
+    # now turn the transformed document into HTML
+    reader = readers.doctree.Reader(parser_name='null')
+    pub = Publisher(reader, source=io.DocTreeInput(document),
+        destination_class=io.StringOutput)
+    pub.set_writer('html')
+    pub.process_programmatic_settings(None, settings_overrides, None)
+    pub.set_destination(None, None)
+    pub.publish()
+    output = pub.writer.parts['body']
+
+    if output_encoding != 'unicode':
+        output = output.encode(output_encoding)
+    return output
+
+
+if __name__ == '__main__':
+    import doctest
+    print(doctest.testmod())