# creole/rest_tools/pypi_rest2html.py

#!/usr/bin/env python
# coding: utf-8

"""
    Try to be as strict as PyPI.

    Code based on:
    https://bitbucket.org/pypa/pypi/src/tip/description_utils.py

    see also:
    https://bitbucket.org/pypa/pypi/issue/161/rest-formatting-fails-and-there-is-no-way
"""


from urllib.parse import urlparse

from creole.exceptions import DocutilsImportError


try:
    import docutils  # noqa flake8
    from docutils import io, readers
    from docutils.core import publish_doctree, Publisher
    from docutils.transforms import TransformError
except ImportError as err:
    # docutils is mandatory for this module: re-raise as the package's own
    # error type with an install hint, and keep the original ImportError
    # attached as the explicit cause so the traceback shows what was missing.
    msg = (
        "%s - You can't use rest2html!"
        " Please install: http://pypi.python.org/pypi/docutils"
    ) % err
    raise DocutilsImportError(msg) from err


# URI schemes that a link/image target may use; pypi_rest2html() rejects
# every URI whose scheme is not listed here.
ALLOWED_SCHEMES = [
    'file', 'ftp', 'gopher', 'hdl', 'http', 'https', 'imap', 'mailto',
    'mms', 'news', 'nntp',
    'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp', 'shttp', 'sip', 'sips',
    'snews', 'svn', 'svn+ssh', 'telnet',
    'wais', 'irc',
]


def pypi_rest2html(source, output_encoding='unicode'):
    """
    Convert reStructuredText to an HTML body fragment with PyPI-like strictness.

    Raw HTML and file insertion are disabled, and docutils is told to halt
    at warning level, so problematic markup raises instead of being rendered.
    Every node carrying a ``refuri`` or ``uri`` attribute must use a scheme
    listed in ``ALLOWED_SCHEMES``; a URI without any scheme (e.g. a relative
    link) parses to an empty scheme and is therefore rejected as well.

    :param source: the reStructuredText markup to render
    :param output_encoding: ``'unicode'`` returns the HTML as ``str``; any
        other value is used as the codec name to encode the result
    :return: the ``body`` part of the docutils HTML writer output
    :raises TransformError: if a URI uses a scheme that is not allowed

    >>> pypi_rest2html("test!")
    '<p>test!</p>\\n'
    """
    settings_overrides = {
        'raw_enabled': 0,  # no raw HTML code
        'file_insertion_enabled': 0,  # no file/URL access
        'halt_level': 2,  # at warnings or errors, raise an exception
        'report_level': 5,  # never report problems with the reST code
    }

    # Convert reStructuredText to a docutils document tree.
    document = publish_doctree(source=source,
                               settings_overrides=settings_overrides)

    # Newer docutils deprecates Node.traverse() in favour of Node.findall();
    # prefer findall() and only fall back to traverse() on older releases.
    iter_nodes = getattr(document, 'findall', None)
    if iter_nodes is None:
        iter_nodes = document.traverse

    for node in iter_nodes():
        if node.tagname == '#text':
            # Text nodes carry no attributes, so there is nothing to check.
            continue
        if node.hasattr('refuri'):
            uri = node['refuri']
        elif node.hasattr('uri'):
            uri = node['uri']
        else:
            continue
        if urlparse(uri).scheme not in ALLOWED_SCHEMES:
            raise TransformError('link scheme not allowed')

    # Now turn the checked document tree into HTML.
    reader = readers.doctree.Reader(parser_name='null')
    pub = Publisher(reader, source=io.DocTreeInput(document),
                    destination_class=io.StringOutput)
    pub.set_writer('html')
    pub.process_programmatic_settings(None, settings_overrides, None)
    pub.set_destination(None, None)
    pub.publish()

    # Only the <body> content is returned, not a complete HTML page.
    output = pub.writer.parts['body']

    if output_encoding != 'unicode':
        output = output.encode(output_encoding)

    return output


if __name__ == '__main__':
    # Executed as a script: run this module's doctests and print the result.
    from doctest import testmod
    print(testmod())