1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
#!/usr/bin/env python
# coding: utf-8
"""
Try to be as strict as PyPI.
Code based on:
https://bitbucket.org/pypa/pypi/src/tip/description_utils.py
see also:
https://bitbucket.org/pypa/pypi/issue/161/rest-formatting-fails-and-there-is-no-way
"""
from urllib.parse import urlparse
from creole.exceptions import DocutilsImportError
try:
import docutils # noqa flake8
from docutils import io, readers
from docutils.core import publish_doctree, Publisher
from docutils.transforms import TransformError
except ImportError as err:
msg = (
"%s - You can't use rest2html!"
" Please install: http://pypi.python.org/pypi/docutils"
) % err
raise DocutilsImportError(msg)
# URI schemes that a link target may use; pypi_rest2html() rejects any
# 'refuri'/'uri' attribute whose scheme is not listed here.
ALLOWED_SCHEMES = [
    'file', 'ftp', 'gopher', 'hdl', 'http', 'https', 'imap', 'mailto',
    'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp',
    'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet', 'wais',
    'irc',
]
def pypi_rest2html(source, output_encoding='unicode'):
    """
    Render reStructuredText to an HTML body fragment, as strictly as PyPI.

    Raw HTML and file insertion are disabled, and docutils is configured
    (``halt_level=2``) to raise an exception on warnings or errors instead
    of reporting them in the output.

    :param source: reStructuredText markup to convert.
    :param output_encoding: ``'unicode'`` (default) returns the HTML as
        text; any other value is used as the codec name to encode the
        result before returning it.
    :return: the ``body`` part produced by the docutils HTML writer.
    :raises TransformError: if a ``refuri`` or ``uri`` attribute of any
        node uses a scheme that is not in ``ALLOWED_SCHEMES`` (a URI
        without a scheme is rejected as well).

    >>> pypi_rest2html("test!")
    '<p>test!</p>\\n'
    """
    settings_overrides = {
        'raw_enabled': 0,  # no raw HTML code
        'file_insertion_enabled': 0,  # no file/URL access
        'halt_level': 2,  # at warnings or errors, raise an exception
        'report_level': 5,  # never report problems with the reST code
    }

    # Parse the reStructuredText into a docutils document tree.
    document = publish_doctree(
        source=source,
        settings_overrides=settings_overrides,
    )

    # Newer docutils releases deprecate Node.traverse() in favour of
    # Node.findall(). Prefer the new method and fall back to the old one so
    # that older docutils versions keep working.
    find_all = getattr(document, 'findall', None)
    if find_all is None:
        find_all = document.traverse

    # Reject every link/image target whose URI scheme is not whitelisted.
    for node in find_all():
        if node.tagname == '#text':
            # Text nodes are skipped before the attribute lookups below.
            continue
        if node.hasattr('refuri'):
            uri = node['refuri']
        elif node.hasattr('uri'):
            uri = node['uri']
        else:
            continue
        if urlparse(uri).scheme not in ALLOWED_SCHEMES:
            raise TransformError('link scheme not allowed')

    # Now turn the checked document tree into HTML.
    reader = readers.doctree.Reader(parser_name='null')
    pub = Publisher(
        reader,
        source=io.DocTreeInput(document),
        destination_class=io.StringOutput,
    )
    pub.set_writer('html')
    pub.process_programmatic_settings(None, settings_overrides, None)
    pub.set_destination(None, None)
    pub.publish()

    # Only the body fragment is returned, not the full HTML page.
    output = pub.writer.parts['body']
    if output_encoding != 'unicode':
        output = output.encode(output_encoding)
    return output
if __name__ == '__main__':
    # Run the doctests embedded in this module and print the summary
    # returned by doctest.
    from doctest import testmod

    print(testmod())
|