summaryrefslogtreecommitdiff
path: root/creole/html_tools
diff options
context:
space:
mode:
Diffstat (limited to 'creole/html_tools')
-rw-r--r--creole/html_tools/deentity.py33
-rw-r--r--creole/html_tools/strip_html.py9
-rw-r--r--creole/html_tools/text_tools.py4
3 files changed, 12 insertions, 34 deletions
diff --git a/creole/html_tools/deentity.py b/creole/html_tools/deentity.py
index 2f6104a..23a6190 100644
--- a/creole/html_tools/deentity.py
+++ b/creole/html_tools/deentity.py
@@ -1,32 +1,23 @@
-#!/usr/bin/env python
-# coding: utf-8
"""
python-creole utils
~~~~~~~~~~~~~~~~~~~
- :copyleft: 2008-2011 by python-creole team, see AUTHORS for more details.
+ :copyleft: 2008-2020 by python-creole team, see AUTHORS for more details.
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-
import re
-try:
- import htmlentitydefs as entities
-except ImportError:
- from html import entities # python 3
-
-from creole.py3compat import PY3
-
+from html import entities
entities_rules = '|'.join([
r"(&\#(?P<number>\d+);)",
r"(&\#x(?P<hex>[a-fA-F0-9]+);)",
r"(&(?P<named>[a-zA-Z]+);)",
])
-#print(entities_rules)
+# print(entities_rules)
entities_regex = re.compile(
entities_rules, re.VERBOSE | re.UNICODE | re.MULTILINE
)
@@ -50,21 +41,16 @@ class Deentity(object):
>>> d.replace_named("amp")
'&'
"""
+
def replace_number(self, text):
""" unicode number entity """
unicode_no = int(text)
- if PY3:
- return chr(unicode_no)
- else:
- return unichr(unicode_no)
+ return chr(unicode_no)
def replace_hex(self, text):
""" hex entity """
unicode_no = int(text, 16)
- if PY3:
- return chr(unicode_no)
- else:
- return unichr(unicode_no)
+ return chr(unicode_no)
def replace_named(self, text):
""" named entity """
@@ -73,10 +59,7 @@ class Deentity(object):
return " "
else:
codepoint = entities.name2codepoint[text]
- if PY3:
- return chr(codepoint)
- else:
- return unichr(codepoint)
+ return chr(codepoint)
def replace_all(self, content):
""" replace all html entities form the given text. """
@@ -84,7 +67,7 @@ class Deentity(object):
groups = match.groupdict()
for name, text in groups.items():
if text is not None:
- replace_method = getattr(self, 'replace_%s' % name)
+ replace_method = getattr(self, f'replace_{name}')
return replace_method(text)
# Should never happen:
diff --git a/creole/html_tools/strip_html.py b/creole/html_tools/strip_html.py
index 10534ad..11a2f91 100644
--- a/creole/html_tools/strip_html.py
+++ b/creole/html_tools/strip_html.py
@@ -12,12 +12,10 @@
"""
-
import re
from creole.parser.html_parser_config import BLOCK_TAGS
-
strip_html_regex = re.compile(
r"""
\s*
@@ -33,7 +31,6 @@ strip_html_regex = re.compile(
)
-
def strip_html(html_code):
"""
Delete whitespace from html code. Doesn't recordnize preformatted blocks!
@@ -58,8 +55,6 @@ def strip_html(html_code):
>>> strip_html('<p>a <img src="/image.jpg" /> image.</p>')
'<p>a <img src="/image.jpg" /> image.</p>'
-
-
"""
def strip_tag(match):
@@ -90,10 +85,10 @@ def strip_html(html_code):
elif startend_tag:
# It's a closed start tag e.g.: <br />
- if space_start: # there was space before the tag
+ if space_start: # there was space before the tag
result = " " + result
- if space_end: # there was space after the tag
+ if space_end: # there was space after the tag
result += " "
else:
# a start tag e.g.: <strong>
diff --git a/creole/html_tools/text_tools.py b/creole/html_tools/text_tools.py
index 5843cf6..16487a5 100644
--- a/creole/html_tools/text_tools.py
+++ b/creole/html_tools/text_tools.py
@@ -12,11 +12,11 @@
"""
-
import re
-
space_re = re.compile(r"^(\s*)(.*?)(\s*)$", re.DOTALL)
+
+
def clean_whitespace(txt):
"""
Special whitespaces cleanup