diff options
Diffstat (limited to 'creole/html_tools')
-rw-r--r-- | creole/html_tools/deentity.py | 33 | ||||
-rw-r--r-- | creole/html_tools/strip_html.py | 9 | ||||
-rw-r--r-- | creole/html_tools/text_tools.py | 4 |
3 files changed, 12 insertions, 34 deletions
diff --git a/creole/html_tools/deentity.py b/creole/html_tools/deentity.py index 2f6104a..23a6190 100644 --- a/creole/html_tools/deentity.py +++ b/creole/html_tools/deentity.py @@ -1,32 +1,23 @@ -#!/usr/bin/env python -# coding: utf-8 """ python-creole utils ~~~~~~~~~~~~~~~~~~~ - :copyleft: 2008-2011 by python-creole team, see AUTHORS for more details. + :copyleft: 2008-2020 by python-creole team, see AUTHORS for more details. :license: GNU GPL v3 or above, see LICENSE for more details. """ - import re -try: - import htmlentitydefs as entities -except ImportError: - from html import entities # python 3 - -from creole.py3compat import PY3 - +from html import entities entities_rules = '|'.join([ r"(&\#(?P<number>\d+);)", r"(&\#x(?P<hex>[a-fA-F0-9]+);)", r"(&(?P<named>[a-zA-Z]+);)", ]) -#print(entities_rules) +# print(entities_rules) entities_regex = re.compile( entities_rules, re.VERBOSE | re.UNICODE | re.MULTILINE ) @@ -50,21 +41,16 @@ class Deentity(object): >>> d.replace_named("amp") '&' """ + def replace_number(self, text): """ unicode number entity """ unicode_no = int(text) - if PY3: - return chr(unicode_no) - else: - return unichr(unicode_no) + return chr(unicode_no) def replace_hex(self, text): """ hex entity """ unicode_no = int(text, 16) - if PY3: - return chr(unicode_no) - else: - return unichr(unicode_no) + return chr(unicode_no) def replace_named(self, text): """ named entity """ @@ -73,10 +59,7 @@ class Deentity(object): return " " else: codepoint = entities.name2codepoint[text] - if PY3: - return chr(codepoint) - else: - return unichr(codepoint) + return chr(codepoint) def replace_all(self, content): """ replace all html entities form the given text. """ @@ -84,7 +67,7 @@ class Deentity(object): groups = match.groupdict() for name, text in groups.items(): if text is not None: - replace_method = getattr(self, 'replace_%s' % name) + replace_method = getattr(self, f'replace_{name}') return replace_method(text) # Should never happen: diff --git a/creole/html_tools/strip_html.py b/creole/html_tools/strip_html.py index 10534ad..11a2f91 100644 --- a/creole/html_tools/strip_html.py +++ b/creole/html_tools/strip_html.py @@ -12,12 +12,10 @@ """ - import re from creole.parser.html_parser_config import BLOCK_TAGS - strip_html_regex = re.compile( r""" \s* @@ -33,7 +31,6 @@ strip_html_regex = re.compile( ) - def strip_html(html_code): """ Delete whitespace from html code. Doesn't recordnize preformatted blocks! @@ -58,8 +55,6 @@ def strip_html(html_code): >>> strip_html('<p>a <img src="/image.jpg" /> image.</p>') '<p>a <img src="/image.jpg" /> image.</p>' - - """ def strip_tag(match): @@ -90,10 +85,10 @@ def strip_html(html_code): elif startend_tag: # It's a closed start tag e.g.: <br /> - if space_start: # there was space before the tag + if space_start: # there was space before the tag result = " " + result - if space_end: # there was space after the tag + if space_end: # there was space after the tag result += " " else: # a start tag e.g.: <strong> diff --git a/creole/html_tools/text_tools.py b/creole/html_tools/text_tools.py index 5843cf6..16487a5 100644 --- a/creole/html_tools/text_tools.py +++ b/creole/html_tools/text_tools.py @@ -12,11 +12,11 @@ """ - import re - space_re = re.compile(r"^(\s*)(.*?)(\s*)$", re.DOTALL) + + def clean_whitespace(txt): """ Special whitespaces cleanup |