diff options
Diffstat (limited to 'rdflib/compat.py')
-rw-r--r-- | rdflib/compat.py | 62 |
1 files changed, 41 insertions, 21 deletions
diff --git a/rdflib/compat.py b/rdflib/compat.py
index 139f2428..d288c10e 100644
--- a/rdflib/compat.py
+++ b/rdflib/compat.py
@@ -5,9 +5,11 @@ and different versions of support libraries.
 
 import re
 import codecs
-import typing as t
+import warnings
+from typing import TYPE_CHECKING, Match
 
-if t.TYPE_CHECKING:
+
+if TYPE_CHECKING:
     import xml.etree.ElementTree as etree
 else:
     try:
@@ -59,6 +61,14 @@ def _unicodeExpand(s):
 
 
 def decodeStringEscape(s):
+    warnings.warn(
+        DeprecationWarning(
+            "rdflib.compat.decodeStringEscape() is deprecated, "
+            "it will be removed in rdflib 7.0.0. "
+            "This function is not used anywhere in rdflib anymore "
+            "and the utility that it does provide is not implemented correctly."
+        )
+    )
     r"""
     s is byte-string - replace \ escapes in string
     """
@@ -76,28 +86,38 @@ def decodeStringEscape(s):
     # return _unicodeExpand(s) # hmm - string escape doesn't do unicode escaping
 
 
-def decodeUnicodeEscape(s):
-    """
-    s is a unicode string
-    replace ``\\n`` and ``\\u00AC`` unicode escapes
-    """
-    if "\\" not in s:
-        # Most of times, there are no backslashes in strings.
-        # In the general case, it could use maketrans and translate.
-        return s
+_string_escape_map = {
+    "t": "\t",
+    "b": "\b",
+    "n": "\n",
+    "r": "\r",
+    "f": "\f",
+    '"': '"',
+    "'": "'",
+    "\\": "\\",
+}
+_string_escape_translator = str.maketrans(_string_escape_map)
 
-    s = s.replace("\\t", "\t")
-    s = s.replace("\\n", "\n")
-    s = s.replace("\\r", "\r")
-    s = s.replace("\\b", "\b")
-    s = s.replace("\\f", "\f")
-    s = s.replace('\\"', '"')
-    s = s.replace("\\'", "'")
-    s = s.replace("\\\\", "\\")
 
-    s = _unicodeExpand(s)  # hmm - string escape doesn't do unicode escaping
+def _turtle_escape_subber(match: Match[str]) -> str:
+    smatch, umatch = match.groups()
+    if smatch is not None:
+        return smatch.translate(_string_escape_translator)
+    else:
+        return chr(int(umatch[1:], 16))
 
-    return s
+
+_turtle_escape_pattern = re.compile(
+    r"""\\(?:([tbnrf"'\\])|(u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}))""",
+)
+
+
+def decodeUnicodeEscape(escaped: str) -> str:
+    if "\\" not in escaped:
+        # Most of times, there are no backslashes in strings.
+        # In the general case, it could use maketrans and translate.
+        return escaped
+    return _turtle_escape_pattern.sub(_turtle_escape_subber, escaped)
 
 
 # Migration to abc in Python 3.8