summary | refs | log | tree | commit | diff
path: root/rdflib/compat.py
diff options
context:
space:
mode:
Diffstat (limited to 'rdflib/compat.py')
-rw-r--r-- rdflib/compat.py | 62
1 files changed, 41 insertions, 21 deletions
diff --git a/rdflib/compat.py b/rdflib/compat.py
index 139f2428..d288c10e 100644
--- a/rdflib/compat.py
+++ b/rdflib/compat.py
@@ -5,9 +5,11 @@ and different versions of support libraries.
import re
import codecs
-import typing as t
+import warnings
+from typing import TYPE_CHECKING, Match
-if t.TYPE_CHECKING:
+
+if TYPE_CHECKING:
import xml.etree.ElementTree as etree
else:
try:
@@ -59,6 +61,14 @@ def _unicodeExpand(s):
def decodeStringEscape(s):
+ warnings.warn(
+ DeprecationWarning(
+ "rdflib.compat.decodeStringEscape() is deprecated, "
+ "it will be removed in rdflib 7.0.0. "
+ "This function is not used anywhere in rdflib anymore "
+ "and the utility that it does provide is not implemented correctly."
+ )
+ )
r"""
s is byte-string - replace \ escapes in string
"""
@@ -76,28 +86,38 @@ def decodeStringEscape(s):
# return _unicodeExpand(s) # hmm - string escape doesn't do unicode escaping
-def decodeUnicodeEscape(s):
- """
- s is a unicode string
- replace ``\\n`` and ``\\u00AC`` unicode escapes
- """
- if "\\" not in s:
- # Most of times, there are no backslashes in strings.
- # In the general case, it could use maketrans and translate.
- return s
+_string_escape_map = {
+ "t": "\t",
+ "b": "\b",
+ "n": "\n",
+ "r": "\r",
+ "f": "\f",
+ '"': '"',
+ "'": "'",
+ "\\": "\\",
+}
+_string_escape_translator = str.maketrans(_string_escape_map)
- s = s.replace("\\t", "\t")
- s = s.replace("\\n", "\n")
- s = s.replace("\\r", "\r")
- s = s.replace("\\b", "\b")
- s = s.replace("\\f", "\f")
- s = s.replace('\\"', '"')
- s = s.replace("\\'", "'")
- s = s.replace("\\\\", "\\")
- s = _unicodeExpand(s) # hmm - string escape doesn't do unicode escaping
+def _turtle_escape_subber(match: Match[str]) -> str:
+ smatch, umatch = match.groups()
+ if smatch is not None:
+ return smatch.translate(_string_escape_translator)
+ else:
+ return chr(int(umatch[1:], 16))
- return s
+
+_turtle_escape_pattern = re.compile(
+ r"""\\(?:([tbnrf"'\\])|(u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}))""",
+)
+
+
+def decodeUnicodeEscape(escaped: str) -> str:
+ if "\\" not in escaped:
+ # Most of times, there are no backslashes in strings.
+ # In the general case, it could use maketrans and translate.
+ return escaped
+ return _turtle_escape_pattern.sub(_turtle_escape_subber, escaped)
# Migration to abc in Python 3.8