diff options
author | Nicholas Car <nick@kurrawong.net> | 2020-03-16 10:42:42 +1000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-16 10:42:42 +1000 |
commit | 41021d97e4077a53a7f7ba41294109775adf4435 (patch) | |
tree | 360d45fafa42d307f1f66a3ab75aaee392e00958 /rdflib | |
parent | 4acba111c7f8b22171247397bc2bb72df15b3a2a (diff) | |
parent | e60e024e3bb26c85e34b0c9c80c6e47f47d90858 (diff) | |
download | rdflib-41021d97e4077a53a7f7ba41294109775adf4435.tar.gz |
Merge pull request #961 from kempei/patch-1
fixed URIRef including native unicode characters
Diffstat (limited to 'rdflib')
-rw-r--r-- | rdflib/term.py | 6 | ||||
-rw-r--r-- | rdflib/util.py | 4 |
2 files changed, 5 insertions, 5 deletions
```diff
diff --git a/rdflib/term.py b/rdflib/term.py
index 1a75d91a..3a8ce798 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -51,6 +51,7 @@ import xml.dom.minidom
 from datetime import date, time, datetime, timedelta
 from re import sub, compile
 from collections import defaultdict
+from unicodedata import category
 
 from isodate import parse_time, parse_date, parse_datetime, Duration, parse_duration, duration_isoformat
 
@@ -74,10 +75,7 @@ _invalid_uri_chars = '<>" {}|\\^`'
 
 
 def _is_valid_uri(uri):
-    for c in _invalid_uri_chars:
-        if c in uri:
-            return False
-    return True
+    return all(map(lambda c: ord(c) > 256 or not c in _invalid_uri_chars, uri))
 
 
 _lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$')
diff --git a/rdflib/util.py b/rdflib/util.py
index f0c6207d..1789aa70 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -156,7 +156,9 @@ def from_n3(s, default=None, backend=None, nsm=None):
     if not s:
         return default
     if s.startswith('<'):
-        return URIRef(s[1:-1])
+        # Hack: this should correctly handle strings with either native unicode
+        # characters, or \u1234 unicode escapes.
+        return URIRef(s[1:-1].encode("raw-unicode-escape").decode("unicode-escape"))
     elif s.startswith('"'):
         if s.startswith('"""'):
             quotes = '"""'
```