summary refs log tree commit diff
path: root/rdflib
diff options
context:
space:
mode:
author Nicholas Car <nick@kurrawong.net> 2020-03-16 10:42:42 +1000
committer GitHub <noreply@github.com> 2020-03-16 10:42:42 +1000
commit 41021d97e4077a53a7f7ba41294109775adf4435 (patch)
tree 360d45fafa42d307f1f66a3ab75aaee392e00958 /rdflib
parent 4acba111c7f8b22171247397bc2bb72df15b3a2a (diff)
parent e60e024e3bb26c85e34b0c9c80c6e47f47d90858 (diff)
download rdflib-41021d97e4077a53a7f7ba41294109775adf4435.tar.gz
Merge pull request #961 from kempei/patch-1
fixed URIRef including native unicode characters
Diffstat (limited to 'rdflib')
-rw-r--r-- rdflib/term.py 6
-rw-r--r-- rdflib/util.py 4
2 files changed, 5 insertions, 5 deletions
diff --git a/rdflib/term.py b/rdflib/term.py
index 1a75d91a..3a8ce798 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -51,6 +51,7 @@ import xml.dom.minidom
from datetime import date, time, datetime, timedelta
from re import sub, compile
from collections import defaultdict
+from unicodedata import category
from isodate import parse_time, parse_date, parse_datetime, Duration, parse_duration, duration_isoformat
@@ -74,10 +75,7 @@ _invalid_uri_chars = '<>" {}|\\^`'
def _is_valid_uri(uri):
- for c in _invalid_uri_chars:
- if c in uri:
- return False
- return True
+ return all(map(lambda c: ord(c) > 256 or not c in _invalid_uri_chars, uri))
_lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$')
diff --git a/rdflib/util.py b/rdflib/util.py
index f0c6207d..1789aa70 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -156,7 +156,9 @@ def from_n3(s, default=None, backend=None, nsm=None):
if not s:
return default
if s.startswith('<'):
- return URIRef(s[1:-1])
+ # Hack: this should correctly handle strings with either native unicode
+ # characters, or \u1234 unicode escapes.
+ return URIRef(s[1:-1].encode("raw-unicode-escape").decode("unicode-escape"))
elif s.startswith('"'):
if s.startswith('"""'):
quotes = '"""'