summaryrefslogtreecommitdiff
path: root/rdflib
diff options
context:
space:
mode:
authorTom Gillespie <tgbugs@gmail.com>2017-04-25 13:31:15 -0700
committerTom Gillespie <tgbugs@gmail.com>2020-03-12 19:27:14 -0700
commitf1474c5f42bbaf0ecf9ee28b392f4c24e01cbde8 (patch)
tree8933729b55ca6bac343cc1e36c1d029d20ce4386 /rdflib
parentee5ffb9ca972f453f9118cd76ff0f37f1b4de5e7 (diff)
downloadrdflib-f1474c5f42bbaf0ecf9ee28b392f4c24e01cbde8.tar.gz
namespace.py change split_uri to work with trie
Attempting to implement the 'faster' version of compute_qname revealed significant issues with the current split_uri implementation.
Diffstat (limited to 'rdflib')
-rw-r--r--rdflib/namespace.py50
1 files changed, 32 insertions, 18 deletions
diff --git a/rdflib/namespace.py b/rdflib/namespace.py
index 066af507..07a46c29 100644
--- a/rdflib/namespace.py
+++ b/rdflib/namespace.py
@@ -355,6 +355,7 @@ class NamespaceManager(object):
self.__cache = {}
self.__log = None
self.__trie = {}
+ self.__strie = {}
for p, n in self.namespaces(): # self.bind is not always called
insert_trie(self.__trie, n)
self.bind("xml", "http://www.w3.org/XML/1998/namespace")
@@ -384,6 +385,8 @@ class NamespaceManager(object):
"""
try:
namespace, name = split_uri(rdfTerm)
+ if namespace not in self.__strie:
+ insert_strie(self.__strie, self.__trie, namespace)
namespace = URIRef(text_type(namespace))
except:
if isinstance(rdfTerm, Variable):
@@ -407,19 +410,22 @@ class NamespaceManager(object):
)
if uri not in self.__cache:
- namespace = get_longest_namespace(self.__trie, uri)
- if namespace is not None:
- name = uri[len(namespace):]
- if '/' in name or '#' in name:
- name = None
- else:
- prefix = self.store.prefix(namespace) # warning multiple prefixes problem
- else:
- name = None
+ namespace, name = split_uri(uri)
+ if namespace not in self.__strie:
+ insert_strie(self.__strie, self.__trie, namespace)
+
+ if self.__strie[namespace]:
+ #print('BEFORE', namespace, uri, self.__strie[namespace])
+ pl_namespace = get_longest_namespace(self.__strie[namespace], uri)
+ if pl_namespace is not None:
+ namespace = pl_namespace
+ name = uri[len(namespace):]
+ #print('AFTER', namespace)
+
+ namespace = URIRef(namespace)
+ prefix = self.store.prefix(namespace) # warning multiple prefixes problem
- if name is None:
- namespace, name = split_uri(uri)
- namespace = URIRef(namespace)
+ if prefix is None:
if not generate:
raise KeyError(
"No known prefix for {} and generate=False".format(namespace)
@@ -430,6 +436,7 @@ class NamespaceManager(object):
if not self.store.namespace(prefix):
break
num += 1
+ print(prefix, namespace)
self.bind(prefix, namespace)
self.__cache[uri] = (prefix, namespace, name)
return self.__cache[uri]
@@ -592,18 +599,25 @@ def split_uri(uri):
break
raise ValueError("Can't split '{}'".format(uri))
-def insert_trie(trie, value):
+def insert_trie(trie, value): # aka get_subtrie_or_insert
+ """ Insert a value into the trie if it is not already contained in the trie.
+ Return the subtree for the value regardless of whether it is a new value
+ or not. """
if value in trie:
- return
+ return trie[value]
for key in trie:
if value.startswith(key):
- insert_trie(trie[key], value)
- return
+ return insert_trie(trie[key], value)
elif key.startswith(value):
- dict_ = trie.pop(key)
+ dict_ = trie.pop(key) # does not break strie since key<->dict_ remains unchanged
trie[value] = {key:dict_}
- return
+ return trie[value]
trie[value] = {}
+ return trie[value]
+
+def insert_strie(strie, trie, value):
+ if value not in strie:
+ strie[value] = insert_trie(trie, value)
def get_longest_namespace(trie, value):
for key in trie: