summaryrefslogtreecommitdiff
path: root/rdflib/plugins/memory.py
diff options
context:
space:
mode:
Diffstat (limited to 'rdflib/plugins/memory.py')
-rw-r--r--rdflib/plugins/memory.py512
1 files changed, 0 insertions, 512 deletions
diff --git a/rdflib/plugins/memory.py b/rdflib/plugins/memory.py
deleted file mode 100644
index 1f8bcfa7..00000000
--- a/rdflib/plugins/memory.py
+++ /dev/null
@@ -1,512 +0,0 @@
-import random
-
-from rdflib.store import Store
-
-__all__ = ["Memory", "IOMemory"]
-
-ANY = Any = None
-
-
class Memory(Store):
    """\
    An in-memory implementation of a triple store.

    Triples are kept in three nested-dictionary indices, so every triple
    is stored three times::

        spo[s][p][o] = 1
        pos[p][o][s] = 1
        osp[o][s][p] = 1

    The ``context`` arguments of the public methods are accepted but not
    used by this class; every triple yielded by :meth:`triples` is paired
    with an empty iterator of contexts.

    Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
    """

    def __init__(self, configuration=None, identifier=None):
        """Create an empty store.

        ``configuration`` is forwarded to ``Store.__init__``;
        ``identifier`` is only recorded on the instance.
        """
        super(Memory, self).__init__(configuration)
        self.identifier = identifier

        # indexed by [subject][predicate][object]
        self.__spo = {}

        # indexed by [predicate][object][subject]
        self.__pos = {}

        # indexed by [object][subject][predicate]
        self.__osp = {}

        # prefix -> namespace and namespace -> prefix lookup tables
        self.__namespace = {}
        self.__prefix = {}

    def add(self, triple, context, quoted=False):
        """\
        Add a triple to the store of triples.

        ``context`` and ``quoted`` are not used by this implementation.
        Adding a triple that is already present is a no-op.
        """
        subject, predicate, obj = triple
        # Register the triple in all three indices, creating the nested
        # dictionaries where they do not yet exist.
        self.__spo.setdefault(subject, {}).setdefault(predicate, {})[obj] = 1
        self.__pos.setdefault(predicate, {}).setdefault(obj, {})[subject] = 1
        self.__osp.setdefault(obj, {}).setdefault(subject, {})[predicate] = 1

    def remove(self, triple_pattern, context=None):
        """Remove every triple matching ``triple_pattern``.

        A ``None`` term in the pattern matches anything.  ``context`` is
        not used by this implementation.
        """
        # triples() is a lazy generator over the very dictionaries that are
        # modified below, so the matches have to be collected before the
        # first deletion; otherwise Python 3 raises "dictionary changed
        # size during iteration".
        matches = [triple for triple, _ in self.triples(triple_pattern)]
        for subject, predicate, obj in matches:
            self.__unindex(self.__spo, subject, predicate, obj)
            self.__unindex(self.__pos, predicate, obj, subject)
            self.__unindex(self.__osp, obj, subject, predicate)

    @staticmethod
    def __unindex(index, first, second, third):
        """Delete ``index[first][second][third]`` and prune empty levels.

        Dropping the dictionaries that become empty keeps the indices from
        accumulating dead keys after removals.
        """
        middle = index[first]
        leaf = middle[second]
        del leaf[third]
        if not leaf:
            del middle[second]
            if not middle:
                del index[first]

    def triples(self, triple_pattern, context=None):
        """A generator over all the triples matching ``triple_pattern``.

        Each item is ``((subject, predicate, object), contexts)`` where
        ``contexts`` is an empty iterator.  ``None`` (``ANY``) in the
        pattern is a wildcard.  The index used depends on which terms are
        bound: ``spo`` when the subject is given, otherwise ``pos`` when the
        predicate is given, otherwise ``osp`` when the object is given, and
        a full walk of ``spo`` when nothing is bound.

        The store must not be modified while the generator is being
        consumed, because it iterates the live index dictionaries.
        """
        subject, predicate, obj = triple_pattern
        no_contexts = self.__contexts

        if subject is not ANY:  # subject is given
            by_predicate = self.__spo.get(subject, {})
            if predicate is not ANY:  # subject+predicate is given
                objects = by_predicate.get(predicate, {})
                if obj is not ANY:  # fully bound: membership test only
                    if obj in objects:
                        yield (subject, predicate, obj), no_contexts()
                else:  # object unbound
                    for o in objects:
                        yield (subject, predicate, o), no_contexts()
            else:  # subject given, predicate unbound
                for p, objects in by_predicate.items():
                    if obj is not ANY:  # object is given
                        if obj in objects:
                            yield (subject, p, obj), no_contexts()
                    else:  # object unbound
                        for o in objects:
                            yield (subject, p, o), no_contexts()
        elif predicate is not ANY:  # predicate is given, subject unbound
            by_object = self.__pos.get(predicate, {})
            if obj is not ANY:  # predicate+object is given
                for s in by_object.get(obj, {}):
                    yield (s, predicate, obj), no_contexts()
            else:  # only the predicate is given
                for o, subjects in by_object.items():
                    for s in subjects:
                        yield (s, predicate, o), no_contexts()
        elif obj is not ANY:  # object is given, subject+predicate unbound
            for s, predicates in self.__osp.get(obj, {}).items():
                for p in predicates:
                    yield (s, p, obj), no_contexts()
        else:  # subject+predicate+object unbound
            for s, by_predicate in self.__spo.items():
                for p, objects in by_predicate.items():
                    for o in objects:
                        yield (s, p, o), no_contexts()

    def __len__(self, context=None):
        """Return the number of triples in the store."""
        # Every triple appears exactly once as a leaf of the spo index, so
        # the leaf dictionaries can be sized instead of walking each triple.
        return sum(
            len(objects)
            for by_predicate in self.__spo.values()
            for objects in by_predicate.values()
        )

    def bind(self, prefix, namespace):
        """Associate ``prefix`` with ``namespace`` in both lookup tables."""
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to ``prefix``, or ``None``."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        """Return the prefix bound to ``namespace``, or ``None``."""
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield ``(prefix, namespace)`` for every binding."""
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def __contexts(self):
        """Return an empty generator; this class records no contexts."""
        return (c for c in ())
-
-
class IOMemory(Store):
    """\
    An integer-key-optimized context-aware in-memory store.

    Uses three dict indices (for subjects, objects and predicates) holding
    sets of triples. Context information is tracked in a separate dict, with
    the triple as key and a dict of {context: quoted} items as value. The
    context information is used to filter triple query results.

    Memory usage is low due to several optimizations. RDF nodes are not
    stored directly in the indices; instead, the indices hold integer keys
    and the actual nodes are only stored once in int-to-object and
    object-to-int mapping dictionaries. A default context is determined
    based on the first triple that is added to the store, and no context
    information is actually stored for subsequent other triples with the
    same context information.

    Most operations should be quite fast, but a triples() query with two
    bound parts requires a set intersection operation, which may be slow in
    some cases. When multiple contexts are used in the same store, filtering
    based on context has to be done after each query, which may also be
    slow.

    """

    context_aware = True
    formula_aware = True
    graph_aware = True

    # The following variable name conventions are used in this class:
    #
    # subject, predicate, object             unencoded triple parts
    # triple = (subject, predicate, object)  unencoded triple
    # context:                               unencoded context
    #
    # sid, pid, oid                          integer-encoded triple parts
    # enctriple = (sid, pid, oid)            integer-encoded triple
    # cid                                    integer-encoded context

    def __init__(self, configuration=None, identifier=None):
        """Create an empty store.

        ``configuration`` is forwarded to ``Store.__init__`` and
        ``identifier`` is recorded on the instance, in the same way as
        ``Memory.__init__`` does.
        """
        super(IOMemory, self).__init__(configuration)
        self.identifier = identifier
        self.__namespace = {}
        self.__prefix = {}

        # Mappings for encoding RDF nodes using integer keys, to save memory
        # in the indexes. Note that None is always mapped to itself, to make
        # it easy to test for it in either encoded or unencoded form.
        self.__int2obj = {None: None}  # maps integer keys to objects
        self.__obj2int = {None: None}  # maps objects to integer keys

        # Indexes for each triple part, and a list of contexts for each triple
        self.__subjectIndex = {}  # key: sid    val: set(enctriples)
        self.__predicateIndex = {}  # key: pid    val: set(enctriples)
        self.__objectIndex = {}  # key: oid    val: set(enctriples)
        # key: enctriple    val: {cid1: quoted, cid2: quoted ...}
        self.__tripleContexts = {}
        self.__contextTriples = {None: set()}  # key: cid    val: set(enctriples)

        # all contexts used in store (unencoded)
        self.__all_contexts = set()
        # default context information for triples
        self.__defaultContexts = None

    def bind(self, prefix, namespace):
        """Associate ``prefix`` with ``namespace`` in both lookup tables."""
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to ``prefix``, or ``None``."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        """Return the prefix bound to ``namespace``, or ``None``."""
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield ``(prefix, namespace)`` for every binding."""
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def add(self, triple, context, quoted=False):
        """Add ``triple`` to ``context``, indexing it by each of its parts."""
        Store.add(self, triple, context, quoted)

        if context is not None:
            self.__all_contexts.add(context)

        enctriple = self.__encodeTriple(triple)
        sid, pid, oid = enctriple

        # This has to run before the indices are touched: it looks the
        # triple up in the subject index to decide whether it is new.
        self.__addTripleContext(enctriple, context, quoted)

        self.__subjectIndex.setdefault(sid, set()).add(enctriple)
        self.__predicateIndex.setdefault(pid, set()).add(enctriple)
        self.__objectIndex.setdefault(oid, set()).add(enctriple)

    def remove(self, triplepat, context=None):
        """Remove the triples matching ``triplepat`` from ``context``.

        With ``context=None`` the matching triples are removed from every
        context.  A triple is dropped from the indices once it no longer
        belongs to any context.
        """
        req_cid = self.__obj2id(context)
        for triple, contexts in self.triples(triplepat, context):
            enctriple = self.__encodeTriple(triple)
            for cid in self.__getTripleContexts(enctriple):
                if context is not None and req_cid != cid:
                    continue
                self.__removeTripleContext(enctriple, cid)
            ctxs = self.__getTripleContexts(enctriple, skipQuoted=True)
            if None in ctxs and (context is None or len(ctxs) == 1):
                # only the default-context entry is left (or everything is
                # being removed), so drop that entry as well
                self.__removeTripleContext(enctriple, None)
            if not self.__getTripleContexts(enctriple):
                # triple has been removed from all contexts
                sid, pid, oid = enctriple
                self.__subjectIndex[sid].remove(enctriple)
                self.__predicateIndex[pid].remove(enctriple)
                self.__objectIndex[oid].remove(enctriple)

                del self.__tripleContexts[enctriple]

        if (
            req_cid is not None
            and req_cid in self.__contextTriples
            and not self.__contextTriples[req_cid]
        ):
            # all triples are removed out of this context
            # and it's not the default context so delete it
            del self.__contextTriples[req_cid]

        if (
            triplepat == (None, None, None)
            and context in self.__all_contexts
            and not self.graph_aware
        ):
            # remove the whole context
            self.__all_contexts.remove(context)

    def triples(self, triplein, context=None):
        """Return an iterator of ``(triple, contexts)`` matching ``triplein``.

        ``None`` in the pattern is a wildcard.  ``contexts`` is a generator
        over the unencoded, non-quoted contexts the triple appears in.
        """
        if context is not None:
            if context == self:  # hmm...does this really ever happen?
                context = None

        cid = self.__obj2id(context)
        enctriple = self.__encodeTriple(triplein)
        sid, pid, oid = enctriple

        # all triples case (no triple parts given as pattern)
        if sid is None and pid is None and oid is None:
            return self.__all_triples(cid)

        # optimize "triple in graph" case (all parts given)
        if sid is not None and pid is not None and oid is not None:
            if (
                sid in self.__subjectIndex
                and enctriple in self.__subjectIndex[sid]
                and self.__tripleHasContext(enctriple, cid)
            ):
                # one-item generator, so the contexts are looked up lazily
                return ((triplein, self.__contexts(enctriple)) for _ in (0,))
            return self.__emptygen()

        # remaining cases: one or two out of three given
        sets = []
        for key, index in (
            (sid, self.__subjectIndex),
            (pid, self.__predicateIndex),
            (oid, self.__objectIndex),
        ):
            if key is None:
                continue
            if key not in index:
                # a bound part that was never indexed cannot match anything
                return self.__emptygen()
            sets.append(index[key])

        # to get the result, do an intersection of the sets (if necessary)
        if len(sets) > 1:
            enctriples = sets[0].intersection(*sets[1:])
        else:
            # copy, so that the caller may modify the store while iterating
            enctriples = sets[0].copy()

        return (
            (self.__decodeTriple(enctriple), self.__contexts(enctriple))
            for enctriple in enctriples
            if self.__tripleHasContext(enctriple, cid)
        )

    def contexts(self, triple=None):
        """Return an iterator over contexts.

        Without a triple (or with the all-wildcard triple) all contexts
        known to the store are returned; otherwise the non-quoted contexts
        that contain ``triple``.
        """
        if triple is None or triple == (None, None, None):
            return (context for context in self.__all_contexts)

        enctriple = self.__encodeTriple(triple)
        sid, pid, oid = enctriple
        if sid in self.__subjectIndex and enctriple in self.__subjectIndex[sid]:
            return self.__contexts(enctriple)
        return self.__emptygen()

    def __len__(self, context=None):
        """Return the number of triples recorded for ``context``."""
        cid = self.__obj2id(context)
        if cid not in self.__contextTriples:
            return 0
        return len(self.__contextTriples[cid])

    def add_graph(self, graph):
        """Register ``graph`` as a known context."""
        if not self.graph_aware:
            Store.add_graph(self, graph)
        else:
            self.__all_contexts.add(graph)

    def remove_graph(self, graph):
        """Remove all triples of ``graph`` and forget the context."""
        if not self.graph_aware:
            Store.remove_graph(self, graph)
        else:
            self.remove((None, None, None), graph)
            # discard() instead of remove(): an unknown graph is no problem
            self.__all_contexts.discard(graph)

    # internal utility methods below

    def __addTripleContext(self, enctriple, context, quoted):
        """add the given context to the set of contexts for the triple"""
        cid = self.__obj2id(context)

        sid, pid, oid = enctriple
        if sid in self.__subjectIndex and enctriple in self.__subjectIndex[sid]:
            # we know the triple exists somewhere in the store
            if enctriple not in self.__tripleContexts:
                # triple exists with default ctx info
                # start with a copy of the default ctx info
                self.__tripleContexts[enctriple] = self.__defaultContexts.copy()

            self.__tripleContexts[enctriple][cid] = quoted
            if not quoted:
                self.__tripleContexts[enctriple][None] = quoted
        else:
            # the triple didn't exist before in the store
            if quoted:  # this context only
                self.__tripleContexts[enctriple] = {cid: quoted}
            else:  # default context as well
                self.__tripleContexts[enctriple] = {cid: quoted, None: quoted}

        # if the triple is not quoted add it to the default context
        if not quoted:
            self.__contextTriples[None].add(enctriple)

        # always add the triple to given context, making sure it's initialized
        self.__contextTriples.setdefault(cid, set()).add(enctriple)

        # if this is the first ever triple in the store, set default ctx info
        if self.__defaultContexts is None:
            self.__defaultContexts = self.__tripleContexts[enctriple]

        # if the context info is the same as default, no need to store it
        if self.__tripleContexts[enctriple] == self.__defaultContexts:
            del self.__tripleContexts[enctriple]

    def __getTripleContexts(self, enctriple, skipQuoted=False):
        """return a list of (encoded) contexts for the triple, skipping
        quoted contexts if skipQuoted==True"""

        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)

        if not skipQuoted:
            # a snapshot rather than a live view: remove() iterates the
            # result while it rewrites the triple's context record
            return list(ctxs)

        return [cid for cid, quoted in ctxs.items() if not quoted]

    def __tripleHasContext(self, enctriple, cid):
        """return True iff the triple exists in the given context"""
        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)
        return cid in ctxs

    def __removeTripleContext(self, enctriple, cid):
        """remove the context from the triple"""
        # work on a copy so that the shared default record is never mutated
        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts).copy()
        del ctxs[cid]
        if ctxs == self.__defaultContexts:
            del self.__tripleContexts[enctriple]
        else:
            self.__tripleContexts[enctriple] = ctxs
        self.__contextTriples[cid].remove(enctriple)

    def __obj2id(self, obj):
        """encode object, storing it in the encoding map if necessary,
        and return the integer key"""
        if obj not in self.__obj2int:
            key = randid()
            # draw again on the (unlikely) collision with an existing key
            while key in self.__int2obj:
                key = randid()
            self.__obj2int[obj] = key
            self.__int2obj[key] = obj
            return key
        return self.__obj2int[obj]

    def __encodeTriple(self, triple):
        """encode a whole triple, returning the encoded triple"""
        return tuple(map(self.__obj2id, triple))

    def __decodeTriple(self, enctriple):
        """decode a whole encoded triple, returning the original
        triple"""
        return tuple(map(self.__int2obj.get, enctriple))

    def __all_triples(self, cid):
        """return a generator which yields all the triples (unencoded)
        of the given context"""
        if cid not in self.__contextTriples:
            return
        # iterate over a copy so the store may change during iteration
        for enctriple in self.__contextTriples[cid].copy():
            yield self.__decodeTriple(enctriple), self.__contexts(enctriple)

    def __contexts(self, enctriple):
        """return a generator for all the non-quoted contexts
        (unencoded) the encoded triple appears in"""
        return (
            self.__int2obj.get(cid)
            for cid in self.__getTripleContexts(enctriple, skipQuoted=True)
            if cid is not None
        )

    def __emptygen(self):
        """return an empty generator"""
        if False:
            yield
-
-
def randid(
    randint=random.randint,
    choice=random.choice,
    signs=(-1, 1),
    max_id=2000000000,
):
    """Return a random non-zero integer key.

    The result is ``choice(signs) * randint(1, max_id)``; with the default
    arguments it lies in ``[-max_id, -1]`` or ``[1, max_id]``.

    ``randint``, ``choice`` and ``signs`` default to the ``random`` module's
    functions and ``(-1, 1)``; they are bound as default arguments when the
    function is defined.  ``max_id`` is the largest magnitude that can be
    returned.
    """
    return choice(signs) * randint(1, max_id)


# randid() captured the functions it needs as default arguments above, so
# the module name itself is dropped from this module's namespace.
del random