-rw-r--r--  docs/persistence.rst                              4
-rw-r--r--  docs/plugin_parsers.rst                           2
-rw-r--r--  docs/plugin_stores.rst                            7
-rw-r--r--  docs/sphinx-requirements.txt                      2
-rw-r--r--  docs/univrdfstore.rst                             2
-rw-r--r--  examples/conjunctive_graphs.py                    6
-rw-r--r--  rdflib/__init__.py                                6
-rw-r--r--  rdflib/collection.py                              4
-rw-r--r--  rdflib/compare.py                                 2
-rw-r--r--  rdflib/extras/describer.py                        2
-rw-r--r--  rdflib/graph.py                                  60
-rw-r--r--  rdflib/namespace.py                               6
-rw-r--r--  rdflib/parser.py                                100
-rw-r--r--  rdflib/plugin.py                                 17
-rw-r--r--  rdflib/plugins/memory.py                        512
-rwxr-xr-x  rdflib/plugins/parsers/notation3.py             102
-rw-r--r--  rdflib/plugins/parsers/nquads.py                 11
-rw-r--r--  rdflib/plugins/parsers/nt.py                     33
-rw-r--r--  rdflib/plugins/parsers/ntriples.py              119
-rw-r--r--  rdflib/plugins/parsers/trig.py                   12
-rw-r--r--  rdflib/plugins/stores/memory.py                 533
-rw-r--r--  rdflib/plugins/stores/sleepycat.py (renamed from rdflib/plugins/sleepycat.py)  0
-rw-r--r--  rdflib/plugins/stores/sparqlstore.py              2
-rw-r--r--  rdflib/term.py                                    2
-rw-r--r--  rdflib/util.py                                    2
-rw-r--r--  test/rdf/datatypes/test001.borked                29
-rw-r--r--  test/store_performance.py                         2
-rw-r--r--  test/test_aggregate_graphs.py                     4
-rw-r--r--  test/test_canonicalization.py                     4
-rw-r--r--  test/test_dataset.py                              2
-rw-r--r--  test/test_graph.py                               69
-rw-r--r--  test/test_graph_context.py                        2
-rw-r--r--  test/test_iomemory.py                            67
-rw-r--r--  test/test_issue247.py                             4
-rw-r--r--  test/test_issue363.py                             2
-rw-r--r--  test/test_issue801.py                            19
-rw-r--r--  test/test_issue_git_336.py                        2
-rw-r--r--  test/test_literal.py                              2
-rw-r--r--  test/test_memory_store.py                        26
-rw-r--r--  test/test_namespace.py                            4
-rw-r--r--  test/test_nt_misc.py                             62
-rw-r--r--  test/test_parse_file_guess_format.py              7
-rw-r--r--  test/test_parser.py                               1
-rw-r--r--  test/test_seq.py                                  2
-rw-r--r--  test/test_util.py                                 4
-rw-r--r--  test/test_xmlliterals.py                          2
46 files changed, 1039 insertions, 825 deletions
diff --git a/docs/persistence.rst b/docs/persistence.rst
index fbddf38f..bd270a14 100644
--- a/docs/persistence.rst
+++ b/docs/persistence.rst
@@ -19,8 +19,8 @@ this API for a different store.
Stores currently shipped with core RDFLib
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* :class:`Memory <rdflib.plugins.memory.IOMemory>` (not persistent!)
-* :class:`~rdflib.plugins.sleepycat.Sleepycat` (on disk persistence via Python's :ref:`bsddb` or :ref:`bsddb3` packages)
+* :class:`Memory <rdflib.plugins.stores.memory.Memory>` (not persistent!)
+* :class:`~rdflib.plugins.stores.sleepycat.Sleepycat` (on disk persistence via Python's :ref:`bsddb` or :ref:`bsddb3` packages)
* :class:`~rdflib.plugins.stores.sparqlstore.SPARQLStore` - a read-only wrapper around a remote SPARQL Query endpoint.
* :class:`~rdflib.plugins.stores.sparqlstore.SPARQLUpdateStore` - a read-write wrapper around a remote SPARQL query/update endpoint pair.
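
For illustration (a sketch, not part of the diff): after this rename the in-memory store is requested by its new plugin name, and it remains the default:

    from rdflib import Graph

    # "Memory" replaces the old "IOMemory" plugin name
    g = Graph(store="Memory")
    g.parse(
        data="<http://example.com/s> <http://example.com/p> <http://example.com/o> .",
        format="turtle",
    )
    print(len(g))  # -> 1
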
diff --git a/docs/plugin_parsers.rst b/docs/plugin_parsers.rst
index e114958d..81ab7ae6 100644
--- a/docs/plugin_parsers.rst
+++ b/docs/plugin_parsers.rst
@@ -26,7 +26,7 @@ mdata :class:`~rdflib.plugins.parsers.structureddata.MicrodataParser`
microdata :class:`~rdflib.plugins.parsers.structureddata.MicrodataParser`
n3 :class:`~rdflib.plugins.parsers.notation3.N3Parser`
nquads :class:`~rdflib.plugins.parsers.nquads.NQuadsParser`
-nt :class:`~rdflib.plugins.parsers.nt.NTParser`
+nt :class:`~rdflib.plugins.parsers.ntriples.NTParser`
rdfa :class:`~rdflib.plugins.parsers.structureddata.RDFaParser`
rdfa1.0 :class:`~rdflib.plugins.parsers.structureddata.RDFa10Parser`
rdfa1.1 :class:`~rdflib.plugins.parsers.structureddata.RDFaParser`
diff --git a/docs/plugin_stores.rst b/docs/plugin_stores.rst
index 68063577..a936c54e 100644
--- a/docs/plugin_stores.rst
+++ b/docs/plugin_stores.rst
@@ -10,9 +10,10 @@ Name Class
================= ============================================================
Auditable :class:`~rdflib.plugins.stores.auditable.AuditableStore`
Concurrent :class:`~rdflib.plugins.stores.concurrent.ConcurrentStore`
-IOMemory :class:`~rdflib.plugins.memory.IOMemory`
+SimpleMemory :class:`~rdflib.plugins.stores.memory.SimpleMemory`
+Memory :class:`~rdflib.plugins.stores.memory.Memory`
SPARQLStore :class:`~rdflib.plugins.stores.sparqlstore.SPARQLStore`
SPARQLUpdateStore :class:`~rdflib.plugins.stores.sparqlstore.SPARQLUpdateStore`
-Sleepycat :class:`~rdflib.plugins.sleepycat.Sleepycat`
-default :class:`~rdflib.plugins.memory.IOMemory`
+Sleepycat :class:`~rdflib.plugins.stores.sleepycat.Sleepycat`
+default :class:`~rdflib.plugins.stores.memory.Memory`
================= ============================================================
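
The updated table can be cross-checked at runtime with the plugin machinery; a minimal sketch:

    from rdflib import plugin
    from rdflib.store import Store

    # After this change the registered names include SimpleMemory and
    # Memory (the default) instead of IOMemory.
    for p in plugin.plugins(kind=Store):
        print(p.name)
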
diff --git a/docs/sphinx-requirements.txt b/docs/sphinx-requirements.txt
index abed60cb..175ef14e 100644
--- a/docs/sphinx-requirements.txt
+++ b/docs/sphinx-requirements.txt
@@ -1,3 +1,3 @@
-sphinx==3.0.4
+sphinx==3.2.1
sphinxcontrib-apidoc
git+https://github.com/gniezen/n3pygments.git
diff --git a/docs/univrdfstore.rst b/docs/univrdfstore.rst
index f6822e5b..dfb96d81 100644
--- a/docs/univrdfstore.rst
+++ b/docs/univrdfstore.rst
@@ -344,7 +344,7 @@ These are a list of additional kinds of RDF terms (all of which are special Lite
Namespace Management Interfaces
===============================
-The following namespace management interfaces (defined in Graph) could be implemented in the RDF store. Currently, they exist as stub methods of :class:`~rdflib.store.Store` and are defined in the store subclasses (e.g. :class:`~rdflib.store.IOMemory`):
+The following namespace management interfaces (defined in Graph) could be implemented in the RDF store. Currently, they exist as stub methods of :class:`~rdflib.store.Store` and are defined in the store subclasses (e.g. :class:`~rdflib.plugins.stores.memory.Memory`):
.. automethod:: rdflib.store.Store.bind
:noindex:
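
A minimal sketch of the namespace-management stubs mentioned above, exercised through a Graph backed by the default store (the prefix and URI are arbitrary examples):

    from rdflib import Graph, Namespace

    g = Graph()  # default store is now Memory
    EX = Namespace("http://example.com/")
    g.bind("ex", EX)                # delegates to Store.bind
    print(g.store.namespace("ex"))  # -> http://example.com/
    print(g.store.prefix(EX))       # -> ex
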
diff --git a/examples/conjunctive_graphs.py b/examples/conjunctive_graphs.py
index f714d9ff..289046ec 100644
--- a/examples/conjunctive_graphs.py
+++ b/examples/conjunctive_graphs.py
@@ -10,7 +10,7 @@ conjunction (union) of all the graphs.
from rdflib import Namespace, Literal, URIRef
from rdflib.graph import Graph, ConjunctiveGraph
-from rdflib.plugins.memory import IOMemory
+from rdflib.plugins.stores.memory import Memory
if __name__ == "__main__":
@@ -22,7 +22,7 @@ if __name__ == "__main__":
cmary = URIRef("http://love.com/lovers/mary")
cjohn = URIRef("http://love.com/lovers/john")
- store = IOMemory()
+ store = Memory()
g = ConjunctiveGraph(store=store)
g.bind("love", ns)
@@ -33,7 +33,7 @@ if __name__ == "__main__":
gmary.add((mary, ns["hasName"], Literal("Mary")))
gmary.add((mary, ns["loves"], john))
- # add a graph for Mary's facts to the Conjunctive Graph
+ # add a graph for John's facts to the Conjunctive Graph
gjohn = Graph(store=store, identifier=cjohn)
# John's graph contains his cute name
gjohn.add((john, ns["hasCuteName"], Literal("Johnny Boy")))
diff --git a/rdflib/__init__.py b/rdflib/__init__.py
index bce8204f..06b1c2eb 100644
--- a/rdflib/__init__.py
+++ b/rdflib/__init__.py
@@ -92,7 +92,11 @@ _interactive_mode = False
try:
import __main__
- if not hasattr(__main__, "__file__") and sys.stdout is not None and sys.stderr.isatty():
+ if (
+ not hasattr(__main__, "__file__")
+ and sys.stdout is not None
+ and sys.stderr.isatty()
+ ):
# show log messages in interactive mode
_interactive_mode = True
logger.setLevel(logging.INFO)
diff --git a/rdflib/collection.py b/rdflib/collection.py
index 60f2890f..3136bafd 100644
--- a/rdflib/collection.py
+++ b/rdflib/collection.py
@@ -14,7 +14,7 @@ class Collection(object):
>>> from rdflib.graph import Graph
>>> from pprint import pprint
>>> listName = BNode()
- >>> g = Graph('IOMemory')
+ >>> g = Graph('Memory')
>>> listItem1 = BNode()
>>> listItem2 = BNode()
>>> g.add((listName, RDF.first, Literal(1)))
@@ -48,7 +48,7 @@ class Collection(object):
"""
>>> from rdflib.graph import Graph
>>> listName = BNode()
- >>> g = Graph('IOMemory')
+ >>> g = Graph('Memory')
>>> listItem1 = BNode()
>>> listItem2 = BNode()
>>> g.add((listName, RDF.first, Literal(1)))
diff --git a/rdflib/compare.py b/rdflib/compare.py
index 897a30db..ed4415f2 100644
--- a/rdflib/compare.py
+++ b/rdflib/compare.py
@@ -335,7 +335,7 @@ class _TripleCanonicalizer(object):
coloring.extend(colors)
try:
si = sequence.index(c)
- sequence = sequence[:si] + colors + sequence[si + 1:]
+ sequence = sequence[:si] + colors + sequence[si + 1 :]
except ValueError:
sequence = colors[1:] + sequence
combined_colors = []
diff --git a/rdflib/extras/describer.py b/rdflib/extras/describer.py
index 5f7f3841..48d0bebf 100644
--- a/rdflib/extras/describer.py
+++ b/rdflib/extras/describer.py
@@ -98,7 +98,7 @@ Full example in the ``to_rdf`` method below::
... </cv:hasWorkHistory>
... </cv:CV>
... </rdf:RDF>
- ... ''')
+ ... ''', format="xml")
>>>
>>> from rdflib.compare import isomorphic
>>> isomorphic(person_graph, expected) #doctest: +SKIP
diff --git a/rdflib/graph.py b/rdflib/graph.py
index 0932c2f3..ebe1c0f1 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -20,6 +20,7 @@ from rdflib.namespace import NamespaceManager
from rdflib.resource import Resource
from rdflib.collection import Collection
import rdflib.util # avoid circular dependency
+from rdflib.exceptions import ParserError
import os
import shutil
@@ -100,31 +101,31 @@ see :class:`~rdflib.graph.Dataset`
Working with graphs
===================
-Instantiating Graphs with default store (IOMemory) and default identifier
+Instantiating Graphs with default store (Memory) and default identifier
(a BNode):
>>> g = Graph()
>>> g.store.__class__
- <class 'rdflib.plugins.memory.IOMemory'>
+ <class 'rdflib.plugins.stores.memory.Memory'>
>>> g.identifier.__class__
<class 'rdflib.term.BNode'>
-Instantiating Graphs with a IOMemory store and an identifier -
+Instantiating Graphs with a Memory store and an identifier -
<http://rdflib.net>:
- >>> g = Graph('IOMemory', URIRef("http://rdflib.net"))
+ >>> g = Graph('Memory', URIRef("http://rdflib.net"))
>>> g.identifier
rdflib.term.URIRef('http://rdflib.net')
>>> str(g) # doctest: +NORMALIZE_WHITESPACE
"<http://rdflib.net> a rdfg:Graph;rdflib:storage
- [a rdflib:Store;rdfs:label 'IOMemory']."
+ [a rdflib:Store;rdfs:label 'Memory']."
Creating a ConjunctiveGraph - The top level container for all named Graphs
in a "database":
>>> g = ConjunctiveGraph()
>>> str(g.default_context)
- "[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'IOMemory']]."
+ "[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']]."
Adding / removing reified triples to Graph and iterating over it directly or
via triple pattern:
@@ -188,7 +189,7 @@ by RDFLib they are UUIDs and unique.
Graph Aggregation - ConjunctiveGraphs and ReadOnlyGraphAggregate within
the same store:
- >>> store = plugin.get("IOMemory", Store)()
+ >>> store = plugin.get("Memory", Store)()
>>> g1 = Graph(store)
>>> g2 = Graph(store)
>>> g3 = Graph(store)
@@ -774,13 +775,17 @@ class Graph(Node):
# setup the language filtering
if lang is not None:
if lang == "": # we only want not language-tagged literals
+
def langfilter(l_):
return l_.language is None
+
else:
+
def langfilter(l_):
return l_.language == lang
else: # we don't care about language tags
+
def langfilter(l_):
return True
@@ -992,7 +997,7 @@ class Graph(Node):
**args
):
"""
- Parse source adding the resulting triples to the Graph.
+ Parse an RDF source adding the resulting triples to the Graph.
The source is specified using one of source, location, file or
data.
@@ -1006,9 +1011,10 @@ class Graph(Node):
is specified.
- `file`: A file-like object.
- `data`: A string containing the data to be parsed.
- - `format`: Used if format can not be determined from source.
- Defaults to rdf/xml. Format support can be extended with plugins,
- but "xml", "n3", "nt" & "trix" are built in.
+      - `format`: Used if the format cannot be determined from the source,
+        e.g. from the file extension or Media Type. Defaults to text/turtle.
+        Format support can be extended with plugins, but "xml", "n3" (use for
+        turtle), "nt" & "trix" are built in.
- `publicID`: the logical URI to use as the document base. If None
specified the document location is used (at least in the case where
there is a document location).
@@ -1054,6 +1060,11 @@ class Graph(Node):
>>> os.remove(file_name)
+ >>> # default turtle parsing
+ >>> result = g.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .")
+ >>> len(g)
+ 3
+
"""
source = create_input_source(
@@ -1066,24 +1077,27 @@ class Graph(Node):
)
if format is None:
format = source.content_type
- assumed_xml = False
+ could_not_guess_format = False
if format is None:
- if (hasattr(source, "file")
- and getattr(source.file, "name", None)
- and isinstance(source.file.name, str)):
+ if (
+ hasattr(source, "file")
+ and getattr(source.file, "name", None)
+ and isinstance(source.file.name, str)
+ ):
format = rdflib.util.guess_format(source.file.name)
if format is None:
- format = "application/rdf+xml"
- assumed_xml = True
+ format = "turtle"
+ could_not_guess_format = True
parser = plugin.get(format, Parser)()
try:
parser.parse(source, self, **args)
- except SAXParseException as saxpe:
- if assumed_xml:
- logger.warning(
- "Could not guess format for %r, so assumed xml."
- " You can explicitly specify format using the format argument." % source)
- raise saxpe
+ except SyntaxError as se:
+ if could_not_guess_format:
+                    raise ParserError(
+                        "Could not guess RDF format for %r from file extension so tried Turtle but failed. "
+                        "You can explicitly specify format using the format argument." % source)
+ else:
+ raise se
finally:
if source.auto_close:
source.close()
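
A worked example of the new fallback (a sketch, not part of the diff): with no format argument and no usable file extension, parse() now assumes Turtle and raises ParserError rather than a SAX error when that guess fails:

    from rdflib import Graph
    from rdflib.exceptions import ParserError

    g = Graph()
    # parses fine: Turtle is the assumed default
    g.parse(data="<http://example.com/s> <http://example.com/p> <http://example.com/o> .")

    try:
        g.parse(data="this is not RDF in any syntax")
    except ParserError as err:
        print(err)  # suggests passing an explicit format= argument
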
diff --git a/rdflib/namespace.py b/rdflib/namespace.py
index b30b4b00..69ccd31d 100644
--- a/rdflib/namespace.py
+++ b/rdflib/namespace.py
@@ -485,8 +485,8 @@ SKOS = ClosedNamespace(
"relatedMatch",
],
)
-SOSA = Namespace("http://www.w3.org/ns/ssn/")
-SSN = Namespace("http://www.w3.org/ns/sosa/")
+SSN = Namespace("http://www.w3.org/ns/ssn/")
+SOSA = Namespace("http://www.w3.org/ns/sosa/")
TIME = Namespace("http://www.w3.org/2006/time#")
VOID = Namespace("http://rdfs.org/ns/void#")
XMLNS = Namespace("http://www.w3.org/XML/1998/namespace")
@@ -800,7 +800,7 @@ class NamespaceManager(object):
NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"]
SPLIT_START_CATEGORIES = NAME_START_CATEGORIES + ["Nd"]
NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"]
-ALLOWED_NAME_CHARS = ["\u00B7", "\u0387", "-", ".", "_", ":"]
+ALLOWED_NAME_CHARS = ["\u00B7", "\u0387", "-", ".", "_", ":", "%"]
# http://www.w3.org/TR/REC-xml-names/#NT-NCName
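
The SSN/SOSA swap above corrects two namespaces that were bound to each other's URIs; a quick sanity check:

    from rdflib.namespace import SOSA, SSN

    assert str(SSN) == "http://www.w3.org/ns/ssn/"
    assert str(SOSA) == "http://www.w3.org/ns/sosa/"
    print(SOSA.Sensor)  # -> http://www.w3.org/ns/sosa/Sensor
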
diff --git a/rdflib/parser.py b/rdflib/parser.py
index 4d807e7e..fcaed5e4 100644
--- a/rdflib/parser.py
+++ b/rdflib/parser.py
@@ -10,11 +10,11 @@ want to do so through the Graph class parse method.
"""
+import codecs
import os
import sys
-from io import BytesIO
-
+from io import BytesIO, TextIOBase, TextIOWrapper, StringIO, BufferedIOBase
from urllib.request import pathname2url
from urllib.request import Request
@@ -38,6 +38,8 @@ __all__ = [
class Parser(object):
+ __slots__ = set()
+
def __init__(self):
pass
@@ -45,6 +47,37 @@ class Parser(object):
pass
+class BytesIOWrapper(BufferedIOBase):
+ __slots__ = ("wrapped", "encoded", "encoding")
+
+ def __init__(self, wrapped: str, encoding="utf-8"):
+ super(BytesIOWrapper, self).__init__()
+ self.wrapped = wrapped
+ self.encoding = encoding
+ self.encoded = None
+
+ def read(self, *args, **kwargs):
+ if self.encoded is None:
+ b, blen = codecs.getencoder(self.encoding)(self.wrapped)
+ self.encoded = BytesIO(b)
+ return self.encoded.read(*args, **kwargs)
+
+ def read1(self, *args, **kwargs):
+ if self.encoded is None:
+            b, blen = codecs.getencoder(self.encoding)(self.wrapped)
+ self.encoded = BytesIO(b)
+ return self.encoded.read1(*args, **kwargs)
+
+ def readinto(self, *args, **kwargs):
+ raise NotImplementedError()
+
+ def readinto1(self, *args, **kwargs):
+ raise NotImplementedError()
+
+ def write(self, *args, **kwargs):
+ raise NotImplementedError()
+
+
class InputSource(xmlreader.InputSource, object):
"""
TODO:
@@ -56,23 +89,39 @@ class InputSource(xmlreader.InputSource, object):
self.auto_close = False # see Graph.parse(), true if opened by us
def close(self):
+ c = self.getCharacterStream()
+ if c and hasattr(c, "close"):
+ try:
+ c.close()
+ except Exception:
+ pass
f = self.getByteStream()
if f and hasattr(f, "close"):
- f.close()
+ try:
+ f.close()
+ except Exception:
+ pass
class StringInputSource(InputSource):
"""
- TODO:
+    Constructs an RDFLib Parser InputSource from a Python string or bytes object.
"""
- def __init__(self, value, system_id=None):
+ def __init__(self, value, encoding="utf-8", system_id=None):
super(StringInputSource, self).__init__(system_id)
- stream = BytesIO(value)
- self.setByteStream(stream)
- # TODO:
- # encoding = value.encoding
- # self.setEncoding(encoding)
+ if isinstance(value, str):
+ stream = StringIO(value)
+ self.setCharacterStream(stream)
+ self.setEncoding(encoding)
+ b_stream = BytesIOWrapper(value, encoding)
+ self.setByteStream(b_stream)
+ else:
+ stream = BytesIO(value)
+ self.setByteStream(stream)
+ c_stream = TextIOWrapper(stream, encoding)
+ self.setCharacterStream(c_stream)
+ self.setEncoding(c_stream.encoding)
headers = {
@@ -131,8 +180,18 @@ class FileInputSource(InputSource):
system_id = URIRef(urljoin("file:", pathname2url(file.name)), base=base)
super(FileInputSource, self).__init__(system_id)
self.file = file
- self.setByteStream(file)
- # TODO: self.setEncoding(encoding)
+ if isinstance(file, TextIOBase): # Python3 unicode fp
+ self.setCharacterStream(file)
+ self.setEncoding(file.encoding)
+ try:
+ b = file.buffer
+ self.setByteStream(b)
+ except (AttributeError, LookupError):
+ self.setByteStream(file)
+ else:
+ self.setByteStream(file)
+ # We cannot set characterStream here because
+ # we do not know the Raw Bytes File encoding.
def __repr__(self):
return repr(self.file)
@@ -168,10 +227,21 @@ def create_input_source(
else:
if isinstance(source, str):
location = source
+ elif isinstance(source, bytes):
+ data = source
elif hasattr(source, "read") and not isinstance(source, Namespace):
f = source
input_source = InputSource()
- input_source.setByteStream(f)
+ if hasattr(source, "encoding"):
+ input_source.setCharacterStream(source)
+ input_source.setEncoding(source.encoding)
+ try:
+                b = source.buffer
+ input_source.setByteStream(b)
+ except (AttributeError, LookupError):
+ input_source.setByteStream(source)
+ else:
+ input_source.setByteStream(f)
if f is sys.stdin:
input_source.setSystemId("file:///dev/stdin")
elif hasattr(f, "name"):
@@ -203,8 +273,8 @@ def create_input_source(
input_source = FileInputSource(file)
if data is not None:
- if isinstance(data, str):
- data = data.encode("utf-8")
+ if not isinstance(data, (str, bytes, bytearray)):
+            raise RuntimeError("parse data can only be str or bytes.")
input_source = StringInputSource(data)
auto_close = True
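
A sketch of what the reworked StringInputSource exposes for str versus bytes input (stream accessors as in the code above):

    from rdflib.parser import StringInputSource

    nt = "<http://example.com/s> <http://example.com/p> <http://example.com/o> ."

    s = StringInputSource(nt)            # str: character stream is primary
    print(type(s.getCharacterStream()))  # io.StringIO
    print(s.getByteStream().read(9))     # lazily encoded via BytesIOWrapper

    b = StringInputSource(nt.encode())   # bytes: byte stream is primary
    print(type(b.getByteStream()))       # io.BytesIO
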
diff --git a/rdflib/plugin.py b/rdflib/plugin.py
index b653be01..2af7370f 100644
--- a/rdflib/plugin.py
+++ b/rdflib/plugin.py
@@ -11,7 +11,7 @@ following to your setup::
entry_points = {
'rdf.plugins.parser': [
- 'nt = rdf.plugins.parsers.nt:NTParser',
+ 'nt = rdf.plugins.parsers.ntriples:NTParser',
],
'rdf.plugins.serializer': [
'nt = rdf.plugins.serializers.NTSerializer:NTSerializer',
@@ -128,11 +128,12 @@ def plugins(name=None, kind=None):
yield p
-register("default", Store, "rdflib.plugins.memory", "IOMemory")
-register("IOMemory", Store, "rdflib.plugins.memory", "IOMemory")
+register("default", Store, "rdflib.plugins.stores.memory", "Memory")
+register("Memory", Store, "rdflib.plugins.stores.memory", "Memory")
+register("SimpleMemory", Store, "rdflib.plugins.stores.memory", "SimpleMemory")
register("Auditable", Store, "rdflib.plugins.stores.auditable", "AuditableStore")
register("Concurrent", Store, "rdflib.plugins.stores.concurrent", "ConcurrentStore")
-register("Sleepycat", Store, "rdflib.plugins.sleepycat", "Sleepycat")
+register("Sleepycat", Store, "rdflib.plugins.stores.sleepycat", "Sleepycat")
register("SPARQLStore", Store, "rdflib.plugins.stores.sparqlstore", "SPARQLStore")
register(
"SPARQLUpdateStore", Store, "rdflib.plugins.stores.sparqlstore", "SPARQLUpdateStore"
@@ -182,10 +183,10 @@ register("n3", Parser, "rdflib.plugins.parsers.notation3", "N3Parser")
register("text/turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser")
register("turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser")
register("ttl", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser")
-register("application/n-triples", Parser, "rdflib.plugins.parsers.nt", "NTParser")
-register("ntriples", Parser, "rdflib.plugins.parsers.nt", "NTParser")
-register("nt", Parser, "rdflib.plugins.parsers.nt", "NTParser")
-register("nt11", Parser, "rdflib.plugins.parsers.nt", "NTParser")
+register("application/n-triples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser")
+register("ntriples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser")
+register("nt", Parser, "rdflib.plugins.parsers.ntriples", "NTParser")
+register("nt11", Parser, "rdflib.plugins.parsers.ntriples", "NTParser")
register("application/n-quads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser")
register("nquads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser")
register("application/trix", Parser, "rdflib.plugins.parsers.trix", "TriXParser")
diff --git a/rdflib/plugins/memory.py b/rdflib/plugins/memory.py
deleted file mode 100644
index 1f8bcfa7..00000000
--- a/rdflib/plugins/memory.py
+++ /dev/null
@@ -1,512 +0,0 @@
-import random
-
-from rdflib.store import Store
-
-__all__ = ["Memory", "IOMemory"]
-
-ANY = Any = None
-
-
-class Memory(Store):
- """\
- An in memory implementation of a triple store.
-
- This triple store uses nested dictionaries to store triples. Each
- triple is stored in two such indices as follows spo[s][p][o] = 1 and
- pos[p][o][s] = 1.
-
- Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
- """
-
- def __init__(self, configuration=None, identifier=None):
- super(Memory, self).__init__(configuration)
- self.identifier = identifier
-
- # indexed by [subject][predicate][object]
- self.__spo = {}
-
- # indexed by [predicate][object][subject]
- self.__pos = {}
-
- # indexed by [predicate][object][subject]
- self.__osp = {}
-
- self.__namespace = {}
- self.__prefix = {}
-
- def add(self, triple, context, quoted=False):
- """\
- Add a triple to the store of triples.
- """
- # add dictionary entries for spo[s][p][p] = 1 and pos[p][o][s]
- # = 1, creating the nested dictionaries where they do not yet
- # exits.
- subject, predicate, object = triple
- spo = self.__spo
- try:
- po = spo[subject]
- except:
- po = spo[subject] = {}
- try:
- o = po[predicate]
- except:
- o = po[predicate] = {}
- o[object] = 1
-
- pos = self.__pos
- try:
- os = pos[predicate]
- except:
- os = pos[predicate] = {}
- try:
- s = os[object]
- except:
- s = os[object] = {}
- s[subject] = 1
-
- osp = self.__osp
- try:
- sp = osp[object]
- except:
- sp = osp[object] = {}
- try:
- p = sp[subject]
- except:
- p = sp[subject] = {}
- p[predicate] = 1
-
- def remove(self, triple_pattern, context=None):
- for (subject, predicate, object), c in self.triples(triple_pattern):
- del self.__spo[subject][predicate][object]
- del self.__pos[predicate][object][subject]
- del self.__osp[object][subject][predicate]
-
- def triples(self, triple_pattern, context=None):
- """A generator over all the triples matching """
- subject, predicate, object = triple_pattern
- if subject != ANY: # subject is given
- spo = self.__spo
- if subject in spo:
- subjectDictionary = spo[subject]
- if predicate != ANY: # subject+predicate is given
- if predicate in subjectDictionary:
- if object != ANY: # subject+predicate+object is given
- if object in subjectDictionary[predicate]:
- yield (subject, predicate, object), self.__contexts()
- else: # given object not found
- pass
- else: # subject+predicate is given, object unbound
- for o in subjectDictionary[predicate].keys():
- yield (subject, predicate, o), self.__contexts()
- else: # given predicate not found
- pass
- else: # subject given, predicate unbound
- for p in subjectDictionary.keys():
- if object != ANY: # object is given
- if object in subjectDictionary[p]:
- yield (subject, p, object), self.__contexts()
- else: # given object not found
- pass
- else: # object unbound
- for o in subjectDictionary[p].keys():
- yield (subject, p, o), self.__contexts()
- else: # given subject not found
- pass
- elif predicate != ANY: # predicate is given, subject unbound
- pos = self.__pos
- if predicate in pos:
- predicateDictionary = pos[predicate]
- if object != ANY: # predicate+object is given, subject unbound
- if object in predicateDictionary:
- for s in predicateDictionary[object].keys():
- yield (s, predicate, object), self.__contexts()
- else: # given object not found
- pass
- else: # predicate is given, object+subject unbound
- for o in predicateDictionary.keys():
- for s in predicateDictionary[o].keys():
- yield (s, predicate, o), self.__contexts()
- elif object != ANY: # object is given, subject+predicate unbound
- osp = self.__osp
- if object in osp:
- objectDictionary = osp[object]
- for s in objectDictionary.keys():
- for p in objectDictionary[s].keys():
- yield (s, p, object), self.__contexts()
- else: # subject+predicate+object unbound
- spo = self.__spo
- for s in spo.keys():
- subjectDictionary = spo[s]
- for p in subjectDictionary.keys():
- for o in subjectDictionary[p].keys():
- yield (s, p, o), self.__contexts()
-
- def __len__(self, context=None):
- # @@ optimize
- i = 0
- for triple in self.triples((None, None, None)):
- i += 1
- return i
-
- def bind(self, prefix, namespace):
- self.__prefix[namespace] = prefix
- self.__namespace[prefix] = namespace
-
- def namespace(self, prefix):
- return self.__namespace.get(prefix, None)
-
- def prefix(self, namespace):
- return self.__prefix.get(namespace, None)
-
- def namespaces(self):
- for prefix, namespace in self.__namespace.items():
- yield prefix, namespace
-
- def __contexts(self):
- return (c for c in []) # TODO: best way to return empty generator
-
-
-class IOMemory(Store):
- """\
- An integer-key-optimized context-aware in-memory store.
-
- Uses three dict indices (for subjects, objects and predicates) holding
- sets of triples. Context information is tracked in a separate dict, with
- the triple as key and a dict of {context: quoted} items as value. The
- context information is used to filter triple query results.
-
- Memory usage is low due to several optimizations. RDF nodes are not
- stored directly in the indices; instead, the indices hold integer keys
- and the actual nodes are only stored once in int-to-object and
- object-to-int mapping dictionaries. A default context is determined
- based on the first triple that is added to the store, and no context
- information is actually stored for subsequent other triples with the
- same context information.
-
- Most operations should be quite fast, but a triples() query with two
- bound parts requires a set intersection operation, which may be slow in
- some cases. When multiple contexts are used in the same store, filtering
- based on context has to be done after each query, which may also be
- slow.
-
- """
-
- context_aware = True
- formula_aware = True
- graph_aware = True
-
- # The following variable name conventions are used in this class:
- #
- # subject, predicate, object unencoded triple parts
- # triple = (subject, predicate, object) unencoded triple
- # context: unencoded context
- #
- # sid, pid, oid integer-encoded triple parts
- # enctriple = (sid, pid, oid) integer-encoded triple
- # cid integer-encoded context
-
- def __init__(self, configuration=None, identifier=None):
- super(IOMemory, self).__init__()
- self.__namespace = {}
- self.__prefix = {}
-
- # Mappings for encoding RDF nodes using integer keys, to save memory
- # in the indexes Note that None is always mapped to itself, to make
- # it easy to test for it in either encoded or unencoded form.
- self.__int2obj = {None: None} # maps integer keys to objects
- self.__obj2int = {None: None} # maps objects to integer keys
-
- # Indexes for each triple part, and a list of contexts for each triple
- self.__subjectIndex = {} # key: sid val: set(enctriples)
- self.__predicateIndex = {} # key: pid val: set(enctriples)
- self.__objectIndex = {} # key: oid val: set(enctriples)
- self.__tripleContexts = (
- {}
- ) # key: enctriple val: {cid1: quoted, cid2: quoted ...}
- self.__contextTriples = {None: set()} # key: cid val: set(enctriples)
-
- # all contexts used in store (unencoded)
- self.__all_contexts = set()
- # default context information for triples
- self.__defaultContexts = None
-
- def bind(self, prefix, namespace):
- self.__prefix[namespace] = prefix
- self.__namespace[prefix] = namespace
-
- def namespace(self, prefix):
- return self.__namespace.get(prefix, None)
-
- def prefix(self, namespace):
- return self.__prefix.get(namespace, None)
-
- def namespaces(self):
- for prefix, namespace in self.__namespace.items():
- yield prefix, namespace
-
- def add(self, triple, context, quoted=False):
- Store.add(self, triple, context, quoted)
-
- if context is not None:
- self.__all_contexts.add(context)
-
- enctriple = self.__encodeTriple(triple)
- sid, pid, oid = enctriple
-
- self.__addTripleContext(enctriple, context, quoted)
-
- if sid in self.__subjectIndex:
- self.__subjectIndex[sid].add(enctriple)
- else:
- self.__subjectIndex[sid] = set([enctriple])
-
- if pid in self.__predicateIndex:
- self.__predicateIndex[pid].add(enctriple)
- else:
- self.__predicateIndex[pid] = set([enctriple])
-
- if oid in self.__objectIndex:
- self.__objectIndex[oid].add(enctriple)
- else:
- self.__objectIndex[oid] = set([enctriple])
-
- def remove(self, triplepat, context=None):
- req_cid = self.__obj2id(context)
- for triple, contexts in self.triples(triplepat, context):
- enctriple = self.__encodeTriple(triple)
- for cid in self.__getTripleContexts(enctriple):
- if context is not None and req_cid != cid:
- continue
- self.__removeTripleContext(enctriple, cid)
- ctxs = self.__getTripleContexts(enctriple, skipQuoted=True)
- if None in ctxs and (context is None or len(ctxs) == 1):
- self.__removeTripleContext(enctriple, None)
- if len(self.__getTripleContexts(enctriple)) == 0:
- # triple has been removed from all contexts
- sid, pid, oid = enctriple
- self.__subjectIndex[sid].remove(enctriple)
- self.__predicateIndex[pid].remove(enctriple)
- self.__objectIndex[oid].remove(enctriple)
-
- del self.__tripleContexts[enctriple]
-
- if (
- req_cid is not None
- and req_cid in self.__contextTriples
- and len(self.__contextTriples[req_cid]) == 0
- ):
- # all triples are removed out of this context
- # and it's not the default context so delete it
- del self.__contextTriples[req_cid]
-
- if (
- triplepat == (None, None, None)
- and context in self.__all_contexts
- and not self.graph_aware
- ):
- # remove the whole context
- self.__all_contexts.remove(context)
-
- def triples(self, triplein, context=None):
- if context is not None:
- if context == self: # hmm...does this really ever happen?
- context = None
-
- cid = self.__obj2id(context)
- enctriple = self.__encodeTriple(triplein)
- sid, pid, oid = enctriple
-
- # all triples case (no triple parts given as pattern)
- if sid is None and pid is None and oid is None:
- return self.__all_triples(cid)
-
- # optimize "triple in graph" case (all parts given)
- if sid is not None and pid is not None and oid is not None:
- if (
- sid in self.__subjectIndex
- and enctriple in self.__subjectIndex[sid]
- and self.__tripleHasContext(enctriple, cid)
- ):
- return ((triplein, self.__contexts(enctriple)) for i in [0])
- else:
- return self.__emptygen()
-
- # remaining cases: one or two out of three given
- sets = []
- if sid is not None:
- if sid in self.__subjectIndex:
- sets.append(self.__subjectIndex[sid])
- else:
- return self.__emptygen()
- if pid is not None:
- if pid in self.__predicateIndex:
- sets.append(self.__predicateIndex[pid])
- else:
- return self.__emptygen()
- if oid is not None:
- if oid in self.__objectIndex:
- sets.append(self.__objectIndex[oid])
- else:
- return self.__emptygen()
-
- # to get the result, do an intersection of the sets (if necessary)
- if len(sets) > 1:
- enctriples = sets[0].intersection(*sets[1:])
- else:
- enctriples = sets[0].copy()
-
- return (
- (self.__decodeTriple(enctriple), self.__contexts(enctriple))
- for enctriple in enctriples
- if self.__tripleHasContext(enctriple, cid)
- )
-
- def contexts(self, triple=None):
- if triple is None or triple == (None, None, None):
- return (context for context in self.__all_contexts)
-
- enctriple = self.__encodeTriple(triple)
- sid, pid, oid = enctriple
- if sid in self.__subjectIndex and enctriple in self.__subjectIndex[sid]:
- return self.__contexts(enctriple)
- else:
- return self.__emptygen()
-
- def __len__(self, context=None):
- cid = self.__obj2id(context)
- if cid not in self.__contextTriples:
- return 0
- return len(self.__contextTriples[cid])
-
- def add_graph(self, graph):
- if not self.graph_aware:
- Store.add_graph(self, graph)
- else:
- self.__all_contexts.add(graph)
-
- def remove_graph(self, graph):
- if not self.graph_aware:
- Store.remove_graph(self, graph)
- else:
- self.remove((None, None, None), graph)
- try:
- self.__all_contexts.remove(graph)
- except KeyError:
- pass # we didn't know this graph, no problem
-
- # internal utility methods below
-
- def __addTripleContext(self, enctriple, context, quoted):
- """add the given context to the set of contexts for the triple"""
- cid = self.__obj2id(context)
-
- sid, pid, oid = enctriple
- if sid in self.__subjectIndex and enctriple in self.__subjectIndex[sid]:
- # we know the triple exists somewhere in the store
- if enctriple not in self.__tripleContexts:
- # triple exists with default ctx info
- # start with a copy of the default ctx info
- self.__tripleContexts[enctriple] = self.__defaultContexts.copy()
-
- self.__tripleContexts[enctriple][cid] = quoted
- if not quoted:
- self.__tripleContexts[enctriple][None] = quoted
- else:
- # the triple didn't exist before in the store
- if quoted: # this context only
- self.__tripleContexts[enctriple] = {cid: quoted}
- else: # default context as well
- self.__tripleContexts[enctriple] = {cid: quoted, None: quoted}
-
- # if the triple is not quoted add it to the default context
- if not quoted:
- self.__contextTriples[None].add(enctriple)
-
- # always add the triple to given context, making sure it's initialized
- if cid not in self.__contextTriples:
- self.__contextTriples[cid] = set()
- self.__contextTriples[cid].add(enctriple)
-
- # if this is the first ever triple in the store, set default ctx info
- if self.__defaultContexts is None:
- self.__defaultContexts = self.__tripleContexts[enctriple]
-
- # if the context info is the same as default, no need to store it
- if self.__tripleContexts[enctriple] == self.__defaultContexts:
- del self.__tripleContexts[enctriple]
-
- def __getTripleContexts(self, enctriple, skipQuoted=False):
- """return a list of (encoded) contexts for the triple, skipping
- quoted contexts if skipQuoted==True"""
-
- ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)
-
- if not skipQuoted:
- return ctxs.keys()
-
- return [cid for cid, quoted in ctxs.items() if not quoted]
-
- def __tripleHasContext(self, enctriple, cid):
- """return True iff the triple exists in the given context"""
- ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)
- return cid in ctxs
-
- def __removeTripleContext(self, enctriple, cid):
- """remove the context from the triple"""
- ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts).copy()
- del ctxs[cid]
- if ctxs == self.__defaultContexts:
- del self.__tripleContexts[enctriple]
- else:
- self.__tripleContexts[enctriple] = ctxs
- self.__contextTriples[cid].remove(enctriple)
-
- def __obj2id(self, obj):
- """encode object, storing it in the encoding map if necessary,
- and return the integer key"""
- if obj not in self.__obj2int:
- id = randid()
- while id in self.__int2obj:
- id = randid()
- self.__obj2int[obj] = id
- self.__int2obj[id] = obj
- return id
- return self.__obj2int[obj]
-
- def __encodeTriple(self, triple):
- """encode a whole triple, returning the encoded triple"""
- return tuple(map(self.__obj2id, triple))
-
- def __decodeTriple(self, enctriple):
- """decode a whole encoded triple, returning the original
- triple"""
- return tuple(map(self.__int2obj.get, enctriple))
-
- def __all_triples(self, cid):
- """return a generator which yields all the triples (unencoded)
- of the given context"""
- if cid not in self.__contextTriples:
- return
- for enctriple in self.__contextTriples[cid].copy():
- yield self.__decodeTriple(enctriple), self.__contexts(enctriple)
-
- def __contexts(self, enctriple):
- """return a generator for all the non-quoted contexts
- (unencoded) the encoded triple appears in"""
- return (
- self.__int2obj.get(cid)
- for cid in self.__getTripleContexts(enctriple, skipQuoted=True)
- if cid is not None
- )
-
- def __emptygen(self):
- """return an empty generator"""
- if False:
- yield
-
-
-def randid(randint=random.randint, choice=random.choice, signs=(-1, 1)):
- return choice(signs) * randint(1, 2000000000)
-
-
-del random
diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py
index 37b2cde8..2573d5d1 100755
--- a/rdflib/plugins/parsers/notation3.py
+++ b/rdflib/plugins/parsers/notation3.py
@@ -136,10 +136,13 @@ def join(here, there):
return here + frag
# join('mid:foo@example', '../foo') bzzt
- if here[bcolonl + 1: bcolonl + 2] != "/":
- raise ValueError("Base <%s> has no slash after " "colon - with relative '%s'." % (here, there))
+ if here[bcolonl + 1 : bcolonl + 2] != "/":
+ raise ValueError(
+ "Base <%s> has no slash after "
+ "colon - with relative '%s'." % (here, there)
+ )
- if here[bcolonl + 1: bcolonl + 3] == "//":
+ if here[bcolonl + 1 : bcolonl + 3] == "//":
bpath = here.find("/", bcolonl + 3)
else:
bpath = bcolonl + 1
@@ -499,14 +502,14 @@ class SinkParser:
"""
assert tok[0] not in _notNameChars # not for punctuation
- if argstr[i: i + 1] == "@":
+ if argstr[i : i + 1] == "@":
i = i + 1
else:
if tok not in self.keywords:
return -1 # No, this has neither keywords declaration nor "@"
if (
- argstr[i: i + len(tok)] == tok
+ argstr[i : i + len(tok)] == tok
and (argstr[i + len(tok)] in _notKeywordsChars)
or (colon and argstr[i + len(tok)] == ":")
):
@@ -523,7 +526,7 @@ class SinkParser:
assert tok[0] not in _notNameChars # not for punctuation
- if argstr[i: i + len(tok)].lower() == tok.lower() and (
+ if argstr[i : i + len(tok)].lower() == tok.lower() and (
argstr[i + len(tok)] in _notQNameChars
):
i = i + len(tok)
@@ -791,23 +794,23 @@ class SinkParser:
res.append(("->", RDF_type))
return j
- if argstr[i: i + 2] == "<=":
+ if argstr[i : i + 2] == "<=":
if self.turtle:
self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. ")
res.append(("<-", self._store.newSymbol(Logic_NS + "implies")))
return i + 2
- if argstr[i: i + 1] == "=":
+ if argstr[i : i + 1] == "=":
if self.turtle:
self.BadSyntax(argstr, i, "Found '=' in Turtle mode")
- if argstr[i + 1: i + 2] == ">":
+ if argstr[i + 1 : i + 2] == ">":
res.append(("->", self._store.newSymbol(Logic_NS + "implies")))
return i + 2
res.append(("->", DAML_sameAs))
return i + 1
- if argstr[i: i + 2] == ":=":
+ if argstr[i : i + 2] == ":=":
if self.turtle:
self.BadSyntax(argstr, i, "Found ':=' in Turtle mode")
@@ -820,7 +823,7 @@ class SinkParser:
res.append(("->", r[0]))
return j
- if argstr[i: i + 2] == ">-" or argstr[i: i + 2] == "<-":
+ if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-":
self.BadSyntax(argstr, j, ">- ... -> syntax is obsolete.")
return -1
@@ -841,8 +844,8 @@ class SinkParser:
if j < 0:
return j # nope
- while argstr[j: j + 1] in "!^": # no spaces, must follow exactly (?)
- ch = argstr[j: j + 1]
+ while argstr[j : j + 1] in "!^": # no spaces, must follow exactly (?)
+ ch = argstr[j : j + 1]
subj = res.pop()
obj = self.blankNode(uri=self.here(j))
j = self.node(argstr, j + 1, res)
@@ -876,7 +879,7 @@ class SinkParser:
if j < 0:
return j # eof
i = j
- ch = argstr[i: i + 1] # Quick 1-character checks first:
+ ch = argstr[i : i + 1] # Quick 1-character checks first:
if ch == "[":
bnodeID = self.here(i)
@@ -884,7 +887,7 @@ class SinkParser:
if j < 0:
self.BadSyntax(argstr, i, "EOF after '['")
# Hack for "is" binding name to anon node
- if argstr[j: j + 1] == "=":
+ if argstr[j : j + 1] == "=":
if self.turtle:
self.BadSyntax(
argstr, j, "Found '[=' or '[ =' when in turtle mode."
@@ -902,7 +905,7 @@ class SinkParser:
self.BadSyntax(
argstr, i, "EOF when objectList expected after [ = "
)
- if argstr[j: j + 1] == ";":
+ if argstr[j : j + 1] == ";":
j = j + 1
else:
self.BadSyntax(argstr, i, "objectList expected after [= ")
@@ -919,7 +922,7 @@ class SinkParser:
self.BadSyntax(
argstr, i, "EOF when ']' expected after [ <propertyList>"
)
- if argstr[j: j + 1] != "]":
+ if argstr[j : j + 1] != "]":
self.BadSyntax(argstr, j, "']' expected")
res.append(subj)
return j + 1
@@ -928,7 +931,7 @@ class SinkParser:
# if self.turtle:
# self.BadSyntax(argstr, i,
# "found '{' while in Turtle mode, Formulas not supported!")
- ch2 = argstr[i + 1: i + 2]
+ ch2 = argstr[i + 1 : i + 2]
if ch2 == "$":
# a set
i += 1
@@ -939,12 +942,12 @@ class SinkParser:
i = self.skipSpace(argstr, j)
if i < 0:
self.BadSyntax(argstr, i, "needed '$}', found end.")
- if argstr[i: i + 2] == "$}":
+ if argstr[i : i + 2] == "$}":
j = i + 2
break
if not first_run:
- if argstr[i: i + 1] == ",":
+ if argstr[i : i + 1] == ",":
i += 1
else:
self.BadSyntax(argstr, i, "expected: ','")
@@ -979,7 +982,7 @@ class SinkParser:
if i < 0:
self.BadSyntax(argstr, i, "needed '}', found end.")
- if argstr[i: i + 1] == "}":
+ if argstr[i : i + 1] == "}":
j = i + 1
break
@@ -998,7 +1001,7 @@ class SinkParser:
if ch == "(":
thing_type = self._store.newList
- ch2 = argstr[i + 1: i + 2]
+ ch2 = argstr[i + 1 : i + 2]
if ch2 == "$":
thing_type = self._store.newSet
i += 1
@@ -1009,7 +1012,7 @@ class SinkParser:
i = self.skipSpace(argstr, j)
if i < 0:
self.BadSyntax(argstr, i, "needed ')', found end.")
- if argstr[i: i + 1] == ")":
+ if argstr[i : i + 1] == ")":
j = i + 1
break
@@ -1062,7 +1065,7 @@ class SinkParser:
break
i = j + 1
- if argstr[j: j + 2] == ":-":
+ if argstr[j : j + 2] == ":-":
if self.turtle:
self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode")
i = j + 2
@@ -1092,7 +1095,7 @@ class SinkParser:
j = self.skipSpace(argstr, i)
if j < 0:
self.BadSyntax(argstr, j, "EOF found in list of objects")
- if argstr[i: i + 1] != ";":
+ if argstr[i : i + 1] != ";":
return i
i = i + 1 # skip semicolon and continue
@@ -1113,7 +1116,7 @@ class SinkParser:
j = self.skipSpace(argstr, i)
if j < 0:
return j # eof
- ch = argstr[j: j + 1]
+ ch = argstr[j : j + 1]
if ch != ",":
if ch != ".":
return -1
@@ -1130,7 +1133,7 @@ class SinkParser:
j = self.skipSpace(argstr, i)
if j < 0:
self.BadSyntax(argstr, j, "EOF found after object")
- if argstr[j: j + 1] != ",":
+ if argstr[j : j + 1] != ",":
return j # Found something else!
i = self.object(argstr, j + 1, res)
if i < 0:
@@ -1140,11 +1143,11 @@ class SinkParser:
j = self.skipSpace(argstr, i)
if j < 0:
return j # eof
- if argstr[j: j + 1] == ".":
+ if argstr[j : j + 1] == ".":
return j + 1 # skip
- if argstr[j: j + 1] == "}":
+ if argstr[j : j + 1] == "}":
return j # don't skip it
- if argstr[j: j + 1] == "]":
+ if argstr[j : j + 1] == "]":
return j
self.BadSyntax(argstr, j, "expected '.' or '}' or ']' at end of statement")
@@ -1209,7 +1212,7 @@ class SinkParser:
assert (
":" in uref
), "With no base URI, cannot deal with relative URIs"
- if argstr[i - 1: i] == "#" and not uref[-1:] == "#":
+ if argstr[i - 1 : i] == "#" and not uref[-1:] == "#":
uref = uref + "#" # She meant it! Weirdness in urlparse?
symb = self._store.newSymbol(uref)
if symb in self._variables:
@@ -1258,7 +1261,7 @@ class SinkParser:
if j < 0:
return -1
- if argstr[j: j + 1] != "?":
+ if argstr[j : j + 1] != "?":
return -1
j = j + 1
i = j
@@ -1416,7 +1419,7 @@ class SinkParser:
i = j
if argstr[i] in self.string_delimiters:
- if argstr[i: i + 3] == argstr[i] * 3:
+ if argstr[i : i + 3] == argstr[i] * 3:
delim = argstr[i] * 3
else:
delim = argstr[i]
@@ -1464,7 +1467,7 @@ class SinkParser:
# return -1 ## or fall through?
if argstr[i] in self.string_delimiters:
- if argstr[i: i + 3] == argstr[i] * 3:
+ if argstr[i : i + 3] == argstr[i] * 3:
delim = argstr[i] * 3
else:
delim = argstr[i]
@@ -1473,7 +1476,7 @@ class SinkParser:
dt = None
j, s = self.strconst(argstr, i, delim)
lang = None
- if argstr[j: j + 1] == "@": # Language?
+ if argstr[j : j + 1] == "@": # Language?
m = langcode.match(argstr, j + 1)
if m is None:
raise BadSyntax(
@@ -1484,9 +1487,9 @@ class SinkParser:
"Bad language code syntax on string " + "literal, after @",
)
i = m.end()
- lang = argstr[j + 1: i]
+ lang = argstr[j + 1 : i]
j = i
- if argstr[j: j + 2] == "^^":
+ if argstr[j : j + 2] == "^^":
res2 = []
j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI
dt = res2[0]
@@ -1519,15 +1522,15 @@ class SinkParser:
if (
delim == delim3
): # done when delim is """ or ''' and, respectively ...
- if argstr[j: j + 5] == delim5: # ... we have "" or '' before
+ if argstr[j : j + 5] == delim5: # ... we have "" or '' before
i = j + 5
ustr = ustr + delim2
return i, ustr
- if argstr[j: j + 4] == delim4: # ... we have " or ' before
+ if argstr[j : j + 4] == delim4: # ... we have " or ' before
i = j + 4
ustr = ustr + delim1
return i, ustr
- if argstr[j: j + 3] == delim3: # current " or ' is part of delim
+ if argstr[j : j + 3] == delim3: # current " or ' is part of delim
i = j + 3
return i, ustr
@@ -1539,8 +1542,8 @@ class SinkParser:
m = interesting.search(argstr, j) # was argstr[j:].
# Note for pos param to work, MUST be compiled ... re bug?
assert m, "Quote expected in string at ^ in %s^%s" % (
- argstr[j - 20: j],
- argstr[j: j + 20],
+ argstr[j - 20 : j],
+ argstr[j : j + 20],
) # at least need a quote
i = m.start()
@@ -1586,7 +1589,7 @@ class SinkParser:
elif ch == "\\":
j = i + 1
- ch = argstr[j: j + 1] # Will be empty if string ends
+ ch = argstr[j : j + 1] # Will be empty if string ends
if not ch:
raise BadSyntax(
self._thisDoc,
@@ -1617,14 +1620,14 @@ class SinkParser:
self._thisDoc, startline, argstr, i, "unterminated string literal(3)"
)
try:
- return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i: i + n])
+ return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n])
except:
raise BadSyntax(
self._thisDoc,
startline,
argstr,
i,
- "bad string literal hex escape: " + argstr[i: i + n],
+ "bad string literal hex escape: " + argstr[i : i + n],
)
def uEscape(self, argstr, i, startline):
@@ -1669,7 +1672,7 @@ class BadSyntax(SyntaxError):
self._why,
pre,
argstr[st:i],
- argstr[i: i + 60],
+ argstr[i : i + 60],
post,
)
@@ -1893,8 +1896,11 @@ class TurtleParser(Parser):
baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "")
p = SinkParser(sink, baseURI=baseURI, turtle=turtle)
-
- p.loadStream(source.getByteStream())
+ # N3 parser prefers str stream
+ stream = source.getCharacterStream()
+ if not stream:
+ stream = source.getByteStream()
+ p.loadStream(stream)
for prefix, namespace in p._bindings.items():
graph.bind(prefix, namespace)
diff --git a/rdflib/plugins/parsers/nquads.py b/rdflib/plugins/parsers/nquads.py
index 5defabc9..d44139c5 100644
--- a/rdflib/plugins/parsers/nquads.py
+++ b/rdflib/plugins/parsers/nquads.py
@@ -28,7 +28,7 @@ from codecs import getreader
from rdflib import ConjunctiveGraph
# Build up from the NTriples parser:
-from rdflib.plugins.parsers.ntriples import NTriplesParser
+from rdflib.plugins.parsers.ntriples import W3CNTriplesParser
from rdflib.plugins.parsers.ntriples import ParseError
from rdflib.plugins.parsers.ntriples import r_tail
from rdflib.plugins.parsers.ntriples import r_wspace
@@ -36,7 +36,7 @@ from rdflib.plugins.parsers.ntriples import r_wspace
__all__ = ["NQuadsParser"]
-class NQuadsParser(NTriplesParser):
+class NQuadsParser(W3CNTriplesParser):
def parse(self, inputsource, sink, bnode_context=None, **kwargs):
"""
Parse inputsource as an N-Quads file.
@@ -54,13 +54,14 @@ class NQuadsParser(NTriplesParser):
)
self.sink = ConjunctiveGraph(store=sink.store, identifier=sink.identifier)
- source = inputsource.getByteStream()
+ source = inputsource.getCharacterStream()
+ if not source:
+ source = inputsource.getByteStream()
+ source = getreader("utf-8")(source)
if not hasattr(source, "read"):
raise ParseError("Item to parse must be a file-like object.")
- source = getreader("utf-8")(source)
-
self.file = source
self.buffer = ""
while True:
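
Since NQuadsParser now extends W3CNTriplesParser and prefers the character stream, str input no longer takes a UTF-8 encode/decode round-trip. A usage sketch through the public API:

    from rdflib import ConjunctiveGraph

    data = (
        "<http://example.com/s> <http://example.com/p> "
        "<http://example.com/o> <http://example.com/g> ."
    )
    g = ConjunctiveGraph()
    g.parse(data=data, format="nquads")  # str data arrives as a character stream
    print([c.identifier for c in g.contexts()])
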
diff --git a/rdflib/plugins/parsers/nt.py b/rdflib/plugins/parsers/nt.py
deleted file mode 100644
index c37a1aa0..00000000
--- a/rdflib/plugins/parsers/nt.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from rdflib.parser import Parser
-from rdflib.plugins.parsers.ntriples import NTriplesParser
-
-__all__ = ["NTSink", "NTParser"]
-
-
-class NTSink(object):
- def __init__(self, graph):
- self.graph = graph
-
- def triple(self, s, p, o):
- self.graph.add((s, p, o))
-
-
-class NTParser(Parser):
- """parser for the ntriples format, often stored with the .nt extension
-
- See http://www.w3.org/TR/rdf-testcases/#ntriples"""
-
- def parse(self, source, sink, **kwargs):
- '''
- Parse the NT format
-
- :type source: `rdflib.parser.InputSource`
- :param source: the source of NT-formatted data
- :type sink: `rdflib.graph.Graph`
- :param sink: where to send parsed triples
- :param kwargs: Additional arguments to pass to `.NTriplesParser.parse`
- '''
- f = source.getByteStream() # TODO getCharacterStream?
- parser = NTriplesParser(NTSink(sink))
- parser.parse(f, **kwargs)
- f.close()
diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py
index a89aece5..9d441beb 100644
--- a/rdflib/plugins/parsers/ntriples.py
+++ b/rdflib/plugins/parsers/ntriples.py
@@ -1,6 +1,6 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
-__doc__ = """
+__doc__ = """\
N-Triples Parser
License: GPL 2, W3C, BSD, or MIT
Author: Sean B. Palmer, inamidst.com
@@ -12,14 +12,13 @@ import codecs
from rdflib.term import URIRef as URI
from rdflib.term import BNode as bNode
from rdflib.term import Literal
-
-
-from rdflib.compat import cast_bytes
from rdflib.compat import decodeUnicodeEscape
+from rdflib.exceptions import ParserError as ParseError
+from rdflib.parser import Parser
-from io import BytesIO
+from io import StringIO, TextIOBase, BytesIO
-__all__ = ["unquote", "uriquote", "Sink", "NTriplesParser"]
+__all__ = ["unquote", "uriquote", "W3CNTriplesParser", "NTGraphSink", "NTParser"]
uriref = r'<([^:]+:[^\s"<>]*)>'
literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"'
@@ -37,15 +36,7 @@ bufsiz = 2048
validate = False
-class Node(str):
- pass
-
-
-class ParseError(Exception):
- pass
-
-
-class Sink(object):
+class DummySink(object):
def __init__(self):
self.length = 0
@@ -75,7 +66,7 @@ def unquote(s):
while s:
m = r_safe.match(s)
if m:
- s = s[m.end():]
+ s = s[m.end() :]
result.append(m.group(1))
continue
@@ -87,7 +78,7 @@ def unquote(s):
m = r_uniquot.match(s)
if m:
- s = s[m.end():]
+ s = s[m.end() :]
u, U = m.groups()
codepoint = int(u or U, 16)
if codepoint > 0x10FFFF:
@@ -110,11 +101,10 @@ def uriquote(uri):
return r_hibyte.sub(lambda m: "%%%02X" % ord(m.group(1)), uri)
-class NTriplesParser(object):
+class W3CNTriplesParser(object):
"""An N-Triples Parser.
-
+ This is a legacy-style parser for the N-Triples format as specified by the W3C.
Usage::
-
p = NTriplesParser(sink=MySink())
sink = p.parse(f) # file; use parsestring for a string
@@ -124,6 +114,8 @@ class NTriplesParser(object):
`NTriplesParser`.
"""
+ __slots__ = ("_bnode_ids", "sink", "buffer", "file", "line")
+
def __init__(self, sink=None, bnode_context=None):
if bnode_context is not None:
self._bnode_ids = bnode_context
@@ -133,7 +125,11 @@ class NTriplesParser(object):
if sink is not None:
self.sink = sink
else:
- self.sink = Sink()
+ self.sink = DummySink()
+
+ self.buffer = None
+ self.file = None
+ self.line = ""
def parse(self, f, bnode_context=None):
"""
@@ -147,10 +143,13 @@ class NTriplesParser(object):
passed in to define a distinct context for a given call to
`parse`.
"""
+
if not hasattr(f, "read"):
raise ParseError("Item to parse must be a file-like object.")
- # since N-Triples 1.1 files can and should be utf-8 encoded
- f = codecs.getreader("utf-8")(f)
+
+ if not hasattr(f, "encoding") and not hasattr(f, "charbuffer"):
+ # someone still using a bytestream here?
+ f = codecs.getreader("utf-8")(f)
self.file = f
self.buffer = ""
@@ -161,16 +160,17 @@ class NTriplesParser(object):
try:
self.parseline(bnode_context=bnode_context)
except ParseError:
- raise ParseError("Invalid line: %r" % self.line)
+ raise ParseError("Invalid line: {}".format(self.line))
return self.sink
def parsestring(self, s, **kwargs):
"""Parse s as an N-Triples string."""
- if not isinstance(s, str):
+ if not isinstance(s, (str, bytes, bytearray)):
raise ParseError("Item to parse must be a string instance.")
- f = BytesIO()
- f.write(cast_bytes(s))
- f.seek(0)
+ if isinstance(s, (bytes, bytearray)):
+ f = codecs.getreader("utf-8")(BytesIO(s))
+ else:
+ f = StringIO(s)
self.parse(f, **kwargs)
def readline(self):
@@ -186,7 +186,7 @@ class NTriplesParser(object):
while True:
m = r_line.match(self.buffer)
if m: # the more likely prospect
- self.buffer = self.buffer[m.end():]
+ self.buffer = self.buffer[m.end() :]
return m.group(1)
else:
buffer = self.file.read(bufsiz)
@@ -208,12 +208,12 @@ class NTriplesParser(object):
predicate = self.predicate()
self.eat(r_wspaces)
- object = self.object(bnode_context)
+ object_ = self.object(bnode_context)
self.eat(r_tail)
if self.line:
- raise ParseError("Trailing garbage")
- self.sink.triple(subject, predicate, object)
+ raise ParseError("Trailing garbage: {}".format(self.line))
+ self.sink.triple(subject, predicate, object_)
def peek(self, token):
return self.line.startswith(token)
@@ -224,7 +224,7 @@ class NTriplesParser(object):
# print(dir(pattern))
# print repr(self.line), type(self.line)
raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line))
- self.line = self.line[m.end():]
+ self.line = self.line[m.end() :]
return m
def subject(self, bnode_context=None):
@@ -292,13 +292,44 @@ class NTriplesParser(object):
return False
-# # Obsolete, unused
-# def parseURI(uri):
-# import urllib
-# parser = NTriplesParser()
-# u = urllib.urlopen(uri)
-# sink = parser.parse(u)
-# u.close()
-# # for triple in sink:
-# # print triple
-# print 'Length of input:', sink.length
+class NTGraphSink(object):
+ __slots__ = ("g",)
+
+ def __init__(self, graph):
+ self.g = graph
+
+ def triple(self, s, p, o):
+ self.g.add((s, p, o))
+
+
+class NTParser(Parser):
+ """parser for the ntriples format, often stored with the .nt extension
+
+ See http://www.w3.org/TR/rdf-testcases/#ntriples"""
+
+ __slots__ = set()
+
+ @classmethod
+ def parse(cls, source, sink, **kwargs):
+ """
+ Parse the NT format
+
+ :type source: `rdflib.parser.InputSource`
+ :param source: the source of NT-formatted data
+ :type sink: `rdflib.graph.Graph`
+ :param sink: where to send parsed triples
+ :param kwargs: Additional arguments to pass to `.NTriplesParser.parse`
+ """
+ f = source.getCharacterStream()
+ if not f:
+ b = source.getByteStream()
+ # TextIOBase includes: StringIO and TextIOWrapper
+ if isinstance(b, TextIOBase):
+ # f is not really a ByteStream, but a CharacterStream
+ f = b
+ else:
+ # since N-Triples 1.1 files can and should be utf-8 encoded
+ f = codecs.getreader("utf-8")(b)
+ parser = W3CNTriplesParser(NTGraphSink(sink))
+ parser.parse(f, **kwargs)
+ f.close()
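
A sketch of the renamed classes in use, both the low-level W3CNTriplesParser (with its default DummySink) and the high-level "nt" plugin path:

    from rdflib import Graph
    from rdflib.plugins.parsers.ntriples import W3CNTriplesParser

    nt = '<http://example.com/s> <http://example.com/p> "hello" .\n'

    p = W3CNTriplesParser()  # falls back to DummySink
    p.parsestring(nt)        # str is now accepted as well as bytes
    print(p.sink.length)     # DummySink counts (and echoes) triples -> 1

    g = Graph()
    g.parse(data=nt, format="nt")  # "nt" now resolves to ntriples.NTParser
    print(len(g))                  # -> 1
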
diff --git a/rdflib/plugins/parsers/trig.py b/rdflib/plugins/parsers/trig.py
index c0906c88..9caa0662 100644
--- a/rdflib/plugins/parsers/trig.py
+++ b/rdflib/plugins/parsers/trig.py
@@ -80,7 +80,7 @@ class TrigSinkParser(SinkParser):
if j < 0:
self.BadSyntax(argstr, i, "EOF found when expected graph")
- if argstr[j: j + 1] == "=": # optional = for legacy support
+ if argstr[j : j + 1] == "=": # optional = for legacy support
i = self.skipSpace(argstr, j + 1)
if i < 0:
@@ -88,7 +88,7 @@ class TrigSinkParser(SinkParser):
else:
i = j
- if argstr[i: i + 1] != "{":
+ if argstr[i : i + 1] != "{":
return -1 # the node wasn't part of a graph
j = i + 1
@@ -104,7 +104,7 @@ class TrigSinkParser(SinkParser):
if i < 0:
self.BadSyntax(argstr, i, "needed '}', found end.")
- if argstr[i: i + 1] == "}":
+ if argstr[i : i + 1] == "}":
j = i + 1
break
@@ -151,7 +151,11 @@ class TrigParser(Parser):
)
p = TrigSinkParser(sink, baseURI=baseURI, turtle=True)
- p.loadStream(source.getByteStream())
+ stream = source.getCharacterStream() # try to get str stream first
+ if not stream:
+ # fallback to get the bytes stream
+ stream = source.getByteStream()
+ p.loadStream(stream)
for prefix, namespace in p._bindings.items():
conj_graph.bind(prefix, namespace)
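
The TrigParser hunk above applies the same prefer-the-character-stream idiom
that NTParser.parse uses; a standalone sketch of the pattern (the helper name
is illustrative; source is an rdflib InputSource):

    import codecs
    from io import TextIOBase

    def best_stream(source):
        # prefer an already-decoded character (str) stream
        f = source.getCharacterStream()
        if not f:
            b = source.getByteStream()
            if isinstance(b, TextIOBase):
                # some sources hand back a text stream here anyway
                f = b
            else:
                # the formats involved are utf-8, so decode explicitly
                f = codecs.getreader("utf-8")(b)
        return f
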
diff --git a/rdflib/plugins/stores/memory.py b/rdflib/plugins/stores/memory.py
new file mode 100644
index 00000000..93b6ec25
--- /dev/null
+++ b/rdflib/plugins/stores/memory.py
@@ -0,0 +1,533 @@
+from rdflib.store import Store
+
+__all__ = ["SimpleMemory", "Memory"]
+
+ANY = None
+
+
+class SimpleMemory(Store):
+ """\
+    A fast, naive, in-memory implementation of a triple store.
+
+    This triple store uses nested dictionaries to store triples. Each
+    triple is stored in three such indices as follows: spo[s][p][o] = 1,
+    pos[p][o][s] = 1 and osp[o][s][p] = 1.
+
+ Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
+ """
+
+ def __init__(self, configuration=None, identifier=None):
+ super(SimpleMemory, self).__init__(configuration)
+ self.identifier = identifier
+
+ # indexed by [subject][predicate][object]
+ self.__spo = {}
+
+ # indexed by [predicate][object][subject]
+ self.__pos = {}
+
+        # indexed by [object][subject][predicate]
+ self.__osp = {}
+
+ self.__namespace = {}
+ self.__prefix = {}
+
+ def add(self, triple, context, quoted=False):
+ """\
+ Add a triple to the store of triples.
+ """
+        # add dictionary entries for spo[s][p][o] = 1, pos[p][o][s] = 1
+        # and osp[o][s][p] = 1, creating the nested dictionaries where
+        # they do not yet exist.
+ subject, predicate, object = triple
+ spo = self.__spo
+ try:
+ po = spo[subject]
+        except LookupError:
+ po = spo[subject] = {}
+ try:
+ o = po[predicate]
+        except LookupError:
+ o = po[predicate] = {}
+ o[object] = 1
+
+ pos = self.__pos
+ try:
+ os = pos[predicate]
+        except LookupError:
+ os = pos[predicate] = {}
+ try:
+ s = os[object]
+        except LookupError:
+ s = os[object] = {}
+ s[subject] = 1
+
+ osp = self.__osp
+ try:
+ sp = osp[object]
+        except LookupError:
+ sp = osp[object] = {}
+ try:
+ p = sp[subject]
+        except LookupError:
+ p = sp[subject] = {}
+ p[predicate] = 1
+
+ def remove(self, triple_pattern, context=None):
+ for (subject, predicate, object), c in list(self.triples(triple_pattern)):
+ del self.__spo[subject][predicate][object]
+ del self.__pos[predicate][object][subject]
+ del self.__osp[object][subject][predicate]
+
+ def triples(self, triple_pattern, context=None):
+ """A generator over all the triples matching """
+ subject, predicate, object = triple_pattern
+ if subject != ANY: # subject is given
+ spo = self.__spo
+ if subject in spo:
+ subjectDictionary = spo[subject]
+ if predicate != ANY: # subject+predicate is given
+ if predicate in subjectDictionary:
+ if object != ANY: # subject+predicate+object is given
+ if object in subjectDictionary[predicate]:
+ yield (subject, predicate, object), self.__contexts()
+ else: # given object not found
+ pass
+ else: # subject+predicate is given, object unbound
+ for o in subjectDictionary[predicate].keys():
+ yield (subject, predicate, o), self.__contexts()
+ else: # given predicate not found
+ pass
+ else: # subject given, predicate unbound
+ for p in subjectDictionary.keys():
+ if object != ANY: # object is given
+ if object in subjectDictionary[p]:
+ yield (subject, p, object), self.__contexts()
+ else: # given object not found
+ pass
+ else: # object unbound
+ for o in subjectDictionary[p].keys():
+ yield (subject, p, o), self.__contexts()
+ else: # given subject not found
+ pass
+ elif predicate != ANY: # predicate is given, subject unbound
+ pos = self.__pos
+ if predicate in pos:
+ predicateDictionary = pos[predicate]
+ if object != ANY: # predicate+object is given, subject unbound
+ if object in predicateDictionary:
+ for s in predicateDictionary[object].keys():
+ yield (s, predicate, object), self.__contexts()
+ else: # given object not found
+ pass
+ else: # predicate is given, object+subject unbound
+ for o in predicateDictionary.keys():
+ for s in predicateDictionary[o].keys():
+ yield (s, predicate, o), self.__contexts()
+ elif object != ANY: # object is given, subject+predicate unbound
+ osp = self.__osp
+ if object in osp:
+ objectDictionary = osp[object]
+ for s in objectDictionary.keys():
+ for p in objectDictionary[s].keys():
+ yield (s, p, object), self.__contexts()
+ else: # subject+predicate+object unbound
+ spo = self.__spo
+ for s in spo.keys():
+ subjectDictionary = spo[s]
+ for p in subjectDictionary.keys():
+ for o in subjectDictionary[p].keys():
+ yield (s, p, o), self.__contexts()
+
+ def __len__(self, context=None):
+ # @@ optimize
+ i = 0
+ for triple in self.triples((None, None, None)):
+ i += 1
+ return i
+
+ def bind(self, prefix, namespace):
+ self.__prefix[namespace] = prefix
+ self.__namespace[prefix] = namespace
+
+ def namespace(self, prefix):
+ return self.__namespace.get(prefix, None)
+
+ def prefix(self, namespace):
+ return self.__prefix.get(namespace, None)
+
+ def namespaces(self):
+ for prefix, namespace in self.__namespace.items():
+ yield prefix, namespace
+
+ def __contexts(self):
+ return (c for c in []) # TODO: best way to return empty generator
+
+ def query(self, query, initNs, initBindings, queryGraph, **kwargs):
+ super(SimpleMemory, self).query(query, initNs, initBindings, queryGraph, **kwargs)
+
+ def update(self, update, initNs, initBindings, queryGraph, **kwargs):
+ super(SimpleMemory, self).update(update, initNs, initBindings, queryGraph, **kwargs)
+
+
+class Memory(Store):
+ """\
+    An in-memory implementation of a triple store.
+
+    Like SimpleMemory above, but context-aware, graph-aware and formula-aware.
+
+    Author: Ashley Sommer
+ """
+
+ context_aware = True
+ formula_aware = True
+ graph_aware = True
+
+ def __init__(self, configuration=None, identifier=None):
+ super(Memory, self).__init__(configuration)
+ self.identifier = identifier
+
+ # indexed by [subject][predicate][object]
+ self.__spo = {}
+
+ # indexed by [predicate][object][subject]
+ self.__pos = {}
+
+        # indexed by [object][subject][predicate]
+ self.__osp = {}
+
+ self.__namespace = {}
+ self.__prefix = {}
+ self.__context_obj_map = {}
+        self.__tripleContexts = {}
+ self.__contextTriples = {None: set()}
+ # all contexts used in store (unencoded)
+ self.__all_contexts = set()
+ # default context information for triples
+ self.__defaultContexts = None
+
+ def add(self, triple, context, quoted=False):
+ """\
+ Add a triple to the store of triples.
+ """
+        # add dictionary entries for spo[s][p][o] = 1, pos[p][o][s] = 1
+        # and osp[o][s][p] = 1, creating the nested dictionaries where
+        # they do not yet exist.
+ Store.add(self, triple, context, quoted=quoted)
+ if context is not None:
+ self.__all_contexts.add(context)
+ subject, predicate, object_ = triple
+ self.__add_triple_context(triple, context, quoted)
+
+ spo = self.__spo
+ try:
+ po = spo[subject]
+ except LookupError:
+ po = spo[subject] = {}
+ try:
+ o = po[predicate]
+ except LookupError:
+ o = po[predicate] = {}
+ o[object_] = 1
+
+ pos = self.__pos
+ try:
+ os = pos[predicate]
+ except LookupError:
+ os = pos[predicate] = {}
+ try:
+ s = os[object_]
+ except LookupError:
+ s = os[object_] = {}
+ s[subject] = 1
+
+ osp = self.__osp
+ try:
+ sp = osp[object_]
+ except LookupError:
+ sp = osp[object_] = {}
+ try:
+ p = sp[subject]
+ except LookupError:
+ p = sp[subject] = {}
+ p[predicate] = 1
+
+ def remove(self, triple_pattern, context=None):
+ req_ctx = self.__ctx_to_str(context)
+ for triple, c in self.triples(triple_pattern, context=context):
+ subject, predicate, object_ = triple
+ for ctx in self.__get_context_for_triple(triple):
+ if context is not None and req_ctx != ctx:
+ continue
+ self.__remove_triple_context(triple, ctx)
+ ctxs = self.__get_context_for_triple(triple, skipQuoted=True)
+ if None in ctxs and (context is None or len(ctxs) == 1):
+ # remove from default graph too
+ self.__remove_triple_context(triple, None)
+ if len(self.__get_context_for_triple(triple)) == 0:
+ del self.__spo[subject][predicate][object_]
+ del self.__pos[predicate][object_][subject]
+ del self.__osp[object_][subject][predicate]
+ del self.__tripleContexts[triple]
+ if (
+ req_ctx is not None
+ and req_ctx in self.__contextTriples
+ and len(self.__contextTriples[req_ctx]) == 0
+ ):
+                # all triples have been removed from this context,
+                # and it's not the default context, so delete it
+ del self.__contextTriples[req_ctx]
+
+ if (
+ triple_pattern == (None, None, None)
+ and context in self.__all_contexts
+ and not self.graph_aware
+ ):
+ # remove the whole context
+ self.__all_contexts.remove(context)
+
+ def triples(self, triple_pattern, context=None):
+ """A generator over all the triples matching """
+ req_ctx = self.__ctx_to_str(context)
+ subject, predicate, object_ = triple_pattern
+
+ # all triples case (no triple parts given as pattern)
+ if subject is None and predicate is None and object_ is None:
+ # Just dump all known triples from the given graph
+ if req_ctx not in self.__contextTriples:
+ return
+ for triple in self.__contextTriples[req_ctx].copy():
+ yield triple, self.__contexts(triple)
+
+ # optimize "triple in graph" case (all parts given)
+ elif subject is not None and predicate is not None and object_ is not None:
+ triple = triple_pattern
+ try:
+ _ = self.__spo[subject][predicate][object_]
+ if self.__triple_has_context(triple, req_ctx):
+ yield triple, self.__contexts(triple)
+ except KeyError:
+ return
+
+ elif subject is not None: # subject is given
+ spo = self.__spo
+ if subject in spo:
+ subjectDictionary = spo[subject]
+ if predicate is not None: # subject+predicate is given
+ if predicate in subjectDictionary:
+ if object_ is not None: # subject+predicate+object is given
+ if object_ in subjectDictionary[predicate]:
+ triple = (subject, predicate, object_)
+ if self.__triple_has_context(triple, req_ctx):
+ yield triple, self.__contexts(triple)
+ else: # given object not found
+ pass
+ else: # subject+predicate is given, object unbound
+ for o in list(subjectDictionary[predicate].keys()):
+ triple = (subject, predicate, o)
+ if self.__triple_has_context(triple, req_ctx):
+ yield triple, self.__contexts(triple)
+ else: # given predicate not found
+ pass
+ else: # subject given, predicate unbound
+ for p in list(subjectDictionary.keys()):
+ if object_ is not None: # object is given
+ if object_ in subjectDictionary[p]:
+ triple = (subject, p, object_)
+ if self.__triple_has_context(triple, req_ctx):
+ yield triple, self.__contexts(triple)
+ else: # given object not found
+ pass
+ else: # object unbound
+ for o in list(subjectDictionary[p].keys()):
+ triple = (subject, p, o)
+ if self.__triple_has_context(triple, req_ctx):
+ yield triple, self.__contexts(triple)
+ else: # given subject not found
+ pass
+ elif predicate is not None: # predicate is given, subject unbound
+ pos = self.__pos
+ if predicate in pos:
+ predicateDictionary = pos[predicate]
+ if object_ is not None: # predicate+object is given, subject unbound
+ if object_ in predicateDictionary:
+ for s in list(predicateDictionary[object_].keys()):
+ triple = (s, predicate, object_)
+ if self.__triple_has_context(triple, req_ctx):
+ yield triple, self.__contexts(triple)
+ else: # given object not found
+ pass
+ else: # predicate is given, object+subject unbound
+ for o in list(predicateDictionary.keys()):
+ for s in list(predicateDictionary[o].keys()):
+ triple = (s, predicate, o)
+ if self.__triple_has_context(triple, req_ctx):
+ yield triple, self.__contexts(triple)
+ elif object_ is not None: # object is given, subject+predicate unbound
+ osp = self.__osp
+ if object_ in osp:
+ objectDictionary = osp[object_]
+ for s in list(objectDictionary.keys()):
+ for p in list(objectDictionary[s].keys()):
+ triple = (s, p, object_)
+ if self.__triple_has_context(triple, req_ctx):
+ yield triple, self.__contexts(triple)
+ else: # subject+predicate+object unbound
+ # Shouldn't get here if all other cases above worked correctly.
+ spo = self.__spo
+ for s in list(spo.keys()):
+ subjectDictionary = spo[s]
+ for p in list(subjectDictionary.keys()):
+ for o in list(subjectDictionary[p].keys()):
+ triple = (s, p, o)
+ if self.__triple_has_context(triple, req_ctx):
+ yield triple, self.__contexts(triple)
+
+ def bind(self, prefix, namespace):
+ self.__prefix[namespace] = prefix
+ self.__namespace[prefix] = namespace
+
+ def namespace(self, prefix):
+ return self.__namespace.get(prefix, None)
+
+ def prefix(self, namespace):
+ return self.__prefix.get(namespace, None)
+
+ def namespaces(self):
+ for prefix, namespace in self.__namespace.items():
+ yield prefix, namespace
+
+ def contexts(self, triple=None):
+ if triple is None or triple == (None, None, None):
+ return (context for context in self.__all_contexts)
+
+ subj, pred, obj = triple
+ try:
+ _ = self.__spo[subj][pred][obj]
+ return self.__contexts(triple)
+ except KeyError:
+ return (_ for _ in [])
+
+ def __len__(self, context=None):
+ ctx = self.__ctx_to_str(context)
+ if ctx not in self.__contextTriples:
+ return 0
+ return len(self.__contextTriples[ctx])
+
+ def add_graph(self, graph):
+ if not self.graph_aware:
+ Store.add_graph(self, graph)
+ else:
+ self.__all_contexts.add(graph)
+
+ def remove_graph(self, graph):
+ if not self.graph_aware:
+ Store.remove_graph(self, graph)
+ else:
+ self.remove((None, None, None), graph)
+ try:
+ self.__all_contexts.remove(graph)
+ except KeyError:
+ pass # we didn't know this graph, no problem
+
+ # internal utility methods below
+ def __add_triple_context(self, triple, context, quoted):
+ """add the given context to the set of contexts for the triple"""
+ ctx = self.__ctx_to_str(context)
+ quoted = bool(quoted)
+ try:
+ subj, pred, obj = triple
+ _ = self.__spo[subj][pred][obj]
+ # we know the triple exists somewhere in the store
+ if triple not in self.__tripleContexts:
+ # triple exists with default ctx info
+ # start with a copy of the default ctx info
+ self.__tripleContexts[triple] = self.__defaultContexts.copy()
+
+ self.__tripleContexts[triple][ctx] = quoted
+ if not quoted:
+ self.__tripleContexts[triple][None] = quoted
+ except KeyError:
+ # the triple didn't exist before in the store
+ if quoted: # this context only
+ self.__tripleContexts[triple] = {ctx: quoted}
+ else: # default context as well
+ self.__tripleContexts[triple] = {ctx: quoted, None: quoted}
+
+ # if the triple is not quoted add it to the default context
+ if not quoted:
+ self.__contextTriples[None].add(triple)
+
+ # always add the triple to given context, making sure it's initialized
+ if ctx not in self.__contextTriples:
+ self.__contextTriples[ctx] = set()
+ self.__contextTriples[ctx].add(triple)
+
+ # if this is the first ever triple in the store, set default ctx info
+ if self.__defaultContexts is None:
+ self.__defaultContexts = self.__tripleContexts[triple]
+
+ # if the context info is the same as default, no need to store it
+ if self.__tripleContexts[triple] == self.__defaultContexts:
+ del self.__tripleContexts[triple]
+
+ def __get_context_for_triple(self, triple, skipQuoted=False):
+ """return a list of contexts (str) for the triple, skipping
+ quoted contexts if skipQuoted==True"""
+
+ ctxs = self.__tripleContexts.get(triple, self.__defaultContexts)
+
+ if not skipQuoted:
+ return ctxs.keys()
+
+ return [ctx for ctx, quoted in ctxs.items() if not quoted]
+
+ def __triple_has_context(self, triple, ctx):
+ """return True if the triple exists in the given context"""
+ return ctx in self.__tripleContexts.get(triple, self.__defaultContexts)
+
+ def __remove_triple_context(self, triple, ctx):
+ """remove the context from the triple"""
+ ctxs = self.__tripleContexts.get(triple, self.__defaultContexts).copy()
+ del ctxs[ctx]
+ if ctxs == self.__defaultContexts:
+ del self.__tripleContexts[triple]
+ else:
+ self.__tripleContexts[triple] = ctxs
+ self.__contextTriples[ctx].remove(triple)
+
+ def __ctx_to_str(self, ctx):
+ if ctx is None:
+ return None
+ try:
+ # ctx could be a graph. In that case, use its identifier
+ ctx_str = "{}:{}".format(str(ctx.identifier.__class__.__name__), str(ctx.identifier))
+ self.__context_obj_map[ctx_str] = ctx
+ return ctx_str
+ except AttributeError:
+ # otherwise, ctx should be a URIRef or BNode or str
+ if isinstance(ctx, str):
+ ctx_str = "{}:{}".format(str(ctx.__class__.__name__), str(ctx))
+ if ctx_str in self.__context_obj_map:
+ return ctx_str
+ self.__context_obj_map[ctx_str] = ctx
+ return ctx_str
+ raise RuntimeError("Cannot use that type of object as a Graph context")
+
+ def __contexts(self, triple):
+ """return a generator for all the non-quoted contexts
+ (dereferenced) the encoded triple appears in"""
+ return (
+ self.__context_obj_map.get(ctx_str, ctx_str)
+ for ctx_str in self.__get_context_for_triple(triple, skipQuoted=True)
+ if ctx_str is not None
+ )
+
+ def query(self, query, initNs, initBindings, queryGraph, **kwargs):
+ super(Memory, self).query(query, initNs, initBindings, queryGraph, **kwargs)
+
+ def update(self, update, initNs, initBindings, queryGraph, **kwargs):
+ super(Memory, self).update(update, initNs, initBindings, queryGraph, **kwargs)
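
A minimal sketch of driving the new context-aware Memory store through the
plugin interface (the plugin name matches the registrations updated in the
tests below; URIs are illustrative):

    from rdflib import Graph, Literal, URIRef, plugin
    from rdflib.store import Store

    store = plugin.get("Memory", Store)()
    g1 = Graph(store, URIRef("http://example.com/graph1"))
    g1.add((URIRef("http://example.com/s"),
            URIRef("http://example.com/p"),
            Literal("o")))

    print(len(g1))  # 1 -- Memory.__len__ counts per context
    for ctx in store.contexts():
        print(ctx.identifier)  # graphs holding at least one triple
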
diff --git a/rdflib/plugins/sleepycat.py b/rdflib/plugins/stores/sleepycat.py
index 735d3c3a..735d3c3a 100644
--- a/rdflib/plugins/sleepycat.py
+++ b/rdflib/plugins/stores/sleepycat.py
diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py
index 2d20c97c..631792c7 100644
--- a/rdflib/plugins/stores/sparqlstore.py
+++ b/rdflib/plugins/stores/sparqlstore.py
@@ -646,7 +646,7 @@ class SPARQLUpdateStore(SPARQLStore):
.. admonition:: Context-aware query rewriting
- **When:** If context-awareness is enabled and the graph is not the default graph of the store.
- - **Why:** To ensure consistency with the :class:`~rdflib.plugins.memory.IOMemory` store.
+ - **Why:** To ensure consistency with the :class:`~rdflib.plugins.stores.memory.Memory` store.
           The graph must accept "local" SPARQL requests (requests with no GRAPH keyword)
           as if it were the default graph.
- **What is done:** These "local" queries are rewritten by this store.
diff --git a/rdflib/term.py b/rdflib/term.py
index 6e8f81d0..563f5e18 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -1405,7 +1405,7 @@ def _parseBoolean(value):
if new_value not in false_accepted_values:
warnings.warn(
"Parsing weird boolean, % r does not map to True or False" % value,
- category=DeprecationWarning,
+ category=UserWarning,
)
return False
diff --git a/rdflib/util.py b/rdflib/util.py
index 88cef828..8d2743bc 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -349,8 +349,8 @@ def parse_date_time(val):
SUFFIX_FORMAT_MAP = {
+ "xml": "xml",
"rdf": "xml",
- "rdfs": "xml",
"owl": "xml",
"n3": "n3",
"ttl": "turtle",
diff --git a/test/rdf/datatypes/test001.borked b/test/rdf/datatypes/test001.borked
new file mode 100644
index 00000000..a4c86aea
--- /dev/null
+++ b/test/rdf/datatypes/test001.borked
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+
+<!--
+ Copyright World Wide Web Consortium, (Massachusetts Institute of
+ Technology, Institut National de Recherche en Informatique et en
+ Automatique, Keio University).
+
+ All Rights Reserved.
+
+ Please see the full Copyright clause at
+ <http://www.w3.org/Consortium/Legal/copyright-software.html>
+
+ Description: A simple datatype production; a language+
+ datatype production. Simply duplicate the constructs under
+ http://www.w3.org/2000/10/rdf-tests/rdfcore/ntriples/test.nt
+
+ $Id: test001.rdf,v 1.2 2002/11/20 14:51:34 jgrant Exp $
+
+-->
+
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:eg="http://example.org/">
+
+ <rdf:Description rdf:about="http://example.org/foo">
+ <eg:bar rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10</eg:bar>
+ <eg:baz rdf:datatype="http://www.w3.org/2001/XMLSchema#integer" xml:lang="fr">10</eg:baz>
+ </rdf:Description>
+
+</rdf:RDF>
diff --git a/test/store_performance.py b/test/store_performance.py
index aa70742a..dc3fe7ce 100644
--- a/test/store_performance.py
+++ b/test/store_performance.py
@@ -96,7 +96,7 @@ class StoreTestCase(unittest.TestCase):
class MemoryStoreTestCase(StoreTestCase):
- store = "IOMemory"
+ store = "Memory"
if __name__ == "__main__":
diff --git a/test/test_aggregate_graphs.py b/test/test_aggregate_graphs.py
index 5d58f4d3..efe684d3 100644
--- a/test/test_aggregate_graphs.py
+++ b/test/test_aggregate_graphs.py
@@ -60,7 +60,7 @@ WHERE {?n3Doc a log:N3Document }"""
class GraphAggregates1(unittest.TestCase):
def setUp(self):
- memStore = plugin.get("IOMemory", Store)()
+ memStore = plugin.get("Memory", Store)()
self.graph1 = Graph(memStore)
self.graph2 = Graph(memStore)
self.graph3 = Graph(memStore)
@@ -109,7 +109,7 @@ class GraphAggregates2(unittest.TestCase):
sparql = True
def setUp(self):
- memStore = plugin.get("IOMemory", Store)()
+ memStore = plugin.get("Memory", Store)()
self.graph1 = Graph(memStore, URIRef("http://example.com/graph1"))
self.graph2 = Graph(memStore, URIRef("http://example.com/graph2"))
self.graph3 = Graph(memStore, URIRef("http://example.com/graph3"))
diff --git a/test/test_canonicalization.py b/test/test_canonicalization.py
index 93c8b4c5..c3a8bf04 100644
--- a/test/test_canonicalization.py
+++ b/test/test_canonicalization.py
@@ -3,7 +3,7 @@ from rdflib import Graph, RDF, BNode, URIRef, Namespace, ConjunctiveGraph, Liter
from rdflib.compare import to_isomorphic, to_canonical_graph
import rdflib
-from rdflib.plugins.memory import IOMemory
+from rdflib.plugins.stores.memory import Memory
from io import StringIO
@@ -287,7 +287,7 @@ def test_issue682_signing_named_graphs():
cmary = URIRef("http://love.com/lovers/mary#")
cjohn = URIRef("http://love.com/lovers/john#")
- store = IOMemory()
+ store = Memory()
g = ConjunctiveGraph(store=store)
g.bind("love", ns)
diff --git a/test/test_dataset.py b/test/test_dataset.py
index 33a2721d..734b58cd 100644
--- a/test/test_dataset.py
+++ b/test/test_dataset.py
@@ -178,7 +178,7 @@ if __name__ == "__main__":
tests = 0
for s in plugin.plugins(pluginname, plugin.Store):
- if s.name in ("default", "IOMemory", "Auditable", "Concurrent", "SPARQLStore"):
+ if s.name in ("default", "Memory", "Auditable", "Concurrent", "SPARQLStore"):
continue # these are tested by default
if not s.getClass().graph_aware:
diff --git a/test/test_graph.py b/test/test_graph.py
index 77f47dbc..fba32e5d 100644
--- a/test/test_graph.py
+++ b/test/test_graph.py
@@ -5,7 +5,9 @@ import unittest
from tempfile import mkdtemp, mkstemp
import shutil
-from rdflib import URIRef, RDF, Graph, plugin
+from rdflib import URIRef, Graph, plugin
+from rdflib.exceptions import ParserError
+from rdflib.plugin import PluginException
from nose.exc import SkipTest
@@ -248,6 +250,65 @@ class GraphTestCase(unittest.TestCase):
self.assertEqual((michel, likes, cheese) in g1, True)
+ def testGuessFormatForParse(self):
+ self.graph = Graph()
+
+ # files
+ with self.assertRaises(ParserError):
+ self.graph.parse(__file__) # here we are trying to parse a Python file!!
+
+ # .nt can be parsed by Turtle Parser
+ self.graph.parse("test/nt/anons-01.nt")
+ # RDF/XML
+ self.graph.parse("test/rdf/datatypes/test001.rdf") # XML
+ # bad filename but set format
+ self.graph.parse("test/rdf/datatypes/test001.borked", format="xml")
+
+ # strings
+ self.graph = Graph()
+
+ with self.assertRaises(ParserError):
+ self.graph.parse(data="rubbish")
+
+ # Turtle - default
+ self.graph.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .")
+
+ # Turtle - format given
+ self.graph.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .", format="turtle")
+
+ # RDF/XML - format given
+ rdf = """<rdf:RDF
+ xmlns:ns1="http://example.org/#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+>
+ <rdf:Description rdf:nodeID="ub63bL2C1">
+ <ns1:p rdf:resource="http://example.org/q"/>
+ <ns1:r rdf:resource="http://example.org/s"/>
+ </rdf:Description>
+ <rdf:Description rdf:nodeID="ub63bL5C1">
+ <ns1:r>
+ <rdf:Description rdf:nodeID="ub63bL6C11">
+ <ns1:s rdf:resource="http://example.org/#t"/>
+ </rdf:Description>
+ </ns1:r>
+ <ns1:p rdf:resource="http://example.org/q"/>
+ </rdf:Description>
+</rdf:RDF>
+ """
+ self.graph.parse(data=rdf, format="xml")
+
+ # URI
+ self.graph = Graph()
+
+ # only getting HTML
+ with self.assertRaises(PluginException):
+ self.graph.parse(location="https://www.google.com")
+
+ self.graph.parse(location="http://www.w3.org/ns/adms.ttl")
+ self.graph.parse(location="http://www.w3.org/ns/adms.rdf")
+ # persistent Australian Government online RDF resource without a file-like ending
+ self.graph.parse(location="https://linked.data.gov.au/def/agrif?_format=text/turtle")
+
# dynamically create classes for each registered Store
@@ -260,7 +321,7 @@ tests = 0
for s in plugin.plugins(pluginname, plugin.Store):
if s.name in (
"default",
- "IOMemory",
+ "Memory",
"Auditable",
"Concurrent",
"SPARQLStore",
@@ -268,6 +329,10 @@ for s in plugin.plugins(pluginname, plugin.Store):
):
continue # these are tested by default
+ if s.name in ("SimpleMemory",):
+ # these (by design) won't pass some of the tests (like Intersection)
+ continue
+
locals()["t%d" % tests] = type(
"%sGraphTestCase" % s.name, (GraphTestCase,), {"store": s.name}
)
diff --git a/test/test_graph_context.py b/test/test_graph_context.py
index 5221434b..52220d2c 100644
--- a/test/test_graph_context.py
+++ b/test/test_graph_context.py
@@ -370,7 +370,7 @@ tests = 0
for s in plugin.plugins(pluginname, plugin.Store):
if s.name in (
"default",
- "IOMemory",
+ "Memory",
"Auditable",
"Concurrent",
"SPARQLStore",
diff --git a/test/test_iomemory.py b/test/test_iomemory.py
deleted file mode 100644
index 74048ab4..00000000
--- a/test/test_iomemory.py
+++ /dev/null
@@ -1,67 +0,0 @@
-"""
-
-Iteration and update conflict with set based IOMemory store
-
-https://github.com/RDFLib/rdflib/issues/286
-
-"""
-
-from rdflib.store import Store
-from rdflib import plugin
-
-from rdflib import Graph, Literal, Namespace
-
-
-def test_concurrent1():
- dns = Namespace("http://www.example.com/")
-
- store = plugin.get("IOMemory", Store)()
- g1 = Graph(store=store)
-
- g1.add((dns.Name, dns.prop, Literal("test")))
- g1.add((dns.Name, dns.prop, Literal("test2")))
- g1.add((dns.Name, dns.prop, Literal("test3")))
-
- n = len(g1)
- i = 0
-
- for t in g1.triples((None, None, None)):
- i += 1
- # next line causes problems because it adds a new Subject that needs
- # to be indexed in __subjectIndex dictionary in IOMemory Store.
- # which invalidates the iterator used to iterate over g1
- g1.add(t)
-
- assert i == n
-
-
-def test_concurrent2():
- dns = Namespace("http://www.example.com/")
-
- store = plugin.get("IOMemory", Store)()
- g1 = Graph(store=store)
- g2 = Graph(store=store)
-
- g1.add((dns.Name, dns.prop, Literal("test")))
- g1.add((dns.Name, dns.prop, Literal("test2")))
- g1.add((dns.Name, dns.prop, Literal("test3")))
-
- n = len(g1)
- i = 0
-
- for t in g1.triples((None, None, None)):
- i += 1
- g2.add(t)
- # next line causes problems because it adds a new Subject that needs
- # to be indexed in __subjectIndex dictionary in IOMemory Store.
- # which invalidates the iterator used to iterate over g1
- g2.add((dns.Name1, dns.prop1, Literal("test")))
- g2.add((dns.Name1, dns.prop, Literal("test")))
- g2.add((dns.Name, dns.prop, Literal("test4")))
-
- assert i == n
-
-
-if __name__ == "__main__":
- test_concurrent1()
- test_concurrent2()
diff --git a/test/test_issue247.py b/test/test_issue247.py
index 747dd1e0..7a51dd24 100644
--- a/test/test_issue247.py
+++ b/test/test_issue247.py
@@ -38,7 +38,7 @@ class TestXMLLiteralwithLangAttr(unittest.TestCase):
it contains a XML Literal with a xml:lang attribute:
"""
g = rdflib.Graph()
- g.parse(data=passxml)
+ g.parse(data=passxml, format="xml")
def test_failing_parse_of_literal_with_xmllang_attr(self):
"""
@@ -47,7 +47,7 @@ class TestXMLLiteralwithLangAttr(unittest.TestCase):
it contains a XML Literal with a xml:lang attribute:
"""
g = rdflib.Graph()
- g.parse(data=failxml)
+ g.parse(data=failxml, format="xml")
if __name__ == "__main__":
diff --git a/test/test_issue363.py b/test/test_issue363.py
index 792c2441..5f88a6f4 100644
--- a/test/test_issue363.py
+++ b/test/test_issue363.py
@@ -38,7 +38,7 @@ def test_broken_rdfxml():
def test_parsetype_resource():
- g = rdflib.Graph().parse(data=data2)
+ g = rdflib.Graph().parse(data=data2, format="xml")
print(g.serialize(format="n3"))
diff --git a/test/test_issue801.py b/test/test_issue801.py
new file mode 100644
index 00000000..ae27f346
--- /dev/null
+++ b/test/test_issue801.py
@@ -0,0 +1,19 @@
+"""
+Issue 801 - Problem with prefixes created for URIs containing %20
+"""
+from rdflib import Namespace, Graph, BNode, Literal
+import unittest
+
+class TestIssue801(unittest.TestCase):
+
+ def test_issue_801(self):
+ g = Graph()
+ example = Namespace('http://example.org/')
+ g.bind('', example)
+ node = BNode()
+ g.add((node, example['first%20name'], Literal('John')))
+ self.assertEqual(g.serialize(format="turtle").decode().split("\n")[-3],
+ '[] :first%20name "John" .')
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/test/test_issue_git_336.py b/test/test_issue_git_336.py
index 6a8abb7c..c3d4a581 100644
--- a/test/test_issue_git_336.py
+++ b/test/test_issue_git_336.py
@@ -37,7 +37,7 @@ def test_ns_localname_roundtrip():
xmldump = g.serialize().decode("utf-8")
g1 = rdflib.Graph()
- g1.parse(data=xmldump)
+ g1.parse(data=xmldump, format="xml")
g1.parse(data=turtledump, format="turtle")
diff --git a/test/test_literal.py b/test/test_literal.py
index 714bea00..656bfb10 100644
--- a/test/test_literal.py
+++ b/test/test_literal.py
@@ -33,7 +33,7 @@ class TestLiteral(unittest.TestCase):
</rdf:RDF>
"""
g = rdflib.Graph()
- g.parse(data=d)
+ g.parse(data=d, format="xml")
a = rdflib.Literal("a\\b")
b = list(g.objects())[0]
self.assertEqual(a, b)
diff --git a/test/test_memory_store.py b/test/test_memory_store.py
index 546d12ad..ad46d6c0 100644
--- a/test/test_memory_store.py
+++ b/test/test_memory_store.py
@@ -1,10 +1,32 @@
import unittest
import rdflib
-rdflib.plugin.register("Memory", rdflib.store.Store, "rdflib.plugins.memory", "Memory")
+rdflib.plugin.register("SimpleMemory", rdflib.store.Store, "rdflib.plugins.stores.memory", "SimpleMemory")
+rdflib.plugin.register("Memory", rdflib.store.Store, "rdflib.plugins.stores.memory", "Memory")
+class SimpleStoreTestCase(unittest.TestCase):
+ def test_memory_store(self):
+ g = rdflib.Graph("SimpleMemory")
+ subj1 = rdflib.URIRef("http://example.org/foo#bar1")
+ pred1 = rdflib.URIRef("http://example.org/foo#bar2")
+ obj1 = rdflib.URIRef("http://example.org/foo#bar3")
+ triple1 = (subj1, pred1, obj1)
+ triple2 = (
+ subj1,
+ rdflib.URIRef("http://example.org/foo#bar4"),
+ rdflib.URIRef("http://example.org/foo#bar5"),
+ )
+ g.add(triple1)
+ self.assertTrue(len(g) == 1)
+ g.add(triple2)
+ self.assertTrue(len(list(g.triples((subj1, None, None)))) == 2)
+ self.assertTrue(len(list(g.triples((None, pred1, None)))) == 1)
+ self.assertTrue(len(list(g.triples((None, None, obj1)))) == 1)
+ g.remove(triple1)
+ self.assertTrue(len(g) == 1)
+ g.serialize()
-class StoreTestCase(unittest.TestCase):
+class MemoryStoreTestCase(unittest.TestCase):
def test_memory_store(self):
g = rdflib.Graph("Memory")
subj1 = rdflib.URIRef("http://example.org/foo#bar1")
diff --git a/test/test_namespace.py b/test/test_namespace.py
index 48896fdc..510d8515 100644
--- a/test/test_namespace.py
+++ b/test/test_namespace.py
@@ -39,8 +39,8 @@ class NamespacePrefixTest(unittest.TestCase):
)
graph = Graph().parse(data=data, format="turtle")
for p, n in tuple(graph.namespaces()):
- graph.store._IOMemory__namespace.pop(p)
- graph.store._IOMemory__prefix.pop(n)
+ graph.store._Memory__namespace.pop(p)
+ graph.store._Memory__prefix.pop(n)
graph.namespace_manager.reset()
self.assertFalse(tuple(graph.namespaces()))
u = URIRef("http://example.org/a")
diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py
index 15f0d4af..399f7bff 100644
--- a/test/test_nt_misc.py
+++ b/test/test_nt_misc.py
@@ -34,8 +34,34 @@ class NTTestCase(unittest.TestCase):
s = g.serialize(format="nt").strip()
self.assertEqual(s, '<foo> <foo> "test\\n"@en .'.encode("latin-1"))
+ def testIssue1144_rdflib(self):
+ fname = "test/nt/lists-02.nt"
+ g1 = Graph()
+ with open(fname, "r") as f:
+ g1.parse(f, format='nt')
+ self.assertEqual(14, len(g1))
+ g2 = Graph()
+ with open(fname, "rb") as fb:
+ g2.parse(fb, format='nt')
+ self.assertEqual(14, len(g2))
+
+
+ def testIssue1144_w3c(self):
+ fname = "test/nt/lists-02.nt"
+ sink1 = ntriples.NTGraphSink(Graph())
+ p1 = ntriples.W3CNTriplesParser(sink1)
+ with open(fname, "r") as f:
+ p1.parse(f)
+ self.assertEqual(14, len(sink1.g))
+ sink2 = ntriples.NTGraphSink(Graph())
+ p2 = ntriples.W3CNTriplesParser(sink2)
+ with open(fname, "rb") as f:
+ p2.parse(f)
+ self.assertEqual(14, len(sink2.g))
+
+
def test_sink(self):
- s = ntriples.Sink()
+ s = ntriples.DummySink()
self.assertTrue(s.length == 0)
s.triple(None, None, None)
self.assertTrue(s.length == 1)
@@ -77,26 +103,26 @@ class NTTestCase(unittest.TestCase):
ntriples.validate = False
self.assertEqual(res, uniquot)
- def test_NTriplesParser_fpath(self):
+ def test_W3CNTriplesParser_fpath(self):
fpath = "test/nt/" + os.listdir("test/nt")[0]
- p = ntriples.NTriplesParser()
+ p = ntriples.W3CNTriplesParser()
self.assertRaises(ntriples.ParseError, p.parse, fpath)
- def test_NTriplesParser_parsestring(self):
- p = ntriples.NTriplesParser()
+ def test_W3CNTriplesParser_parsestring(self):
+ p = ntriples.W3CNTriplesParser()
data = 3
self.assertRaises(ntriples.ParseError, p.parsestring, data)
fname = "test/nt/lists-02.nt"
with open(fname, "r") as f:
data = f.read()
- p = ntriples.NTriplesParser()
+ p = ntriples.W3CNTriplesParser()
res = p.parsestring(data)
self.assertTrue(res == None)
def test_w3_ntriple_variants(self):
uri = "file:///" + os.getcwd() + "/test/nt/test.ntriples"
- parser = ntriples.NTriplesParser()
+ parser = ntriples.W3CNTriplesParser()
u = urlopen(uri)
sink = parser.parse(u)
u.close()
@@ -107,14 +133,14 @@ class NTTestCase(unittest.TestCase):
data = (
"""<http://example.org/resource32> 3 <http://example.org/datatype1> .\n"""
)
- p = ntriples.NTriplesParser()
+ p = ntriples.W3CNTriplesParser()
self.assertRaises(ntriples.ParseError, p.parsestring, data)
def test_cover_eat(self):
data = (
"""<http://example.org/resource32> 3 <http://example.org/datatype1> .\n"""
)
- p = ntriples.NTriplesParser()
+ p = ntriples.W3CNTriplesParser()
p.line = data
self.assertRaises(
ntriples.ParseError, p.eat, re.compile("<http://example.org/datatype1>")
@@ -122,7 +148,7 @@ class NTTestCase(unittest.TestCase):
def test_cover_subjectobjectliteral(self):
# data = '''<http://example.org/resource32> 3 <http://example.org/datatype1> .\n'''
- p = ntriples.NTriplesParser()
+ p = ntriples.W3CNTriplesParser()
p.line = "baz"
self.assertRaises(ntriples.ParseError, p.subject)
self.assertRaises(ntriples.ParseError, p.object)
@@ -134,12 +160,12 @@ class BNodeContextTestCase(unittest.TestCase):
def test_bnode_shared_across_instances(self):
my_sink = FakeSink()
bnode_context = dict()
- p = ntriples.NTriplesParser(my_sink, bnode_context=bnode_context)
+ p = ntriples.W3CNTriplesParser(my_sink, bnode_context=bnode_context)
p.parsestring('''
_:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> .
''')
- q = ntriples.NTriplesParser(my_sink, bnode_context=bnode_context)
+ q = ntriples.W3CNTriplesParser(my_sink, bnode_context=bnode_context)
q.parsestring('''
_:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> .
''')
@@ -148,12 +174,12 @@ class BNodeContextTestCase(unittest.TestCase):
def test_bnode_distinct_across_instances(self):
my_sink = FakeSink()
- p = ntriples.NTriplesParser(my_sink)
+ p = ntriples.W3CNTriplesParser(my_sink)
p.parsestring('''
_:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> .
''')
- q = ntriples.NTriplesParser(my_sink)
+ q = ntriples.W3CNTriplesParser(my_sink)
q.parsestring('''
_:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> .
''')
@@ -162,7 +188,7 @@ class BNodeContextTestCase(unittest.TestCase):
def test_bnode_distinct_across_parse(self):
my_sink = FakeSink()
- p = ntriples.NTriplesParser(my_sink)
+ p = ntriples.W3CNTriplesParser(my_sink)
p.parsestring('''
_:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> .
@@ -176,7 +202,7 @@ class BNodeContextTestCase(unittest.TestCase):
def test_bnode_shared_across_parse(self):
my_sink = FakeSink()
- p = ntriples.NTriplesParser(my_sink)
+ p = ntriples.W3CNTriplesParser(my_sink)
p.parsestring('''
_:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> .
@@ -192,12 +218,12 @@ class BNodeContextTestCase(unittest.TestCase):
my_sink = FakeSink()
bnode_ctx = dict()
- p = ntriples.NTriplesParser(my_sink)
+ p = ntriples.W3CNTriplesParser(my_sink)
p.parsestring('''
_:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> .
''', bnode_context=bnode_ctx)
- q = ntriples.NTriplesParser(my_sink)
+ q = ntriples.W3CNTriplesParser(my_sink)
q.parsestring('''
_:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> .
''', bnode_context=bnode_ctx)
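
The bnode_context tests above hinge on passing a shared dict: the same
blank-node label then maps to one BNode across parser instances. A condensed
sketch (CollectorSink is illustrative; any object with a triple(s, p, o)
method will do):

    from rdflib.plugins.parsers import ntriples

    class CollectorSink(object):
        def __init__(self):
            self.triples = []

        def triple(self, s, p, o):
            self.triples.append((s, p, o))

    sink = CollectorSink()
    shared = dict()  # one blank-node label namespace for both parsers
    p = ntriples.W3CNTriplesParser(sink, bnode_context=shared)
    p.parsestring('_:0 <http://example.org/p> <http://example.org/o1> .\n')
    q = ntriples.W3CNTriplesParser(sink, bnode_context=shared)
    q.parsestring('_:0 <http://example.org/p> <http://example.org/o2> .\n')

    # with a shared context the two _:0 labels are the same BNode
    assert sink.triples[0][0] == sink.triples[1][0]
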
diff --git a/test/test_parse_file_guess_format.py b/test/test_parse_file_guess_format.py
index abb039df..5706f8df 100644
--- a/test/test_parse_file_guess_format.py
+++ b/test/test_parse_file_guess_format.py
@@ -3,7 +3,7 @@ from pathlib import Path
from shutil import copyfile
from tempfile import TemporaryDirectory
-from xml.sax import SAXParseException
+from rdflib.exceptions import ParserError
from rdflib import Graph, logger as graph_logger
@@ -21,11 +21,10 @@ class FileParserGuessFormatTest(unittest.TestCase):
g = Graph()
with TemporaryDirectory() as tmpdirname:
newpath = Path(tmpdirname).joinpath("no_file_ext")
- copyfile("test/w3c/turtle/IRI_subject.ttl", str(newpath))
+ copyfile("test/rdf/Manifest.rdf", str(newpath))
with self.assertLogs(graph_logger, "WARNING") as log_cm:
- with self.assertRaises(SAXParseException):
+ with self.assertRaises(ParserError):
g.parse(str(newpath))
- self.assertTrue(any("Could not guess format" in msg for msg in log_cm.output))
if __name__ == '__main__':
diff --git a/test/test_parser.py b/test/test_parser.py
index 3aaf5658..e337969c 100644
--- a/test/test_parser.py
+++ b/test/test_parser.py
@@ -33,6 +33,7 @@ class ParserTestCase(unittest.TestCase):
</rdf:RDF>
""",
+ format="xml",
publicID="http://example.org",
)
diff --git a/test/test_seq.py b/test/test_seq.py
index 7f177574..5a987ef4 100644
--- a/test/test_seq.py
+++ b/test/test_seq.py
@@ -29,7 +29,7 @@ class SeqTestCase(unittest.TestCase):
def setUp(self):
store = self.store = Graph(store=self.backend)
store.open(self.path)
- store.parse(data=s)
+ store.parse(data=s, format="xml")
def tearDown(self):
self.store.close()
diff --git a/test/test_util.py b/test/test_util.py
index 81ab17ab..ea3b122e 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -293,12 +293,12 @@ class TestUtilTermConvert(unittest.TestCase):
def test_util_from_n3_expectquotedgraph(self):
s = "{<http://example.com/schema>}"
- res = util.from_n3(s, default=None, backend="IOMemory")
+ res = util.from_n3(s, default=None, backend="Memory")
self.assertTrue(isinstance(res, QuotedGraph))
def test_util_from_n3_expectgraph(self):
s = "[<http://example.com/schema>]"
- res = util.from_n3(s, default=None, backend="IOMemory")
+ res = util.from_n3(s, default=None, backend="Memory")
self.assertTrue(isinstance(res, Graph))
diff --git a/test/test_xmlliterals.py b/test/test_xmlliterals.py
index fcc0ddf2..aeabbe88 100644
--- a/test/test_xmlliterals.py
+++ b/test/test_xmlliterals.py
@@ -42,7 +42,7 @@ def testRDFXMLParse():
</rdf:RDF>"""
g = rdflib.Graph()
- g.parse(data=rdfxml)
+ g.parse(data=rdfxml, format="xml")
l1 = list(g)[0][2]
assert l1.datatype == RDF.XMLLiteral