diff options
46 files changed, 1039 insertions, 825 deletions
diff --git a/docs/persistence.rst b/docs/persistence.rst index fbddf38f..bd270a14 100644 --- a/docs/persistence.rst +++ b/docs/persistence.rst @@ -19,8 +19,8 @@ this API for a different store. Stores currently shipped with core RDFLib ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* :class:`Memory <rdflib.plugins.memory.IOMemory>` (not persistent!) -* :class:`~rdflib.plugins.sleepycat.Sleepycat` (on disk persistence via Python's :ref:`bsddb` or :ref:`bsddb3` packages) +* :class:`Memory <rdflib.plugins.stores.memory.Memory>` (not persistent!) +* :class:`~rdflib.plugins.stores.sleepycat.Sleepycat` (on disk persistence via Python's :ref:`bsddb` or :ref:`bsddb3` packages) * :class:`~rdflib.plugins.stores.sparqlstore.SPARQLStore` - a read-only wrapper around a remote SPARQL Query endpoint. * :class:`~rdflib.plugins.stores.sparqlstore.SPARQLUpdateStore` - a read-write wrapper around a remote SPARQL query/update endpoint pair. diff --git a/docs/plugin_parsers.rst b/docs/plugin_parsers.rst index e114958d..81ab7ae6 100644 --- a/docs/plugin_parsers.rst +++ b/docs/plugin_parsers.rst @@ -26,7 +26,7 @@ mdata :class:`~rdflib.plugins.parsers.structureddata.MicrodataParser` microdata :class:`~rdflib.plugins.parsers.structureddata.MicrodataParser` n3 :class:`~rdflib.plugins.parsers.notation3.N3Parser` nquads :class:`~rdflib.plugins.parsers.nquads.NQuadsParser` -nt :class:`~rdflib.plugins.parsers.nt.NTParser` +nt :class:`~rdflib.plugins.parsers.ntriples.NTParser` rdfa :class:`~rdflib.plugins.parsers.structureddata.RDFaParser` rdfa1.0 :class:`~rdflib.plugins.parsers.structureddata.RDFa10Parser` rdfa1.1 :class:`~rdflib.plugins.parsers.structureddata.RDFaParser` diff --git a/docs/plugin_stores.rst b/docs/plugin_stores.rst index 68063577..a936c54e 100644 --- a/docs/plugin_stores.rst +++ b/docs/plugin_stores.rst @@ -10,9 +10,10 @@ Name Class ================= ============================================================ Auditable :class:`~rdflib.plugins.stores.auditable.AuditableStore` 
Concurrent :class:`~rdflib.plugins.stores.concurrent.ConcurrentStore` -IOMemory :class:`~rdflib.plugins.memory.IOMemory` +SimpleMemory :class:`~rdflib.plugins.stores.memory.SimpleMemory` +Memory :class:`~rdflib.plugins.stores.memory.Memory` SPARQLStore :class:`~rdflib.plugins.stores.sparqlstore.SPARQLStore` SPARQLUpdateStore :class:`~rdflib.plugins.stores.sparqlstore.SPARQLUpdateStore` -Sleepycat :class:`~rdflib.plugins.sleepycat.Sleepycat` -default :class:`~rdflib.plugins.memory.IOMemory` +Sleepycat :class:`~rdflib.plugins.stores.sleepycat.Sleepycat` +default :class:`~rdflib.plugins.stores.memory.Memory` ================= ============================================================ diff --git a/docs/sphinx-requirements.txt b/docs/sphinx-requirements.txt index abed60cb..175ef14e 100644 --- a/docs/sphinx-requirements.txt +++ b/docs/sphinx-requirements.txt @@ -1,3 +1,3 @@ -sphinx==3.0.4 +sphinx==3.2.1 sphinxcontrib-apidoc git+https://github.com/gniezen/n3pygments.git diff --git a/docs/univrdfstore.rst b/docs/univrdfstore.rst index f6822e5b..dfb96d81 100644 --- a/docs/univrdfstore.rst +++ b/docs/univrdfstore.rst @@ -344,7 +344,7 @@ These are a list of additional kinds of RDF terms (all of which are special Lite Namespace Management Interfaces =============================== -The following namespace management interfaces (defined in Graph) could be implemented in the RDF store. Currently, they exist as stub methods of :class:`~rdflib.store.Store` and are defined in the store subclasses (e.g. :class:`~rdflib.store.IOMemory`): +The following namespace management interfaces (defined in Graph) could be implemented in the RDF store. Currently, they exist as stub methods of :class:`~rdflib.store.Store` and are defined in the store subclasses (e.g. :class:`~rdflib.plugins.stores.memory.Memory`): .. 
automethod:: rdflib.store.Store.bind :noindex: diff --git a/examples/conjunctive_graphs.py b/examples/conjunctive_graphs.py index f714d9ff..289046ec 100644 --- a/examples/conjunctive_graphs.py +++ b/examples/conjunctive_graphs.py @@ -10,7 +10,7 @@ conjunction (union) of all the graphs. from rdflib import Namespace, Literal, URIRef from rdflib.graph import Graph, ConjunctiveGraph -from rdflib.plugins.memory import IOMemory +from rdflib.plugins.stores.memory import Memory if __name__ == "__main__": @@ -22,7 +22,7 @@ if __name__ == "__main__": cmary = URIRef("http://love.com/lovers/mary") cjohn = URIRef("http://love.com/lovers/john") - store = IOMemory() + store = Memory() g = ConjunctiveGraph(store=store) g.bind("love", ns) @@ -33,7 +33,7 @@ if __name__ == "__main__": gmary.add((mary, ns["hasName"], Literal("Mary"))) gmary.add((mary, ns["loves"], john)) - # add a graph for Mary's facts to the Conjunctive Graph + # add a graph for John's facts to the Conjunctive Graph gjohn = Graph(store=store, identifier=cjohn) # John's graph contains his cute name gjohn.add((john, ns["hasCuteName"], Literal("Johnny Boy"))) diff --git a/rdflib/__init__.py b/rdflib/__init__.py index bce8204f..06b1c2eb 100644 --- a/rdflib/__init__.py +++ b/rdflib/__init__.py @@ -92,7 +92,11 @@ _interactive_mode = False try: import __main__ - if not hasattr(__main__, "__file__") and sys.stdout is not None and sys.stderr.isatty(): + if ( + not hasattr(__main__, "__file__") + and sys.stdout is not None + and sys.stderr.isatty() + ): # show log messages in interactive mode _interactive_mode = True logger.setLevel(logging.INFO) diff --git a/rdflib/collection.py b/rdflib/collection.py index 60f2890f..3136bafd 100644 --- a/rdflib/collection.py +++ b/rdflib/collection.py @@ -14,7 +14,7 @@ class Collection(object): >>> from rdflib.graph import Graph >>> from pprint import pprint >>> listName = BNode() - >>> g = Graph('IOMemory') + >>> g = Graph('Memory') >>> listItem1 = BNode() >>> listItem2 = BNode() >>> 
g.add((listName, RDF.first, Literal(1))) @@ -48,7 +48,7 @@ class Collection(object): """ >>> from rdflib.graph import Graph >>> listName = BNode() - >>> g = Graph('IOMemory') + >>> g = Graph('Memory') >>> listItem1 = BNode() >>> listItem2 = BNode() >>> g.add((listName, RDF.first, Literal(1))) diff --git a/rdflib/compare.py b/rdflib/compare.py index 897a30db..ed4415f2 100644 --- a/rdflib/compare.py +++ b/rdflib/compare.py @@ -335,7 +335,7 @@ class _TripleCanonicalizer(object): coloring.extend(colors) try: si = sequence.index(c) - sequence = sequence[:si] + colors + sequence[si + 1:] + sequence = sequence[:si] + colors + sequence[si + 1 :] except ValueError: sequence = colors[1:] + sequence combined_colors = [] diff --git a/rdflib/extras/describer.py b/rdflib/extras/describer.py index 5f7f3841..48d0bebf 100644 --- a/rdflib/extras/describer.py +++ b/rdflib/extras/describer.py @@ -98,7 +98,7 @@ Full example in the ``to_rdf`` method below:: ... </cv:hasWorkHistory> ... </cv:CV> ... </rdf:RDF> - ... ''') + ... 
''', format="xml") >>> >>> from rdflib.compare import isomorphic >>> isomorphic(person_graph, expected) #doctest: +SKIP diff --git a/rdflib/graph.py b/rdflib/graph.py index 0932c2f3..ebe1c0f1 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -20,6 +20,7 @@ from rdflib.namespace import NamespaceManager from rdflib.resource import Resource from rdflib.collection import Collection import rdflib.util # avoid circular dependency +from rdflib.exceptions import ParserError import os import shutil @@ -100,31 +101,31 @@ see :class:`~rdflib.graph.Dataset` Working with graphs =================== -Instantiating Graphs with default store (IOMemory) and default identifier +Instantiating Graphs with default store (Memory) and default identifier (a BNode): >>> g = Graph() >>> g.store.__class__ - <class 'rdflib.plugins.memory.IOMemory'> + <class 'rdflib.plugins.stores.memory.Memory'> >>> g.identifier.__class__ <class 'rdflib.term.BNode'> -Instantiating Graphs with a IOMemory store and an identifier - +Instantiating Graphs with a Memory store and an identifier - <http://rdflib.net>: - >>> g = Graph('IOMemory', URIRef("http://rdflib.net")) + >>> g = Graph('Memory', URIRef("http://rdflib.net")) >>> g.identifier rdflib.term.URIRef('http://rdflib.net') >>> str(g) # doctest: +NORMALIZE_WHITESPACE "<http://rdflib.net> a rdfg:Graph;rdflib:storage - [a rdflib:Store;rdfs:label 'IOMemory']." + [a rdflib:Store;rdfs:label 'Memory']." Creating a ConjunctiveGraph - The top level container for all named Graphs in a "database": >>> g = ConjunctiveGraph() >>> str(g.default_context) - "[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'IOMemory']]." + "[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']]." Adding / removing reified triples to Graph and iterating over it directly or via triple pattern: @@ -188,7 +189,7 @@ by RDFLib they are UUIDs and unique. 
Graph Aggregation - ConjunctiveGraphs and ReadOnlyGraphAggregate within the same store: - >>> store = plugin.get("IOMemory", Store)() + >>> store = plugin.get("Memory", Store)() >>> g1 = Graph(store) >>> g2 = Graph(store) >>> g3 = Graph(store) @@ -774,13 +775,17 @@ class Graph(Node): # setup the language filtering if lang is not None: if lang == "": # we only want not language-tagged literals + def langfilter(l_): return l_.language is None + else: + def langfilter(l_): return l_.language == lang else: # we don't care about language tags + def langfilter(l_): return True @@ -992,7 +997,7 @@ class Graph(Node): **args ): """ - Parse source adding the resulting triples to the Graph. + Parse an RDF source adding the resulting triples to the Graph. The source is specified using one of source, location, file or data. @@ -1006,9 +1011,10 @@ class Graph(Node): is specified. - `file`: A file-like object. - `data`: A string containing the data to be parsed. - - `format`: Used if format can not be determined from source. - Defaults to rdf/xml. Format support can be extended with plugins, - but "xml", "n3", "nt" & "trix" are built in. + - `format`: Used if format can not be determined from source, e.g. file + extension or Media Type. Defaults to text/turtle. Format support can + be extended with plugins, but "xml", "n3" (use for turtle), "nt" & + "trix" are built in. - `publicID`: the logical URI to use as the document base. If None specified the document location is used (at least in the case where there is a document location). 
@@ -1054,6 +1060,11 @@ class Graph(Node): >>> os.remove(file_name) + >>> # default turtle parsing + >>> result = g.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .") + >>> len(g) + 3 + """ source = create_input_source( @@ -1066,24 +1077,27 @@ class Graph(Node): ) if format is None: format = source.content_type - assumed_xml = False + could_not_guess_format = False if format is None: - if (hasattr(source, "file") - and getattr(source.file, "name", None) - and isinstance(source.file.name, str)): + if ( + hasattr(source, "file") + and getattr(source.file, "name", None) + and isinstance(source.file.name, str) + ): format = rdflib.util.guess_format(source.file.name) if format is None: - format = "application/rdf+xml" - assumed_xml = True + format = "turtle" + could_not_guess_format = True parser = plugin.get(format, Parser)() try: parser.parse(source, self, **args) - except SAXParseException as saxpe: - if assumed_xml: - logger.warning( - "Could not guess format for %r, so assumed xml." - " You can explicitly specify format using the format argument." % source) - raise saxpe + except SyntaxError as se: + if could_not_guess_format: + raise ParserError( + "Could not guess RDF format for %r from file extension so tried Turtle but failed." + "You can explicitly specify format using the format argument." 
% source) + else: + raise se finally: if source.auto_close: source.close() diff --git a/rdflib/namespace.py b/rdflib/namespace.py index b30b4b00..69ccd31d 100644 --- a/rdflib/namespace.py +++ b/rdflib/namespace.py @@ -485,8 +485,8 @@ SKOS = ClosedNamespace( "relatedMatch", ], ) -SOSA = Namespace("http://www.w3.org/ns/ssn/") -SSN = Namespace("http://www.w3.org/ns/sosa/") +SSN = Namespace("http://www.w3.org/ns/ssn/") +SOSA = Namespace("http://www.w3.org/ns/sosa/") TIME = Namespace("http://www.w3.org/2006/time#") VOID = Namespace("http://rdfs.org/ns/void#") XMLNS = Namespace("http://www.w3.org/XML/1998/namespace") @@ -800,7 +800,7 @@ class NamespaceManager(object): NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"] SPLIT_START_CATEGORIES = NAME_START_CATEGORIES + ["Nd"] NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"] -ALLOWED_NAME_CHARS = ["\u00B7", "\u0387", "-", ".", "_", ":"] +ALLOWED_NAME_CHARS = ["\u00B7", "\u0387", "-", ".", "_", ":", "%"] # http://www.w3.org/TR/REC-xml-names/#NT-NCName diff --git a/rdflib/parser.py b/rdflib/parser.py index 4d807e7e..fcaed5e4 100644 --- a/rdflib/parser.py +++ b/rdflib/parser.py @@ -10,11 +10,11 @@ want to do so through the Graph class parse method. 
""" +import codecs import os import sys -from io import BytesIO - +from io import BytesIO, TextIOBase, TextIOWrapper, StringIO, BufferedIOBase from urllib.request import pathname2url from urllib.request import Request @@ -38,6 +38,8 @@ __all__ = [ class Parser(object): + __slots__ = set() + def __init__(self): pass @@ -45,6 +47,37 @@ class Parser(object): pass +class BytesIOWrapper(BufferedIOBase): + __slots__ = ("wrapped", "encoded", "encoding") + + def __init__(self, wrapped: str, encoding="utf-8"): + super(BytesIOWrapper, self).__init__() + self.wrapped = wrapped + self.encoding = encoding + self.encoded = None + + def read(self, *args, **kwargs): + if self.encoded is None: + b, blen = codecs.getencoder(self.encoding)(self.wrapped) + self.encoded = BytesIO(b) + return self.encoded.read(*args, **kwargs) + + def read1(self, *args, **kwargs): + if self.encoded is None: + b = codecs.getencoder(self.encoding)(self.wrapped) + self.encoded = BytesIO(b) + return self.encoded.read1(*args, **kwargs) + + def readinto(self, *args, **kwargs): + raise NotImplementedError() + + def readinto1(self, *args, **kwargs): + raise NotImplementedError() + + def write(self, *args, **kwargs): + raise NotImplementedError() + + class InputSource(xmlreader.InputSource, object): """ TODO: @@ -56,23 +89,39 @@ class InputSource(xmlreader.InputSource, object): self.auto_close = False # see Graph.parse(), true if opened by us def close(self): + c = self.getCharacterStream() + if c and hasattr(c, "close"): + try: + c.close() + except Exception: + pass f = self.getByteStream() if f and hasattr(f, "close"): - f.close() + try: + f.close() + except Exception: + pass class StringInputSource(InputSource): """ - TODO: + Constructs an RDFLib Parser InputSource from a Python String or Bytes """ - def __init__(self, value, system_id=None): + def __init__(self, value, encoding="utf-8", system_id=None): super(StringInputSource, self).__init__(system_id) - stream = BytesIO(value) - self.setByteStream(stream) 
- # TODO: - # encoding = value.encoding - # self.setEncoding(encoding) + if isinstance(value, str): + stream = StringIO(value) + self.setCharacterStream(stream) + self.setEncoding(encoding) + b_stream = BytesIOWrapper(value, encoding) + self.setByteStream(b_stream) + else: + stream = BytesIO(value) + self.setByteStream(stream) + c_stream = TextIOWrapper(stream, encoding) + self.setCharacterStream(c_stream) + self.setEncoding(c_stream.encoding) headers = { @@ -131,8 +180,18 @@ class FileInputSource(InputSource): system_id = URIRef(urljoin("file:", pathname2url(file.name)), base=base) super(FileInputSource, self).__init__(system_id) self.file = file - self.setByteStream(file) - # TODO: self.setEncoding(encoding) + if isinstance(file, TextIOBase): # Python3 unicode fp + self.setCharacterStream(file) + self.setEncoding(file.encoding) + try: + b = file.buffer + self.setByteStream(b) + except (AttributeError, LookupError): + self.setByteStream(file) + else: + self.setByteStream(file) + # We cannot set characterStream here because + # we do not know the Raw Bytes File encoding. 
def __repr__(self): return repr(self.file) @@ -168,10 +227,21 @@ def create_input_source( else: if isinstance(source, str): location = source + elif isinstance(source, bytes): + data = source elif hasattr(source, "read") and not isinstance(source, Namespace): f = source input_source = InputSource() - input_source.setByteStream(f) + if hasattr(source, "encoding"): + input_source.setCharacterStream(source) + input_source.setEncoding(source.encoding) + try: + b = file.buffer + input_source.setByteStream(b) + except (AttributeError, LookupError): + input_source.setByteStream(source) + else: + input_source.setByteStream(f) if f is sys.stdin: input_source.setSystemId("file:///dev/stdin") elif hasattr(f, "name"): @@ -203,8 +273,8 @@ def create_input_source( input_source = FileInputSource(file) if data is not None: - if isinstance(data, str): - data = data.encode("utf-8") + if not isinstance(data, (str, bytes, bytearray)): + raise RuntimeError("parse data can only str, or bytes.") input_source = StringInputSource(data) auto_close = True diff --git a/rdflib/plugin.py b/rdflib/plugin.py index b653be01..2af7370f 100644 --- a/rdflib/plugin.py +++ b/rdflib/plugin.py @@ -11,7 +11,7 @@ following to your setup:: entry_points = { 'rdf.plugins.parser': [ - 'nt = rdf.plugins.parsers.nt:NTParser', + 'nt = rdf.plugins.parsers.ntriples:NTParser', ], 'rdf.plugins.serializer': [ 'nt = rdf.plugins.serializers.NTSerializer:NTSerializer', @@ -128,11 +128,12 @@ def plugins(name=None, kind=None): yield p -register("default", Store, "rdflib.plugins.memory", "IOMemory") -register("IOMemory", Store, "rdflib.plugins.memory", "IOMemory") +register("default", Store, "rdflib.plugins.stores.memory", "Memory") +register("Memory", Store, "rdflib.plugins.stores.memory", "Memory") +register("SimpleMemory", Store, "rdflib.plugins.stores.memory", "SimpleMemory") register("Auditable", Store, "rdflib.plugins.stores.auditable", "AuditableStore") register("Concurrent", Store, "rdflib.plugins.stores.concurrent", 
"ConcurrentStore") -register("Sleepycat", Store, "rdflib.plugins.sleepycat", "Sleepycat") +register("Sleepycat", Store, "rdflib.plugins.stores.sleepycat", "Sleepycat") register("SPARQLStore", Store, "rdflib.plugins.stores.sparqlstore", "SPARQLStore") register( "SPARQLUpdateStore", Store, "rdflib.plugins.stores.sparqlstore", "SPARQLUpdateStore" @@ -182,10 +183,10 @@ register("n3", Parser, "rdflib.plugins.parsers.notation3", "N3Parser") register("text/turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser") register("turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser") register("ttl", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser") -register("application/n-triples", Parser, "rdflib.plugins.parsers.nt", "NTParser") -register("ntriples", Parser, "rdflib.plugins.parsers.nt", "NTParser") -register("nt", Parser, "rdflib.plugins.parsers.nt", "NTParser") -register("nt11", Parser, "rdflib.plugins.parsers.nt", "NTParser") +register("application/n-triples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") +register("ntriples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") +register("nt", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") +register("nt11", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") register("application/n-quads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser") register("nquads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser") register("application/trix", Parser, "rdflib.plugins.parsers.trix", "TriXParser") diff --git a/rdflib/plugins/memory.py b/rdflib/plugins/memory.py deleted file mode 100644 index 1f8bcfa7..00000000 --- a/rdflib/plugins/memory.py +++ /dev/null @@ -1,512 +0,0 @@ -import random - -from rdflib.store import Store - -__all__ = ["Memory", "IOMemory"] - -ANY = Any = None - - -class Memory(Store): - """\ - An in memory implementation of a triple store. - - This triple store uses nested dictionaries to store triples. 
Each - triple is stored in two such indices as follows spo[s][p][o] = 1 and - pos[p][o][s] = 1. - - Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser - """ - - def __init__(self, configuration=None, identifier=None): - super(Memory, self).__init__(configuration) - self.identifier = identifier - - # indexed by [subject][predicate][object] - self.__spo = {} - - # indexed by [predicate][object][subject] - self.__pos = {} - - # indexed by [predicate][object][subject] - self.__osp = {} - - self.__namespace = {} - self.__prefix = {} - - def add(self, triple, context, quoted=False): - """\ - Add a triple to the store of triples. - """ - # add dictionary entries for spo[s][p][p] = 1 and pos[p][o][s] - # = 1, creating the nested dictionaries where they do not yet - # exits. - subject, predicate, object = triple - spo = self.__spo - try: - po = spo[subject] - except: - po = spo[subject] = {} - try: - o = po[predicate] - except: - o = po[predicate] = {} - o[object] = 1 - - pos = self.__pos - try: - os = pos[predicate] - except: - os = pos[predicate] = {} - try: - s = os[object] - except: - s = os[object] = {} - s[subject] = 1 - - osp = self.__osp - try: - sp = osp[object] - except: - sp = osp[object] = {} - try: - p = sp[subject] - except: - p = sp[subject] = {} - p[predicate] = 1 - - def remove(self, triple_pattern, context=None): - for (subject, predicate, object), c in self.triples(triple_pattern): - del self.__spo[subject][predicate][object] - del self.__pos[predicate][object][subject] - del self.__osp[object][subject][predicate] - - def triples(self, triple_pattern, context=None): - """A generator over all the triples matching """ - subject, predicate, object = triple_pattern - if subject != ANY: # subject is given - spo = self.__spo - if subject in spo: - subjectDictionary = spo[subject] - if predicate != ANY: # subject+predicate is given - if predicate in subjectDictionary: - if object != ANY: # subject+predicate+object is given - if object in 
subjectDictionary[predicate]: - yield (subject, predicate, object), self.__contexts() - else: # given object not found - pass - else: # subject+predicate is given, object unbound - for o in subjectDictionary[predicate].keys(): - yield (subject, predicate, o), self.__contexts() - else: # given predicate not found - pass - else: # subject given, predicate unbound - for p in subjectDictionary.keys(): - if object != ANY: # object is given - if object in subjectDictionary[p]: - yield (subject, p, object), self.__contexts() - else: # given object not found - pass - else: # object unbound - for o in subjectDictionary[p].keys(): - yield (subject, p, o), self.__contexts() - else: # given subject not found - pass - elif predicate != ANY: # predicate is given, subject unbound - pos = self.__pos - if predicate in pos: - predicateDictionary = pos[predicate] - if object != ANY: # predicate+object is given, subject unbound - if object in predicateDictionary: - for s in predicateDictionary[object].keys(): - yield (s, predicate, object), self.__contexts() - else: # given object not found - pass - else: # predicate is given, object+subject unbound - for o in predicateDictionary.keys(): - for s in predicateDictionary[o].keys(): - yield (s, predicate, o), self.__contexts() - elif object != ANY: # object is given, subject+predicate unbound - osp = self.__osp - if object in osp: - objectDictionary = osp[object] - for s in objectDictionary.keys(): - for p in objectDictionary[s].keys(): - yield (s, p, object), self.__contexts() - else: # subject+predicate+object unbound - spo = self.__spo - for s in spo.keys(): - subjectDictionary = spo[s] - for p in subjectDictionary.keys(): - for o in subjectDictionary[p].keys(): - yield (s, p, o), self.__contexts() - - def __len__(self, context=None): - # @@ optimize - i = 0 - for triple in self.triples((None, None, None)): - i += 1 - return i - - def bind(self, prefix, namespace): - self.__prefix[namespace] = prefix - self.__namespace[prefix] = 
namespace - - def namespace(self, prefix): - return self.__namespace.get(prefix, None) - - def prefix(self, namespace): - return self.__prefix.get(namespace, None) - - def namespaces(self): - for prefix, namespace in self.__namespace.items(): - yield prefix, namespace - - def __contexts(self): - return (c for c in []) # TODO: best way to return empty generator - - -class IOMemory(Store): - """\ - An integer-key-optimized context-aware in-memory store. - - Uses three dict indices (for subjects, objects and predicates) holding - sets of triples. Context information is tracked in a separate dict, with - the triple as key and a dict of {context: quoted} items as value. The - context information is used to filter triple query results. - - Memory usage is low due to several optimizations. RDF nodes are not - stored directly in the indices; instead, the indices hold integer keys - and the actual nodes are only stored once in int-to-object and - object-to-int mapping dictionaries. A default context is determined - based on the first triple that is added to the store, and no context - information is actually stored for subsequent other triples with the - same context information. - - Most operations should be quite fast, but a triples() query with two - bound parts requires a set intersection operation, which may be slow in - some cases. When multiple contexts are used in the same store, filtering - based on context has to be done after each query, which may also be - slow. 
- - """ - - context_aware = True - formula_aware = True - graph_aware = True - - # The following variable name conventions are used in this class: - # - # subject, predicate, object unencoded triple parts - # triple = (subject, predicate, object) unencoded triple - # context: unencoded context - # - # sid, pid, oid integer-encoded triple parts - # enctriple = (sid, pid, oid) integer-encoded triple - # cid integer-encoded context - - def __init__(self, configuration=None, identifier=None): - super(IOMemory, self).__init__() - self.__namespace = {} - self.__prefix = {} - - # Mappings for encoding RDF nodes using integer keys, to save memory - # in the indexes Note that None is always mapped to itself, to make - # it easy to test for it in either encoded or unencoded form. - self.__int2obj = {None: None} # maps integer keys to objects - self.__obj2int = {None: None} # maps objects to integer keys - - # Indexes for each triple part, and a list of contexts for each triple - self.__subjectIndex = {} # key: sid val: set(enctriples) - self.__predicateIndex = {} # key: pid val: set(enctriples) - self.__objectIndex = {} # key: oid val: set(enctriples) - self.__tripleContexts = ( - {} - ) # key: enctriple val: {cid1: quoted, cid2: quoted ...} - self.__contextTriples = {None: set()} # key: cid val: set(enctriples) - - # all contexts used in store (unencoded) - self.__all_contexts = set() - # default context information for triples - self.__defaultContexts = None - - def bind(self, prefix, namespace): - self.__prefix[namespace] = prefix - self.__namespace[prefix] = namespace - - def namespace(self, prefix): - return self.__namespace.get(prefix, None) - - def prefix(self, namespace): - return self.__prefix.get(namespace, None) - - def namespaces(self): - for prefix, namespace in self.__namespace.items(): - yield prefix, namespace - - def add(self, triple, context, quoted=False): - Store.add(self, triple, context, quoted) - - if context is not None: - 
self.__all_contexts.add(context) - - enctriple = self.__encodeTriple(triple) - sid, pid, oid = enctriple - - self.__addTripleContext(enctriple, context, quoted) - - if sid in self.__subjectIndex: - self.__subjectIndex[sid].add(enctriple) - else: - self.__subjectIndex[sid] = set([enctriple]) - - if pid in self.__predicateIndex: - self.__predicateIndex[pid].add(enctriple) - else: - self.__predicateIndex[pid] = set([enctriple]) - - if oid in self.__objectIndex: - self.__objectIndex[oid].add(enctriple) - else: - self.__objectIndex[oid] = set([enctriple]) - - def remove(self, triplepat, context=None): - req_cid = self.__obj2id(context) - for triple, contexts in self.triples(triplepat, context): - enctriple = self.__encodeTriple(triple) - for cid in self.__getTripleContexts(enctriple): - if context is not None and req_cid != cid: - continue - self.__removeTripleContext(enctriple, cid) - ctxs = self.__getTripleContexts(enctriple, skipQuoted=True) - if None in ctxs and (context is None or len(ctxs) == 1): - self.__removeTripleContext(enctriple, None) - if len(self.__getTripleContexts(enctriple)) == 0: - # triple has been removed from all contexts - sid, pid, oid = enctriple - self.__subjectIndex[sid].remove(enctriple) - self.__predicateIndex[pid].remove(enctriple) - self.__objectIndex[oid].remove(enctriple) - - del self.__tripleContexts[enctriple] - - if ( - req_cid is not None - and req_cid in self.__contextTriples - and len(self.__contextTriples[req_cid]) == 0 - ): - # all triples are removed out of this context - # and it's not the default context so delete it - del self.__contextTriples[req_cid] - - if ( - triplepat == (None, None, None) - and context in self.__all_contexts - and not self.graph_aware - ): - # remove the whole context - self.__all_contexts.remove(context) - - def triples(self, triplein, context=None): - if context is not None: - if context == self: # hmm...does this really ever happen? 
- context = None - - cid = self.__obj2id(context) - enctriple = self.__encodeTriple(triplein) - sid, pid, oid = enctriple - - # all triples case (no triple parts given as pattern) - if sid is None and pid is None and oid is None: - return self.__all_triples(cid) - - # optimize "triple in graph" case (all parts given) - if sid is not None and pid is not None and oid is not None: - if ( - sid in self.__subjectIndex - and enctriple in self.__subjectIndex[sid] - and self.__tripleHasContext(enctriple, cid) - ): - return ((triplein, self.__contexts(enctriple)) for i in [0]) - else: - return self.__emptygen() - - # remaining cases: one or two out of three given - sets = [] - if sid is not None: - if sid in self.__subjectIndex: - sets.append(self.__subjectIndex[sid]) - else: - return self.__emptygen() - if pid is not None: - if pid in self.__predicateIndex: - sets.append(self.__predicateIndex[pid]) - else: - return self.__emptygen() - if oid is not None: - if oid in self.__objectIndex: - sets.append(self.__objectIndex[oid]) - else: - return self.__emptygen() - - # to get the result, do an intersection of the sets (if necessary) - if len(sets) > 1: - enctriples = sets[0].intersection(*sets[1:]) - else: - enctriples = sets[0].copy() - - return ( - (self.__decodeTriple(enctriple), self.__contexts(enctriple)) - for enctriple in enctriples - if self.__tripleHasContext(enctriple, cid) - ) - - def contexts(self, triple=None): - if triple is None or triple == (None, None, None): - return (context for context in self.__all_contexts) - - enctriple = self.__encodeTriple(triple) - sid, pid, oid = enctriple - if sid in self.__subjectIndex and enctriple in self.__subjectIndex[sid]: - return self.__contexts(enctriple) - else: - return self.__emptygen() - - def __len__(self, context=None): - cid = self.__obj2id(context) - if cid not in self.__contextTriples: - return 0 - return len(self.__contextTriples[cid]) - - def add_graph(self, graph): - if not self.graph_aware: - 
Store.add_graph(self, graph) - else: - self.__all_contexts.add(graph) - - def remove_graph(self, graph): - if not self.graph_aware: - Store.remove_graph(self, graph) - else: - self.remove((None, None, None), graph) - try: - self.__all_contexts.remove(graph) - except KeyError: - pass # we didn't know this graph, no problem - - # internal utility methods below - - def __addTripleContext(self, enctriple, context, quoted): - """add the given context to the set of contexts for the triple""" - cid = self.__obj2id(context) - - sid, pid, oid = enctriple - if sid in self.__subjectIndex and enctriple in self.__subjectIndex[sid]: - # we know the triple exists somewhere in the store - if enctriple not in self.__tripleContexts: - # triple exists with default ctx info - # start with a copy of the default ctx info - self.__tripleContexts[enctriple] = self.__defaultContexts.copy() - - self.__tripleContexts[enctriple][cid] = quoted - if not quoted: - self.__tripleContexts[enctriple][None] = quoted - else: - # the triple didn't exist before in the store - if quoted: # this context only - self.__tripleContexts[enctriple] = {cid: quoted} - else: # default context as well - self.__tripleContexts[enctriple] = {cid: quoted, None: quoted} - - # if the triple is not quoted add it to the default context - if not quoted: - self.__contextTriples[None].add(enctriple) - - # always add the triple to given context, making sure it's initialized - if cid not in self.__contextTriples: - self.__contextTriples[cid] = set() - self.__contextTriples[cid].add(enctriple) - - # if this is the first ever triple in the store, set default ctx info - if self.__defaultContexts is None: - self.__defaultContexts = self.__tripleContexts[enctriple] - - # if the context info is the same as default, no need to store it - if self.__tripleContexts[enctriple] == self.__defaultContexts: - del self.__tripleContexts[enctriple] - - def __getTripleContexts(self, enctriple, skipQuoted=False): - """return a list of (encoded) 
contexts for the triple, skipping - quoted contexts if skipQuoted==True""" - - ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts) - - if not skipQuoted: - return ctxs.keys() - - return [cid for cid, quoted in ctxs.items() if not quoted] - - def __tripleHasContext(self, enctriple, cid): - """return True iff the triple exists in the given context""" - ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts) - return cid in ctxs - - def __removeTripleContext(self, enctriple, cid): - """remove the context from the triple""" - ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts).copy() - del ctxs[cid] - if ctxs == self.__defaultContexts: - del self.__tripleContexts[enctriple] - else: - self.__tripleContexts[enctriple] = ctxs - self.__contextTriples[cid].remove(enctriple) - - def __obj2id(self, obj): - """encode object, storing it in the encoding map if necessary, - and return the integer key""" - if obj not in self.__obj2int: - id = randid() - while id in self.__int2obj: - id = randid() - self.__obj2int[obj] = id - self.__int2obj[id] = obj - return id - return self.__obj2int[obj] - - def __encodeTriple(self, triple): - """encode a whole triple, returning the encoded triple""" - return tuple(map(self.__obj2id, triple)) - - def __decodeTriple(self, enctriple): - """decode a whole encoded triple, returning the original - triple""" - return tuple(map(self.__int2obj.get, enctriple)) - - def __all_triples(self, cid): - """return a generator which yields all the triples (unencoded) - of the given context""" - if cid not in self.__contextTriples: - return - for enctriple in self.__contextTriples[cid].copy(): - yield self.__decodeTriple(enctriple), self.__contexts(enctriple) - - def __contexts(self, enctriple): - """return a generator for all the non-quoted contexts - (unencoded) the encoded triple appears in""" - return ( - self.__int2obj.get(cid) - for cid in self.__getTripleContexts(enctriple, skipQuoted=True) - if cid is not None - 
) - - def __emptygen(self): - """return an empty generator""" - if False: - yield - - -def randid(randint=random.randint, choice=random.choice, signs=(-1, 1)): - return choice(signs) * randint(1, 2000000000) - - -del random diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index 37b2cde8..2573d5d1 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -136,10 +136,13 @@ def join(here, there): return here + frag # join('mid:foo@example', '../foo') bzzt - if here[bcolonl + 1: bcolonl + 2] != "/": - raise ValueError("Base <%s> has no slash after " "colon - with relative '%s'." % (here, there)) + if here[bcolonl + 1 : bcolonl + 2] != "/": + raise ValueError( + "Base <%s> has no slash after " + "colon - with relative '%s'." % (here, there) + ) - if here[bcolonl + 1: bcolonl + 3] == "//": + if here[bcolonl + 1 : bcolonl + 3] == "//": bpath = here.find("/", bcolonl + 3) else: bpath = bcolonl + 1 @@ -499,14 +502,14 @@ class SinkParser: """ assert tok[0] not in _notNameChars # not for punctuation - if argstr[i: i + 1] == "@": + if argstr[i : i + 1] == "@": i = i + 1 else: if tok not in self.keywords: return -1 # No, this has neither keywords declaration nor "@" if ( - argstr[i: i + len(tok)] == tok + argstr[i : i + len(tok)] == tok and (argstr[i + len(tok)] in _notKeywordsChars) or (colon and argstr[i + len(tok)] == ":") ): @@ -523,7 +526,7 @@ class SinkParser: assert tok[0] not in _notNameChars # not for punctuation - if argstr[i: i + len(tok)].lower() == tok.lower() and ( + if argstr[i : i + len(tok)].lower() == tok.lower() and ( argstr[i + len(tok)] in _notQNameChars ): i = i + len(tok) @@ -791,23 +794,23 @@ class SinkParser: res.append(("->", RDF_type)) return j - if argstr[i: i + 2] == "<=": + if argstr[i : i + 2] == "<=": if self.turtle: self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. 
") res.append(("<-", self._store.newSymbol(Logic_NS + "implies"))) return i + 2 - if argstr[i: i + 1] == "=": + if argstr[i : i + 1] == "=": if self.turtle: self.BadSyntax(argstr, i, "Found '=' in Turtle mode") - if argstr[i + 1: i + 2] == ">": + if argstr[i + 1 : i + 2] == ">": res.append(("->", self._store.newSymbol(Logic_NS + "implies"))) return i + 2 res.append(("->", DAML_sameAs)) return i + 1 - if argstr[i: i + 2] == ":=": + if argstr[i : i + 2] == ":=": if self.turtle: self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") @@ -820,7 +823,7 @@ class SinkParser: res.append(("->", r[0])) return j - if argstr[i: i + 2] == ">-" or argstr[i: i + 2] == "<-": + if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-": self.BadSyntax(argstr, j, ">- ... -> syntax is obsolete.") return -1 @@ -841,8 +844,8 @@ class SinkParser: if j < 0: return j # nope - while argstr[j: j + 1] in "!^": # no spaces, must follow exactly (?) - ch = argstr[j: j + 1] + while argstr[j : j + 1] in "!^": # no spaces, must follow exactly (?) + ch = argstr[j : j + 1] subj = res.pop() obj = self.blankNode(uri=self.here(j)) j = self.node(argstr, j + 1, res) @@ -876,7 +879,7 @@ class SinkParser: if j < 0: return j # eof i = j - ch = argstr[i: i + 1] # Quick 1-character checks first: + ch = argstr[i : i + 1] # Quick 1-character checks first: if ch == "[": bnodeID = self.here(i) @@ -884,7 +887,7 @@ class SinkParser: if j < 0: self.BadSyntax(argstr, i, "EOF after '['") # Hack for "is" binding name to anon node - if argstr[j: j + 1] == "=": + if argstr[j : j + 1] == "=": if self.turtle: self.BadSyntax( argstr, j, "Found '[=' or '[ =' when in turtle mode." 
@@ -902,7 +905,7 @@ class SinkParser: self.BadSyntax( argstr, i, "EOF when objectList expected after [ = " ) - if argstr[j: j + 1] == ";": + if argstr[j : j + 1] == ";": j = j + 1 else: self.BadSyntax(argstr, i, "objectList expected after [= ") @@ -919,7 +922,7 @@ class SinkParser: self.BadSyntax( argstr, i, "EOF when ']' expected after [ <propertyList>" ) - if argstr[j: j + 1] != "]": + if argstr[j : j + 1] != "]": self.BadSyntax(argstr, j, "']' expected") res.append(subj) return j + 1 @@ -928,7 +931,7 @@ class SinkParser: # if self.turtle: # self.BadSyntax(argstr, i, # "found '{' while in Turtle mode, Formulas not supported!") - ch2 = argstr[i + 1: i + 2] + ch2 = argstr[i + 1 : i + 2] if ch2 == "$": # a set i += 1 @@ -939,12 +942,12 @@ class SinkParser: i = self.skipSpace(argstr, j) if i < 0: self.BadSyntax(argstr, i, "needed '$}', found end.") - if argstr[i: i + 2] == "$}": + if argstr[i : i + 2] == "$}": j = i + 2 break if not first_run: - if argstr[i: i + 1] == ",": + if argstr[i : i + 1] == ",": i += 1 else: self.BadSyntax(argstr, i, "expected: ','") @@ -979,7 +982,7 @@ class SinkParser: if i < 0: self.BadSyntax(argstr, i, "needed '}', found end.") - if argstr[i: i + 1] == "}": + if argstr[i : i + 1] == "}": j = i + 1 break @@ -998,7 +1001,7 @@ class SinkParser: if ch == "(": thing_type = self._store.newList - ch2 = argstr[i + 1: i + 2] + ch2 = argstr[i + 1 : i + 2] if ch2 == "$": thing_type = self._store.newSet i += 1 @@ -1009,7 +1012,7 @@ class SinkParser: i = self.skipSpace(argstr, j) if i < 0: self.BadSyntax(argstr, i, "needed ')', found end.") - if argstr[i: i + 1] == ")": + if argstr[i : i + 1] == ")": j = i + 1 break @@ -1062,7 +1065,7 @@ class SinkParser: break i = j + 1 - if argstr[j: j + 2] == ":-": + if argstr[j : j + 2] == ":-": if self.turtle: self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") i = j + 2 @@ -1092,7 +1095,7 @@ class SinkParser: j = self.skipSpace(argstr, i) if j < 0: self.BadSyntax(argstr, j, "EOF found in list of objects") 
- if argstr[i: i + 1] != ";": + if argstr[i : i + 1] != ";": return i i = i + 1 # skip semicolon and continue @@ -1113,7 +1116,7 @@ class SinkParser: j = self.skipSpace(argstr, i) if j < 0: return j # eof - ch = argstr[j: j + 1] + ch = argstr[j : j + 1] if ch != ",": if ch != ".": return -1 @@ -1130,7 +1133,7 @@ class SinkParser: j = self.skipSpace(argstr, i) if j < 0: self.BadSyntax(argstr, j, "EOF found after object") - if argstr[j: j + 1] != ",": + if argstr[j : j + 1] != ",": return j # Found something else! i = self.object(argstr, j + 1, res) if i < 0: @@ -1140,11 +1143,11 @@ class SinkParser: j = self.skipSpace(argstr, i) if j < 0: return j # eof - if argstr[j: j + 1] == ".": + if argstr[j : j + 1] == ".": return j + 1 # skip - if argstr[j: j + 1] == "}": + if argstr[j : j + 1] == "}": return j # don't skip it - if argstr[j: j + 1] == "]": + if argstr[j : j + 1] == "]": return j self.BadSyntax(argstr, j, "expected '.' or '}' or ']' at end of statement") @@ -1209,7 +1212,7 @@ class SinkParser: assert ( ":" in uref ), "With no base URI, cannot deal with relative URIs" - if argstr[i - 1: i] == "#" and not uref[-1:] == "#": + if argstr[i - 1 : i] == "#" and not uref[-1:] == "#": uref = uref + "#" # She meant it! Weirdness in urlparse? symb = self._store.newSymbol(uref) if symb in self._variables: @@ -1258,7 +1261,7 @@ class SinkParser: if j < 0: return -1 - if argstr[j: j + 1] != "?": + if argstr[j : j + 1] != "?": return -1 j = j + 1 i = j @@ -1416,7 +1419,7 @@ class SinkParser: i = j if argstr[i] in self.string_delimiters: - if argstr[i: i + 3] == argstr[i] * 3: + if argstr[i : i + 3] == argstr[i] * 3: delim = argstr[i] * 3 else: delim = argstr[i] @@ -1464,7 +1467,7 @@ class SinkParser: # return -1 ## or fall through? 
if argstr[i] in self.string_delimiters: - if argstr[i: i + 3] == argstr[i] * 3: + if argstr[i : i + 3] == argstr[i] * 3: delim = argstr[i] * 3 else: delim = argstr[i] @@ -1473,7 +1476,7 @@ class SinkParser: dt = None j, s = self.strconst(argstr, i, delim) lang = None - if argstr[j: j + 1] == "@": # Language? + if argstr[j : j + 1] == "@": # Language? m = langcode.match(argstr, j + 1) if m is None: raise BadSyntax( @@ -1484,9 +1487,9 @@ class SinkParser: "Bad language code syntax on string " + "literal, after @", ) i = m.end() - lang = argstr[j + 1: i] + lang = argstr[j + 1 : i] j = i - if argstr[j: j + 2] == "^^": + if argstr[j : j + 2] == "^^": res2 = [] j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI dt = res2[0] @@ -1519,15 +1522,15 @@ class SinkParser: if ( delim == delim3 ): # done when delim is """ or ''' and, respectively ... - if argstr[j: j + 5] == delim5: # ... we have "" or '' before + if argstr[j : j + 5] == delim5: # ... we have "" or '' before i = j + 5 ustr = ustr + delim2 return i, ustr - if argstr[j: j + 4] == delim4: # ... we have " or ' before + if argstr[j : j + 4] == delim4: # ... we have " or ' before i = j + 4 ustr = ustr + delim1 return i, ustr - if argstr[j: j + 3] == delim3: # current " or ' is part of delim + if argstr[j : j + 3] == delim3: # current " or ' is part of delim i = j + 3 return i, ustr @@ -1539,8 +1542,8 @@ class SinkParser: m = interesting.search(argstr, j) # was argstr[j:]. # Note for pos param to work, MUST be compiled ... re bug? 
assert m, "Quote expected in string at ^ in %s^%s" % ( - argstr[j - 20: j], - argstr[j: j + 20], + argstr[j - 20 : j], + argstr[j : j + 20], ) # at least need a quote i = m.start() @@ -1586,7 +1589,7 @@ class SinkParser: elif ch == "\\": j = i + 1 - ch = argstr[j: j + 1] # Will be empty if string ends + ch = argstr[j : j + 1] # Will be empty if string ends if not ch: raise BadSyntax( self._thisDoc, @@ -1617,14 +1620,14 @@ class SinkParser: self._thisDoc, startline, argstr, i, "unterminated string literal(3)" ) try: - return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i: i + n]) + return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) except: raise BadSyntax( self._thisDoc, startline, argstr, i, - "bad string literal hex escape: " + argstr[i: i + n], + "bad string literal hex escape: " + argstr[i : i + n], ) def uEscape(self, argstr, i, startline): @@ -1669,7 +1672,7 @@ class BadSyntax(SyntaxError): self._why, pre, argstr[st:i], - argstr[i: i + 60], + argstr[i : i + 60], post, ) @@ -1893,8 +1896,11 @@ class TurtleParser(Parser): baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") p = SinkParser(sink, baseURI=baseURI, turtle=turtle) - - p.loadStream(source.getByteStream()) + # N3 parser prefers str stream + stream = source.getCharacterStream() + if not stream: + stream = source.getByteStream() + p.loadStream(stream) for prefix, namespace in p._bindings.items(): graph.bind(prefix, namespace) diff --git a/rdflib/plugins/parsers/nquads.py b/rdflib/plugins/parsers/nquads.py index 5defabc9..d44139c5 100644 --- a/rdflib/plugins/parsers/nquads.py +++ b/rdflib/plugins/parsers/nquads.py @@ -28,7 +28,7 @@ from codecs import getreader from rdflib import ConjunctiveGraph # Build up from the NTriples parser: -from rdflib.plugins.parsers.ntriples import NTriplesParser +from rdflib.plugins.parsers.ntriples import W3CNTriplesParser from rdflib.plugins.parsers.ntriples import ParseError from rdflib.plugins.parsers.ntriples import 
r_tail from rdflib.plugins.parsers.ntriples import r_wspace @@ -36,7 +36,7 @@ from rdflib.plugins.parsers.ntriples import r_wspace __all__ = ["NQuadsParser"] -class NQuadsParser(NTriplesParser): +class NQuadsParser(W3CNTriplesParser): def parse(self, inputsource, sink, bnode_context=None, **kwargs): """ Parse inputsource as an N-Quads file. @@ -54,13 +54,14 @@ class NQuadsParser(NTriplesParser): ) self.sink = ConjunctiveGraph(store=sink.store, identifier=sink.identifier) - source = inputsource.getByteStream() + source = inputsource.getCharacterStream() + if not source: + source = inputsource.getByteStream() + source = getreader("utf-8")(source) if not hasattr(source, "read"): raise ParseError("Item to parse must be a file-like object.") - source = getreader("utf-8")(source) - self.file = source self.buffer = "" while True: diff --git a/rdflib/plugins/parsers/nt.py b/rdflib/plugins/parsers/nt.py deleted file mode 100644 index c37a1aa0..00000000 --- a/rdflib/plugins/parsers/nt.py +++ /dev/null @@ -1,33 +0,0 @@ -from rdflib.parser import Parser -from rdflib.plugins.parsers.ntriples import NTriplesParser - -__all__ = ["NTSink", "NTParser"] - - -class NTSink(object): - def __init__(self, graph): - self.graph = graph - - def triple(self, s, p, o): - self.graph.add((s, p, o)) - - -class NTParser(Parser): - """parser for the ntriples format, often stored with the .nt extension - - See http://www.w3.org/TR/rdf-testcases/#ntriples""" - - def parse(self, source, sink, **kwargs): - ''' - Parse the NT format - - :type source: `rdflib.parser.InputSource` - :param source: the source of NT-formatted data - :type sink: `rdflib.graph.Graph` - :param sink: where to send parsed triples - :param kwargs: Additional arguments to pass to `.NTriplesParser.parse` - ''' - f = source.getByteStream() # TODO getCharacterStream? 
- parser = NTriplesParser(NTSink(sink)) - parser.parse(f, **kwargs) - f.close() diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index a89aece5..9d441beb 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -1,6 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -__doc__ = """ +__doc__ = """\ N-Triples Parser License: GPL 2, W3C, BSD, or MIT Author: Sean B. Palmer, inamidst.com @@ -12,14 +12,13 @@ import codecs from rdflib.term import URIRef as URI from rdflib.term import BNode as bNode from rdflib.term import Literal - - -from rdflib.compat import cast_bytes from rdflib.compat import decodeUnicodeEscape +from rdflib.exceptions import ParserError as ParseError +from rdflib.parser import Parser -from io import BytesIO +from io import StringIO, TextIOBase, BytesIO -__all__ = ["unquote", "uriquote", "Sink", "NTriplesParser"] +__all__ = ["unquote", "uriquote", "W3CNTriplesParser", "NTGraphSink", "NTParser"] uriref = r'<([^:]+:[^\s"<>]*)>' literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"' @@ -37,15 +36,7 @@ bufsiz = 2048 validate = False -class Node(str): - pass - - -class ParseError(Exception): - pass - - -class Sink(object): +class DummySink(object): def __init__(self): self.length = 0 @@ -75,7 +66,7 @@ def unquote(s): while s: m = r_safe.match(s) if m: - s = s[m.end():] + s = s[m.end() :] result.append(m.group(1)) continue @@ -87,7 +78,7 @@ def unquote(s): m = r_uniquot.match(s) if m: - s = s[m.end():] + s = s[m.end() :] u, U = m.groups() codepoint = int(u or U, 16) if codepoint > 0x10FFFF: @@ -110,11 +101,10 @@ def uriquote(uri): return r_hibyte.sub(lambda m: "%%%02X" % ord(m.group(1)), uri) -class NTriplesParser(object): +class W3CNTriplesParser(object): """An N-Triples Parser. 
- + This is a legacy-style Triples parser for NTriples provided by W3C Usage:: - p = NTriplesParser(sink=MySink()) sink = p.parse(f) # file; use parsestring for a string @@ -124,6 +114,8 @@ class NTriplesParser(object): `NTriplesParser`. """ + __slots__ = ("_bnode_ids", "sink", "buffer", "file", "line") + def __init__(self, sink=None, bnode_context=None): if bnode_context is not None: self._bnode_ids = bnode_context @@ -133,7 +125,11 @@ class NTriplesParser(object): if sink is not None: self.sink = sink else: - self.sink = Sink() + self.sink = DummySink() + + self.buffer = None + self.file = None + self.line = "" def parse(self, f, bnode_context=None): """ @@ -147,10 +143,13 @@ class NTriplesParser(object): passed in to define a distinct context for a given call to `parse`. """ + if not hasattr(f, "read"): raise ParseError("Item to parse must be a file-like object.") - # since N-Triples 1.1 files can and should be utf-8 encoded - f = codecs.getreader("utf-8")(f) + + if not hasattr(f, "encoding") and not hasattr(f, "charbuffer"): + # someone still using a bytestream here? 
+ f = codecs.getreader("utf-8")(f) self.file = f self.buffer = "" @@ -161,16 +160,17 @@ class NTriplesParser(object): try: self.parseline(bnode_context=bnode_context) except ParseError: - raise ParseError("Invalid line: %r" % self.line) + raise ParseError("Invalid line: {}".format(self.line)) return self.sink def parsestring(self, s, **kwargs): """Parse s as an N-Triples string.""" - if not isinstance(s, str): + if not isinstance(s, (str, bytes, bytearray)): raise ParseError("Item to parse must be a string instance.") - f = BytesIO() - f.write(cast_bytes(s)) - f.seek(0) + if isinstance(s, (bytes, bytearray)): + f = codecs.getreader("utf-8")(BytesIO(s)) + else: + f = StringIO(s) self.parse(f, **kwargs) def readline(self): @@ -186,7 +186,7 @@ class NTriplesParser(object): while True: m = r_line.match(self.buffer) if m: # the more likely prospect - self.buffer = self.buffer[m.end():] + self.buffer = self.buffer[m.end() :] return m.group(1) else: buffer = self.file.read(bufsiz) @@ -208,12 +208,12 @@ class NTriplesParser(object): predicate = self.predicate() self.eat(r_wspaces) - object = self.object(bnode_context) + object_ = self.object(bnode_context) self.eat(r_tail) if self.line: - raise ParseError("Trailing garbage") - self.sink.triple(subject, predicate, object) + raise ParseError("Trailing garbage: {}".format(self.line)) + self.sink.triple(subject, predicate, object_) def peek(self, token): return self.line.startswith(token) @@ -224,7 +224,7 @@ class NTriplesParser(object): # print(dir(pattern)) # print repr(self.line), type(self.line) raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line)) - self.line = self.line[m.end():] + self.line = self.line[m.end() :] return m def subject(self, bnode_context=None): @@ -292,13 +292,44 @@ class NTriplesParser(object): return False -# # Obsolete, unused -# def parseURI(uri): -# import urllib -# parser = NTriplesParser() -# u = urllib.urlopen(uri) -# sink = parser.parse(u) -# u.close() -# # for triple in 
sink: -# # print triple -# print 'Length of input:', sink.length +class NTGraphSink(object): + __slots__ = ("g",) + + def __init__(self, graph): + self.g = graph + + def triple(self, s, p, o): + self.g.add((s, p, o)) + + +class NTParser(Parser): + """parser for the ntriples format, often stored with the .nt extension + + See http://www.w3.org/TR/rdf-testcases/#ntriples""" + + __slots__ = set() + + @classmethod + def parse(cls, source, sink, **kwargs): + """ + Parse the NT format + + :type source: `rdflib.parser.InputSource` + :param source: the source of NT-formatted data + :type sink: `rdflib.graph.Graph` + :param sink: where to send parsed triples + :param kwargs: Additional arguments to pass to `.NTriplesParser.parse` + """ + f = source.getCharacterStream() + if not f: + b = source.getByteStream() + # TextIOBase includes: StringIO and TextIOWrapper + if isinstance(b, TextIOBase): + # f is not really a ByteStream, but a CharacterStream + f = b + else: + # since N-Triples 1.1 files can and should be utf-8 encoded + f = codecs.getreader("utf-8")(b) + parser = W3CNTriplesParser(NTGraphSink(sink)) + parser.parse(f, **kwargs) + f.close() diff --git a/rdflib/plugins/parsers/trig.py b/rdflib/plugins/parsers/trig.py index c0906c88..9caa0662 100644 --- a/rdflib/plugins/parsers/trig.py +++ b/rdflib/plugins/parsers/trig.py @@ -80,7 +80,7 @@ class TrigSinkParser(SinkParser): if j < 0: self.BadSyntax(argstr, i, "EOF found when expected graph") - if argstr[j: j + 1] == "=": # optional = for legacy support + if argstr[j : j + 1] == "=": # optional = for legacy support i = self.skipSpace(argstr, j + 1) if i < 0: @@ -88,7 +88,7 @@ class TrigSinkParser(SinkParser): else: i = j - if argstr[i: i + 1] != "{": + if argstr[i : i + 1] != "{": return -1 # the node wasn't part of a graph j = i + 1 @@ -104,7 +104,7 @@ class TrigSinkParser(SinkParser): if i < 0: self.BadSyntax(argstr, i, "needed '}', found end.") - if argstr[i: i + 1] == "}": + if argstr[i : i + 1] == "}": j = i + 1 break 
@@ -151,7 +151,11 @@ class TrigParser(Parser): ) p = TrigSinkParser(sink, baseURI=baseURI, turtle=True) - p.loadStream(source.getByteStream()) + stream = source.getCharacterStream() # try to get str stream first + if not stream: + # fallback to get the bytes stream + stream = source.getByteStream() + p.loadStream(stream) for prefix, namespace in p._bindings.items(): conj_graph.bind(prefix, namespace) diff --git a/rdflib/plugins/stores/memory.py b/rdflib/plugins/stores/memory.py new file mode 100644 index 00000000..93b6ec25 --- /dev/null +++ b/rdflib/plugins/stores/memory.py @@ -0,0 +1,533 @@ +# +# +from rdflib.store import Store + +__all__ = ["SimpleMemory", "Memory"] + +ANY = None + + +class SimpleMemory(Store): + """\ + A fast naive in memory implementation of a triple store. + + This triple store uses nested dictionaries to store triples. Each + triple is stored in two such indices as follows spo[s][p][o] = 1 and + pos[p][o][s] = 1. + + Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser + """ + + def __init__(self, configuration=None, identifier=None): + super(SimpleMemory, self).__init__(configuration) + self.identifier = identifier + + # indexed by [subject][predicate][object] + self.__spo = {} + + # indexed by [predicate][object][subject] + self.__pos = {} + + # indexed by [predicate][object][subject] + self.__osp = {} + + self.__namespace = {} + self.__prefix = {} + + def add(self, triple, context, quoted=False): + """\ + Add a triple to the store of triples. + """ + # add dictionary entries for spo[s][p][p] = 1 and pos[p][o][s] + # = 1, creating the nested dictionaries where they do not yet + # exits. 
+ subject, predicate, object = triple + spo = self.__spo + try: + po = spo[subject] + except: + po = spo[subject] = {} + try: + o = po[predicate] + except: + o = po[predicate] = {} + o[object] = 1 + + pos = self.__pos + try: + os = pos[predicate] + except: + os = pos[predicate] = {} + try: + s = os[object] + except: + s = os[object] = {} + s[subject] = 1 + + osp = self.__osp + try: + sp = osp[object] + except: + sp = osp[object] = {} + try: + p = sp[subject] + except: + p = sp[subject] = {} + p[predicate] = 1 + + def remove(self, triple_pattern, context=None): + for (subject, predicate, object), c in list(self.triples(triple_pattern)): + del self.__spo[subject][predicate][object] + del self.__pos[predicate][object][subject] + del self.__osp[object][subject][predicate] + + def triples(self, triple_pattern, context=None): + """A generator over all the triples matching """ + subject, predicate, object = triple_pattern + if subject != ANY: # subject is given + spo = self.__spo + if subject in spo: + subjectDictionary = spo[subject] + if predicate != ANY: # subject+predicate is given + if predicate in subjectDictionary: + if object != ANY: # subject+predicate+object is given + if object in subjectDictionary[predicate]: + yield (subject, predicate, object), self.__contexts() + else: # given object not found + pass + else: # subject+predicate is given, object unbound + for o in subjectDictionary[predicate].keys(): + yield (subject, predicate, o), self.__contexts() + else: # given predicate not found + pass + else: # subject given, predicate unbound + for p in subjectDictionary.keys(): + if object != ANY: # object is given + if object in subjectDictionary[p]: + yield (subject, p, object), self.__contexts() + else: # given object not found + pass + else: # object unbound + for o in subjectDictionary[p].keys(): + yield (subject, p, o), self.__contexts() + else: # given subject not found + pass + elif predicate != ANY: # predicate is given, subject unbound + pos = self.__pos 
+ if predicate in pos: + predicateDictionary = pos[predicate] + if object != ANY: # predicate+object is given, subject unbound + if object in predicateDictionary: + for s in predicateDictionary[object].keys(): + yield (s, predicate, object), self.__contexts() + else: # given object not found + pass + else: # predicate is given, object+subject unbound + for o in predicateDictionary.keys(): + for s in predicateDictionary[o].keys(): + yield (s, predicate, o), self.__contexts() + elif object != ANY: # object is given, subject+predicate unbound + osp = self.__osp + if object in osp: + objectDictionary = osp[object] + for s in objectDictionary.keys(): + for p in objectDictionary[s].keys(): + yield (s, p, object), self.__contexts() + else: # subject+predicate+object unbound + spo = self.__spo + for s in spo.keys(): + subjectDictionary = spo[s] + for p in subjectDictionary.keys(): + for o in subjectDictionary[p].keys(): + yield (s, p, o), self.__contexts() + + def __len__(self, context=None): + # @@ optimize + i = 0 + for triple in self.triples((None, None, None)): + i += 1 + return i + + def bind(self, prefix, namespace): + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + + def namespace(self, prefix): + return self.__namespace.get(prefix, None) + + def prefix(self, namespace): + return self.__prefix.get(namespace, None) + + def namespaces(self): + for prefix, namespace in self.__namespace.items(): + yield prefix, namespace + + def __contexts(self): + return (c for c in []) # TODO: best way to return empty generator + + def query(self, query, initNs, initBindings, queryGraph, **kwargs): + super(SimpleMemory, self).query(query, initNs, initBindings, queryGraph, **kwargs) + + def update(self, update, initNs, initBindings, queryGraph, **kwargs): + super(SimpleMemory, self).update(update, initNs, initBindings, queryGraph, **kwargs) + + +class Memory(Store): + """\ + An in memory implementation of a triple store. 
+ + Same as SimpleMemory above, but is Context-aware, Graph-aware, and Formula-aware + Authors: Ashley Sommer + """ + + context_aware = True + formula_aware = True + graph_aware = True + + def __init__(self, configuration=None, identifier=None): + super(Memory, self).__init__(configuration) + self.identifier = identifier + + # indexed by [subject][predicate][object] + self.__spo = {} + + # indexed by [predicate][object][subject] + self.__pos = {} + + # indexed by [predicate][object][subject] + self.__osp = {} + + self.__namespace = {} + self.__prefix = {} + self.__context_obj_map = {} + self.__tripleContexts = ( + {} + ) + self.__contextTriples = {None: set()} + # all contexts used in store (unencoded) + self.__all_contexts = set() + # default context information for triples + self.__defaultContexts = None + + def add(self, triple, context, quoted=False): + """\ + Add a triple to the store of triples. + """ + # add dictionary entries for spo[s][p][p] = 1 and pos[p][o][s] + # = 1, creating the nested dictionaries where they do not yet + # exits. 
+ Store.add(self, triple, context, quoted=quoted) + if context is not None: + self.__all_contexts.add(context) + subject, predicate, object_ = triple + self.__add_triple_context(triple, context, quoted) + + spo = self.__spo + try: + po = spo[subject] + except LookupError: + po = spo[subject] = {} + try: + o = po[predicate] + except LookupError: + o = po[predicate] = {} + o[object_] = 1 + + pos = self.__pos + try: + os = pos[predicate] + except LookupError: + os = pos[predicate] = {} + try: + s = os[object_] + except LookupError: + s = os[object_] = {} + s[subject] = 1 + + osp = self.__osp + try: + sp = osp[object_] + except LookupError: + sp = osp[object_] = {} + try: + p = sp[subject] + except LookupError: + p = sp[subject] = {} + p[predicate] = 1 + + def remove(self, triple_pattern, context=None): + req_ctx = self.__ctx_to_str(context) + for triple, c in self.triples(triple_pattern, context=context): + subject, predicate, object_ = triple + for ctx in self.__get_context_for_triple(triple): + if context is not None and req_ctx != ctx: + continue + self.__remove_triple_context(triple, ctx) + ctxs = self.__get_context_for_triple(triple, skipQuoted=True) + if None in ctxs and (context is None or len(ctxs) == 1): + # remove from default graph too + self.__remove_triple_context(triple, None) + if len(self.__get_context_for_triple(triple)) == 0: + del self.__spo[subject][predicate][object_] + del self.__pos[predicate][object_][subject] + del self.__osp[object_][subject][predicate] + del self.__tripleContexts[triple] + if ( + req_ctx is not None + and req_ctx in self.__contextTriples + and len(self.__contextTriples[req_ctx]) == 0 + ): + # all triples are removed out of this context + # and it's not the default context so delete it + del self.__contextTriples[req_ctx] + + if ( + triple_pattern == (None, None, None) + and context in self.__all_contexts + and not self.graph_aware + ): + # remove the whole context + self.__all_contexts.remove(context) + + def triples(self, 
triple_pattern, context=None): + """A generator over all the triples matching """ + req_ctx = self.__ctx_to_str(context) + subject, predicate, object_ = triple_pattern + + # all triples case (no triple parts given as pattern) + if subject is None and predicate is None and object_ is None: + # Just dump all known triples from the given graph + if req_ctx not in self.__contextTriples: + return + for triple in self.__contextTriples[req_ctx].copy(): + yield triple, self.__contexts(triple) + + # optimize "triple in graph" case (all parts given) + elif subject is not None and predicate is not None and object_ is not None: + triple = triple_pattern + try: + _ = self.__spo[subject][predicate][object_] + if self.__triple_has_context(triple, req_ctx): + yield triple, self.__contexts(triple) + except KeyError: + return + + elif subject is not None: # subject is given + spo = self.__spo + if subject in spo: + subjectDictionary = spo[subject] + if predicate is not None: # subject+predicate is given + if predicate in subjectDictionary: + if object_ is not None: # subject+predicate+object is given + if object_ in subjectDictionary[predicate]: + triple = (subject, predicate, object_) + if self.__triple_has_context(triple, req_ctx): + yield triple, self.__contexts(triple) + else: # given object not found + pass + else: # subject+predicate is given, object unbound + for o in list(subjectDictionary[predicate].keys()): + triple = (subject, predicate, o) + if self.__triple_has_context(triple, req_ctx): + yield triple, self.__contexts(triple) + else: # given predicate not found + pass + else: # subject given, predicate unbound + for p in list(subjectDictionary.keys()): + if object_ is not None: # object is given + if object_ in subjectDictionary[p]: + triple = (subject, p, object_) + if self.__triple_has_context(triple, req_ctx): + yield triple, self.__contexts(triple) + else: # given object not found + pass + else: # object unbound + for o in list(subjectDictionary[p].keys()): + triple 
= (subject, p, o) + if self.__triple_has_context(triple, req_ctx): + yield triple, self.__contexts(triple) + else: # given subject not found + pass + elif predicate is not None: # predicate is given, subject unbound + pos = self.__pos + if predicate in pos: + predicateDictionary = pos[predicate] + if object_ is not None: # predicate+object is given, subject unbound + if object_ in predicateDictionary: + for s in list(predicateDictionary[object_].keys()): + triple = (s, predicate, object_) + if self.__triple_has_context(triple, req_ctx): + yield triple, self.__contexts(triple) + else: # given object not found + pass + else: # predicate is given, object+subject unbound + for o in list(predicateDictionary.keys()): + for s in list(predicateDictionary[o].keys()): + triple = (s, predicate, o) + if self.__triple_has_context(triple, req_ctx): + yield triple, self.__contexts(triple) + elif object_ is not None: # object is given, subject+predicate unbound + osp = self.__osp + if object_ in osp: + objectDictionary = osp[object_] + for s in list(objectDictionary.keys()): + for p in list(objectDictionary[s].keys()): + triple = (s, p, object_) + if self.__triple_has_context(triple, req_ctx): + yield triple, self.__contexts(triple) + else: # subject+predicate+object unbound + # Shouldn't get here if all other cases above worked correctly. 
+ spo = self.__spo + for s in list(spo.keys()): + subjectDictionary = spo[s] + for p in list(subjectDictionary.keys()): + for o in list(subjectDictionary[p].keys()): + triple = (s, p, o) + if self.__triple_has_context(triple, req_ctx): + yield triple, self.__contexts(triple) + + def bind(self, prefix, namespace): + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + + def namespace(self, prefix): + return self.__namespace.get(prefix, None) + + def prefix(self, namespace): + return self.__prefix.get(namespace, None) + + def namespaces(self): + for prefix, namespace in self.__namespace.items(): + yield prefix, namespace + + def contexts(self, triple=None): + if triple is None or triple == (None, None, None): + return (context for context in self.__all_contexts) + + subj, pred, obj = triple + try: + _ = self.__spo[subj][pred][obj] + return self.__contexts(triple) + except KeyError: + return (_ for _ in []) + + def __len__(self, context=None): + ctx = self.__ctx_to_str(context) + if ctx not in self.__contextTriples: + return 0 + return len(self.__contextTriples[ctx]) + + def add_graph(self, graph): + if not self.graph_aware: + Store.add_graph(self, graph) + else: + self.__all_contexts.add(graph) + + def remove_graph(self, graph): + if not self.graph_aware: + Store.remove_graph(self, graph) + else: + self.remove((None, None, None), graph) + try: + self.__all_contexts.remove(graph) + except KeyError: + pass # we didn't know this graph, no problem + + # internal utility methods below + def __add_triple_context(self, triple, context, quoted): + """add the given context to the set of contexts for the triple""" + ctx = self.__ctx_to_str(context) + quoted = bool(quoted) + try: + subj, pred, obj = triple + _ = self.__spo[subj][pred][obj] + # we know the triple exists somewhere in the store + if triple not in self.__tripleContexts: + # triple exists with default ctx info + # start with a copy of the default ctx info + self.__tripleContexts[triple] = 
self.__defaultContexts.copy() + + self.__tripleContexts[triple][ctx] = quoted + if not quoted: + self.__tripleContexts[triple][None] = quoted + except KeyError: + # the triple didn't exist before in the store + if quoted: # this context only + self.__tripleContexts[triple] = {ctx: quoted} + else: # default context as well + self.__tripleContexts[triple] = {ctx: quoted, None: quoted} + + # if the triple is not quoted add it to the default context + if not quoted: + self.__contextTriples[None].add(triple) + + # always add the triple to given context, making sure it's initialized + if ctx not in self.__contextTriples: + self.__contextTriples[ctx] = set() + self.__contextTriples[ctx].add(triple) + + # if this is the first ever triple in the store, set default ctx info + if self.__defaultContexts is None: + self.__defaultContexts = self.__tripleContexts[triple] + + # if the context info is the same as default, no need to store it + if self.__tripleContexts[triple] == self.__defaultContexts: + del self.__tripleContexts[triple] + + def __get_context_for_triple(self, triple, skipQuoted=False): + """return a list of contexts (str) for the triple, skipping + quoted contexts if skipQuoted==True""" + + ctxs = self.__tripleContexts.get(triple, self.__defaultContexts) + + if not skipQuoted: + return ctxs.keys() + + return [ctx for ctx, quoted in ctxs.items() if not quoted] + + def __triple_has_context(self, triple, ctx): + """return True if the triple exists in the given context""" + return ctx in self.__tripleContexts.get(triple, self.__defaultContexts) + + def __remove_triple_context(self, triple, ctx): + """remove the context from the triple""" + ctxs = self.__tripleContexts.get(triple, self.__defaultContexts).copy() + del ctxs[ctx] + if ctxs == self.__defaultContexts: + del self.__tripleContexts[triple] + else: + self.__tripleContexts[triple] = ctxs + self.__contextTriples[ctx].remove(triple) + + def __ctx_to_str(self, ctx): + if ctx is None: + return None + try: + # ctx 
could be a graph. In that case, use its identifier + ctx_str = "{}:{}".format(str(ctx.identifier.__class__.__name__), str(ctx.identifier)) + self.__context_obj_map[ctx_str] = ctx + return ctx_str + except AttributeError: + # otherwise, ctx should be a URIRef or BNode or str + if isinstance(ctx, str): + ctx_str = "{}:{}".format(str(ctx.__class__.__name__), str(ctx)) + if ctx_str in self.__context_obj_map: + return ctx_str + self.__context_obj_map[ctx_str] = ctx + return ctx_str + raise RuntimeError("Cannot use that type of object as a Graph context") + + def __contexts(self, triple): + """return a generator for all the non-quoted contexts + (dereferenced) the encoded triple appears in""" + return ( + self.__context_obj_map.get(ctx_str, ctx_str) + for ctx_str in self.__get_context_for_triple(triple, skipQuoted=True) + if ctx_str is not None + ) + + def query(self, query, initNs, initBindings, queryGraph, **kwargs): + super(Memory, self).query(query, initNs, initBindings, queryGraph, **kwargs) + + def update(self, update, initNs, initBindings, queryGraph, **kwargs): + super(Memory, self).update(update, initNs, initBindings, queryGraph, **kwargs) diff --git a/rdflib/plugins/sleepycat.py b/rdflib/plugins/stores/sleepycat.py index 735d3c3a..735d3c3a 100644 --- a/rdflib/plugins/sleepycat.py +++ b/rdflib/plugins/stores/sleepycat.py diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index 2d20c97c..631792c7 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -646,7 +646,7 @@ class SPARQLUpdateStore(SPARQLStore): .. admonition:: Context-aware query rewriting - **When:** If context-awareness is enabled and the graph is not the default graph of the store. - - **Why:** To ensure consistency with the :class:`~rdflib.plugins.memory.IOMemory` store. + - **Why:** To ensure consistency with the :class:`~rdflib.plugins.stores.memory.Memory` store. 
The graph must except "local" SPARQL requests (requests with no GRAPH keyword) like if it was the default graph. - **What is done:** These "local" queries are rewritten by this store. diff --git a/rdflib/term.py b/rdflib/term.py index 6e8f81d0..563f5e18 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -1405,7 +1405,7 @@ def _parseBoolean(value): if new_value not in false_accepted_values: warnings.warn( "Parsing weird boolean, % r does not map to True or False" % value, - category=DeprecationWarning, + category=UserWarning, ) return False diff --git a/rdflib/util.py b/rdflib/util.py index 88cef828..8d2743bc 100644 --- a/rdflib/util.py +++ b/rdflib/util.py @@ -349,8 +349,8 @@ def parse_date_time(val): SUFFIX_FORMAT_MAP = { + "xml": "xml", "rdf": "xml", - "rdfs": "xml", "owl": "xml", "n3": "n3", "ttl": "turtle", diff --git a/test/rdf/datatypes/test001.borked b/test/rdf/datatypes/test001.borked new file mode 100644 index 00000000..a4c86aea --- /dev/null +++ b/test/rdf/datatypes/test001.borked @@ -0,0 +1,29 @@ +<?xml version="1.0"?> + +<!-- + Copyright World Wide Web Consortium, (Massachusetts Institute of + Technology, Institut National de Recherche en Informatique et en + Automatique, Keio University). + + All Rights Reserved. + + Please see the full Copyright clause at + <http://www.w3.org/Consortium/Legal/copyright-software.html> + + Description: A simple datatype production; a language+ + datatype production. 
Simply duplicate the constructs under + http://www.w3.org/2000/10/rdf-tests/rdfcore/ntriples/test.nt + + $Id: test001.rdf,v 1.2 2002/11/20 14:51:34 jgrant Exp $ + +--> + +<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:eg="http://example.org/"> + + <rdf:Description rdf:about="http://example.org/foo"> + <eg:bar rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10</eg:bar> + <eg:baz rdf:datatype="http://www.w3.org/2001/XMLSchema#integer" xml:lang="fr">10</eg:baz> + </rdf:Description> + +</rdf:RDF> diff --git a/test/store_performance.py b/test/store_performance.py index aa70742a..dc3fe7ce 100644 --- a/test/store_performance.py +++ b/test/store_performance.py @@ -96,7 +96,7 @@ class StoreTestCase(unittest.TestCase): class MemoryStoreTestCase(StoreTestCase): - store = "IOMemory" + store = "Memory" if __name__ == "__main__": diff --git a/test/test_aggregate_graphs.py b/test/test_aggregate_graphs.py index 5d58f4d3..efe684d3 100644 --- a/test/test_aggregate_graphs.py +++ b/test/test_aggregate_graphs.py @@ -60,7 +60,7 @@ WHERE {?n3Doc a log:N3Document }""" class GraphAggregates1(unittest.TestCase): def setUp(self): - memStore = plugin.get("IOMemory", Store)() + memStore = plugin.get("Memory", Store)() self.graph1 = Graph(memStore) self.graph2 = Graph(memStore) self.graph3 = Graph(memStore) @@ -109,7 +109,7 @@ class GraphAggregates2(unittest.TestCase): sparql = True def setUp(self): - memStore = plugin.get("IOMemory", Store)() + memStore = plugin.get("Memory", Store)() self.graph1 = Graph(memStore, URIRef("http://example.com/graph1")) self.graph2 = Graph(memStore, URIRef("http://example.com/graph2")) self.graph3 = Graph(memStore, URIRef("http://example.com/graph3")) diff --git a/test/test_canonicalization.py b/test/test_canonicalization.py index 93c8b4c5..c3a8bf04 100644 --- a/test/test_canonicalization.py +++ b/test/test_canonicalization.py @@ -3,7 +3,7 @@ from rdflib import Graph, RDF, BNode, URIRef, Namespace, ConjunctiveGraph, Liter from 
rdflib.compare import to_isomorphic, to_canonical_graph import rdflib -from rdflib.plugins.memory import IOMemory +from rdflib.plugins.stores.memory import Memory from io import StringIO @@ -287,7 +287,7 @@ def test_issue682_signing_named_graphs(): cmary = URIRef("http://love.com/lovers/mary#") cjohn = URIRef("http://love.com/lovers/john#") - store = IOMemory() + store = Memory() g = ConjunctiveGraph(store=store) g.bind("love", ns) diff --git a/test/test_dataset.py b/test/test_dataset.py index 33a2721d..734b58cd 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -178,7 +178,7 @@ if __name__ == "__main__": tests = 0 for s in plugin.plugins(pluginname, plugin.Store): - if s.name in ("default", "IOMemory", "Auditable", "Concurrent", "SPARQLStore"): + if s.name in ("default", "Memory", "Auditable", "Concurrent", "SPARQLStore"): continue # these are tested by default if not s.getClass().graph_aware: diff --git a/test/test_graph.py b/test/test_graph.py index 77f47dbc..fba32e5d 100644 --- a/test/test_graph.py +++ b/test/test_graph.py @@ -5,7 +5,9 @@ import unittest from tempfile import mkdtemp, mkstemp import shutil -from rdflib import URIRef, RDF, Graph, plugin +from rdflib import URIRef, Graph, plugin +from rdflib.exceptions import ParserError +from rdflib.plugin import PluginException from nose.exc import SkipTest @@ -248,6 +250,65 @@ class GraphTestCase(unittest.TestCase): self.assertEqual((michel, likes, cheese) in g1, True) + def testGuessFormatForParse(self): + self.graph = Graph() + + # files + with self.assertRaises(ParserError): + self.graph.parse(__file__) # here we are trying to parse a Python file!! 
+ + # .nt can be parsed by Turtle Parser + self.graph.parse("test/nt/anons-01.nt") + # RDF/XML + self.graph.parse("test/rdf/datatypes/test001.rdf") # XML + # bad filename but set format + self.graph.parse("test/rdf/datatypes/test001.borked", format="xml") + + # strings + self.graph = Graph() + + with self.assertRaises(ParserError): + self.graph.parse(data="rubbish") + + # Turtle - default + self.graph.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .") + + # Turtle - format given + self.graph.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .", format="turtle") + + # RDF/XML - format given + rdf = """<rdf:RDF + xmlns:ns1="http://example.org/#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" +> + <rdf:Description rdf:nodeID="ub63bL2C1"> + <ns1:p rdf:resource="http://example.org/q"/> + <ns1:r rdf:resource="http://example.org/s"/> + </rdf:Description> + <rdf:Description rdf:nodeID="ub63bL5C1"> + <ns1:r> + <rdf:Description rdf:nodeID="ub63bL6C11"> + <ns1:s rdf:resource="http://example.org/#t"/> + </rdf:Description> + </ns1:r> + <ns1:p rdf:resource="http://example.org/q"/> + </rdf:Description> +</rdf:RDF> + """ + self.graph.parse(data=rdf, format="xml") + + # URI + self.graph = Graph() + + # only getting HTML + with self.assertRaises(PluginException): + self.graph.parse(location="https://www.google.com") + + self.graph.parse(location="http://www.w3.org/ns/adms.ttl") + self.graph.parse(location="http://www.w3.org/ns/adms.rdf") + # persistent Australian Government online RDF resource without a file-like ending + self.graph.parse(location="https://linked.data.gov.au/def/agrif?_format=text/turtle") + # dynamically create classes for each registered Store @@ -260,7 +321,7 @@ tests = 0 for s in plugin.plugins(pluginname, plugin.Store): if s.name in ( "default", - "IOMemory", + "Memory", "Auditable", "Concurrent", "SPARQLStore", @@ -268,6 +329,10 @@ for s in plugin.plugins(pluginname, plugin.Store): ): 
continue # these are tested by default + if s.name in ("SimpleMemory",): + # these (by design) won't pass some of the tests (like Intersection) + continue + locals()["t%d" % tests] = type( "%sGraphTestCase" % s.name, (GraphTestCase,), {"store": s.name} ) diff --git a/test/test_graph_context.py b/test/test_graph_context.py index 5221434b..52220d2c 100644 --- a/test/test_graph_context.py +++ b/test/test_graph_context.py @@ -370,7 +370,7 @@ tests = 0 for s in plugin.plugins(pluginname, plugin.Store): if s.name in ( "default", - "IOMemory", + "Memory", "Auditable", "Concurrent", "SPARQLStore", diff --git a/test/test_iomemory.py b/test/test_iomemory.py deleted file mode 100644 index 74048ab4..00000000 --- a/test/test_iomemory.py +++ /dev/null @@ -1,67 +0,0 @@ -""" - -Iteration and update conflict with set based IOMemory store - -https://github.com/RDFLib/rdflib/issues/286 - -""" - -from rdflib.store import Store -from rdflib import plugin - -from rdflib import Graph, Literal, Namespace - - -def test_concurrent1(): - dns = Namespace("http://www.example.com/") - - store = plugin.get("IOMemory", Store)() - g1 = Graph(store=store) - - g1.add((dns.Name, dns.prop, Literal("test"))) - g1.add((dns.Name, dns.prop, Literal("test2"))) - g1.add((dns.Name, dns.prop, Literal("test3"))) - - n = len(g1) - i = 0 - - for t in g1.triples((None, None, None)): - i += 1 - # next line causes problems because it adds a new Subject that needs - # to be indexed in __subjectIndex dictionary in IOMemory Store. 
- # which invalidates the iterator used to iterate over g1 - g1.add(t) - - assert i == n - - -def test_concurrent2(): - dns = Namespace("http://www.example.com/") - - store = plugin.get("IOMemory", Store)() - g1 = Graph(store=store) - g2 = Graph(store=store) - - g1.add((dns.Name, dns.prop, Literal("test"))) - g1.add((dns.Name, dns.prop, Literal("test2"))) - g1.add((dns.Name, dns.prop, Literal("test3"))) - - n = len(g1) - i = 0 - - for t in g1.triples((None, None, None)): - i += 1 - g2.add(t) - # next line causes problems because it adds a new Subject that needs - # to be indexed in __subjectIndex dictionary in IOMemory Store. - # which invalidates the iterator used to iterate over g1 - g2.add((dns.Name1, dns.prop1, Literal("test"))) - g2.add((dns.Name1, dns.prop, Literal("test"))) - g2.add((dns.Name, dns.prop, Literal("test4"))) - - assert i == n - - -if __name__ == "__main__": - test_concurrent1() - test_concurrent2() diff --git a/test/test_issue247.py b/test/test_issue247.py index 747dd1e0..7a51dd24 100644 --- a/test/test_issue247.py +++ b/test/test_issue247.py @@ -38,7 +38,7 @@ class TestXMLLiteralwithLangAttr(unittest.TestCase): it contains a XML Literal with a xml:lang attribute: """ g = rdflib.Graph() - g.parse(data=passxml) + g.parse(data=passxml, format="xml") def test_failing_parse_of_literal_with_xmllang_attr(self): """ @@ -47,7 +47,7 @@ class TestXMLLiteralwithLangAttr(unittest.TestCase): it contains a XML Literal with a xml:lang attribute: """ g = rdflib.Graph() - g.parse(data=failxml) + g.parse(data=failxml, format="xml") if __name__ == "__main__": diff --git a/test/test_issue363.py b/test/test_issue363.py index 792c2441..5f88a6f4 100644 --- a/test/test_issue363.py +++ b/test/test_issue363.py @@ -38,7 +38,7 @@ def test_broken_rdfxml(): def test_parsetype_resource(): - g = rdflib.Graph().parse(data=data2) + g = rdflib.Graph().parse(data=data2, format="xml") print(g.serialize(format="n3")) diff --git a/test/test_issue801.py b/test/test_issue801.py new 
file mode 100644 index 00000000..ae27f346 --- /dev/null +++ b/test/test_issue801.py @@ -0,0 +1,19 @@ +""" +Issue 801 - Problem with prefixes created for URIs containing %20 +""" +from rdflib import Namespace, Graph, BNode, Literal +import unittest + +class TestIssue801(unittest.TestCase): + + def test_issue_801(self): + g = Graph() + example = Namespace('http://example.org/') + g.bind('', example) + node = BNode() + g.add((node, example['first%20name'], Literal('John'))) + self.assertEqual(g.serialize(format="turtle").decode().split("\n")[-3], + '[] :first%20name "John" .') + +if __name__ == "__main__": + unittest.main() diff --git a/test/test_issue_git_336.py b/test/test_issue_git_336.py index 6a8abb7c..c3d4a581 100644 --- a/test/test_issue_git_336.py +++ b/test/test_issue_git_336.py @@ -37,7 +37,7 @@ def test_ns_localname_roundtrip(): xmldump = g.serialize().decode("utf-8") g1 = rdflib.Graph() - g1.parse(data=xmldump) + g1.parse(data=xmldump, format="xml") g1.parse(data=turtledump, format="turtle") diff --git a/test/test_literal.py b/test/test_literal.py index 714bea00..656bfb10 100644 --- a/test/test_literal.py +++ b/test/test_literal.py @@ -33,7 +33,7 @@ class TestLiteral(unittest.TestCase): </rdf:RDF> """ g = rdflib.Graph() - g.parse(data=d) + g.parse(data=d, format="xml") a = rdflib.Literal("a\\b") b = list(g.objects())[0] self.assertEqual(a, b) diff --git a/test/test_memory_store.py b/test/test_memory_store.py index 546d12ad..ad46d6c0 100644 --- a/test/test_memory_store.py +++ b/test/test_memory_store.py @@ -1,10 +1,32 @@ import unittest import rdflib -rdflib.plugin.register("Memory", rdflib.store.Store, "rdflib.plugins.memory", "Memory") +rdflib.plugin.register("SimpleMemory", rdflib.store.Store, "rdflib.plugins.stores.memory", "SimpleMemory") +rdflib.plugin.register("Memory", rdflib.store.Store, "rdflib.plugins.stores.memory", "Memory") +class SimpleStoreTestCase(unittest.TestCase): + def test_memory_store(self): + g = rdflib.Graph("SimpleMemory") + subj1 
= rdflib.URIRef("http://example.org/foo#bar1") + pred1 = rdflib.URIRef("http://example.org/foo#bar2") + obj1 = rdflib.URIRef("http://example.org/foo#bar3") + triple1 = (subj1, pred1, obj1) + triple2 = ( + subj1, + rdflib.URIRef("http://example.org/foo#bar4"), + rdflib.URIRef("http://example.org/foo#bar5"), + ) + g.add(triple1) + self.assertTrue(len(g) == 1) + g.add(triple2) + self.assertTrue(len(list(g.triples((subj1, None, None)))) == 2) + self.assertTrue(len(list(g.triples((None, pred1, None)))) == 1) + self.assertTrue(len(list(g.triples((None, None, obj1)))) == 1) + g.remove(triple1) + self.assertTrue(len(g) == 1) + g.serialize() -class StoreTestCase(unittest.TestCase): +class MemoryStoreTestCase(unittest.TestCase): def test_memory_store(self): g = rdflib.Graph("Memory") subj1 = rdflib.URIRef("http://example.org/foo#bar1") diff --git a/test/test_namespace.py b/test/test_namespace.py index 48896fdc..510d8515 100644 --- a/test/test_namespace.py +++ b/test/test_namespace.py @@ -39,8 +39,8 @@ class NamespacePrefixTest(unittest.TestCase): ) graph = Graph().parse(data=data, format="turtle") for p, n in tuple(graph.namespaces()): - graph.store._IOMemory__namespace.pop(p) - graph.store._IOMemory__prefix.pop(n) + graph.store._Memory__namespace.pop(p) + graph.store._Memory__prefix.pop(n) graph.namespace_manager.reset() self.assertFalse(tuple(graph.namespaces())) u = URIRef("http://example.org/a") diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py index 15f0d4af..399f7bff 100644 --- a/test/test_nt_misc.py +++ b/test/test_nt_misc.py @@ -34,8 +34,34 @@ class NTTestCase(unittest.TestCase): s = g.serialize(format="nt").strip() self.assertEqual(s, '<foo> <foo> "test\\n"@en .'.encode("latin-1")) + def testIssue1144_rdflib(self): + fname = "test/nt/lists-02.nt" + g1 = Graph() + with open(fname, "r") as f: + g1.parse(f, format='nt') + self.assertEqual(14, len(g1)) + g2 = Graph() + with open(fname, "rb") as fb: + g2.parse(fb, format='nt') + self.assertEqual(14, len(g2)) + + + 
def testIssue1144_w3c(self): + fname = "test/nt/lists-02.nt" + sink1 = ntriples.NTGraphSink(Graph()) + p1 = ntriples.W3CNTriplesParser(sink1) + with open(fname, "r") as f: + p1.parse(f) + self.assertEqual(14, len(sink1.g)) + sink2 = ntriples.NTGraphSink(Graph()) + p2 = ntriples.W3CNTriplesParser(sink2) + with open(fname, "rb") as f: + p2.parse(f) + self.assertEqual(14, len(sink2.g)) + + def test_sink(self): - s = ntriples.Sink() + s = ntriples.DummySink() self.assertTrue(s.length == 0) s.triple(None, None, None) self.assertTrue(s.length == 1) @@ -77,26 +103,26 @@ class NTTestCase(unittest.TestCase): ntriples.validate = False self.assertEqual(res, uniquot) - def test_NTriplesParser_fpath(self): + def test_W3CNTriplesParser_fpath(self): fpath = "test/nt/" + os.listdir("test/nt")[0] - p = ntriples.NTriplesParser() + p = ntriples.W3CNTriplesParser() self.assertRaises(ntriples.ParseError, p.parse, fpath) - def test_NTriplesParser_parsestring(self): - p = ntriples.NTriplesParser() + def test_W3CNTriplesParser_parsestring(self): + p = ntriples.W3CNTriplesParser() data = 3 self.assertRaises(ntriples.ParseError, p.parsestring, data) fname = "test/nt/lists-02.nt" with open(fname, "r") as f: data = f.read() - p = ntriples.NTriplesParser() + p = ntriples.W3CNTriplesParser() res = p.parsestring(data) self.assertTrue(res == None) def test_w3_ntriple_variants(self): uri = "file:///" + os.getcwd() + "/test/nt/test.ntriples" - parser = ntriples.NTriplesParser() + parser = ntriples.W3CNTriplesParser() u = urlopen(uri) sink = parser.parse(u) u.close() @@ -107,14 +133,14 @@ class NTTestCase(unittest.TestCase): data = ( """<http://example.org/resource32> 3 <http://example.org/datatype1> .\n""" ) - p = ntriples.NTriplesParser() + p = ntriples.W3CNTriplesParser() self.assertRaises(ntriples.ParseError, p.parsestring, data) def test_cover_eat(self): data = ( """<http://example.org/resource32> 3 <http://example.org/datatype1> .\n""" ) - p = ntriples.NTriplesParser() + p = 
ntriples.W3CNTriplesParser() p.line = data self.assertRaises( ntriples.ParseError, p.eat, re.compile("<http://example.org/datatype1>") @@ -122,7 +148,7 @@ class NTTestCase(unittest.TestCase): def test_cover_subjectobjectliteral(self): # data = '''<http://example.org/resource32> 3 <http://example.org/datatype1> .\n''' - p = ntriples.NTriplesParser() + p = ntriples.W3CNTriplesParser() p.line = "baz" self.assertRaises(ntriples.ParseError, p.subject) self.assertRaises(ntriples.ParseError, p.object) @@ -134,12 +160,12 @@ class BNodeContextTestCase(unittest.TestCase): def test_bnode_shared_across_instances(self): my_sink = FakeSink() bnode_context = dict() - p = ntriples.NTriplesParser(my_sink, bnode_context=bnode_context) + p = ntriples.W3CNTriplesParser(my_sink, bnode_context=bnode_context) p.parsestring(''' _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> . ''') - q = ntriples.NTriplesParser(my_sink, bnode_context=bnode_context) + q = ntriples.W3CNTriplesParser(my_sink, bnode_context=bnode_context) q.parsestring(''' _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> . ''') @@ -148,12 +174,12 @@ class BNodeContextTestCase(unittest.TestCase): def test_bnode_distinct_across_instances(self): my_sink = FakeSink() - p = ntriples.NTriplesParser(my_sink) + p = ntriples.W3CNTriplesParser(my_sink) p.parsestring(''' _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> . ''') - q = ntriples.NTriplesParser(my_sink) + q = ntriples.W3CNTriplesParser(my_sink) q.parsestring(''' _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> . ''') @@ -162,7 +188,7 @@ class BNodeContextTestCase(unittest.TestCase): def test_bnode_distinct_across_parse(self): my_sink = FakeSink() - p = ntriples.NTriplesParser(my_sink) + p = ntriples.W3CNTriplesParser(my_sink) p.parsestring(''' _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> . 
@@ -176,7 +202,7 @@ class BNodeContextTestCase(unittest.TestCase): def test_bnode_shared_across_parse(self): my_sink = FakeSink() - p = ntriples.NTriplesParser(my_sink) + p = ntriples.W3CNTriplesParser(my_sink) p.parsestring(''' _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> . @@ -192,12 +218,12 @@ class BNodeContextTestCase(unittest.TestCase): my_sink = FakeSink() bnode_ctx = dict() - p = ntriples.NTriplesParser(my_sink) + p = ntriples.W3CNTriplesParser(my_sink) p.parsestring(''' _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> . ''', bnode_context=bnode_ctx) - q = ntriples.NTriplesParser(my_sink) + q = ntriples.W3CNTriplesParser(my_sink) q.parsestring(''' _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> . ''', bnode_context=bnode_ctx) diff --git a/test/test_parse_file_guess_format.py b/test/test_parse_file_guess_format.py index abb039df..5706f8df 100644 --- a/test/test_parse_file_guess_format.py +++ b/test/test_parse_file_guess_format.py @@ -3,7 +3,7 @@ from pathlib import Path from shutil import copyfile from tempfile import TemporaryDirectory -from xml.sax import SAXParseException +from rdflib.exceptions import ParserError from rdflib import Graph, logger as graph_logger @@ -21,11 +21,10 @@ class FileParserGuessFormatTest(unittest.TestCase): g = Graph() with TemporaryDirectory() as tmpdirname: newpath = Path(tmpdirname).joinpath("no_file_ext") - copyfile("test/w3c/turtle/IRI_subject.ttl", str(newpath)) + copyfile("test/rdf/Manifest.rdf", str(newpath)) with self.assertLogs(graph_logger, "WARNING") as log_cm: - with self.assertRaises(SAXParseException): + with self.assertRaises(ParserError): g.parse(str(newpath)) - self.assertTrue(any("Could not guess format" in msg for msg in log_cm.output)) if __name__ == '__main__': diff --git a/test/test_parser.py b/test/test_parser.py index 3aaf5658..e337969c 100644 --- a/test/test_parser.py +++ 
b/test/test_parser.py @@ -33,6 +33,7 @@ class ParserTestCase(unittest.TestCase): </rdf:RDF> """, + format="xml", publicID="http://example.org", ) diff --git a/test/test_seq.py b/test/test_seq.py index 7f177574..5a987ef4 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -29,7 +29,7 @@ class SeqTestCase(unittest.TestCase): def setUp(self): store = self.store = Graph(store=self.backend) store.open(self.path) - store.parse(data=s) + store.parse(data=s, format="xml") def tearDown(self): self.store.close() diff --git a/test/test_util.py b/test/test_util.py index 81ab17ab..ea3b122e 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -293,12 +293,12 @@ class TestUtilTermConvert(unittest.TestCase): def test_util_from_n3_expectquotedgraph(self): s = "{<http://example.com/schema>}" - res = util.from_n3(s, default=None, backend="IOMemory") + res = util.from_n3(s, default=None, backend="Memory") self.assertTrue(isinstance(res, QuotedGraph)) def test_util_from_n3_expectgraph(self): s = "[<http://example.com/schema>]" - res = util.from_n3(s, default=None, backend="IOMemory") + res = util.from_n3(s, default=None, backend="Memory") self.assertTrue(isinstance(res, Graph)) diff --git a/test/test_xmlliterals.py b/test/test_xmlliterals.py index fcc0ddf2..aeabbe88 100644 --- a/test/test_xmlliterals.py +++ b/test/test_xmlliterals.py @@ -42,7 +42,7 @@ def testRDFXMLParse(): </rdf:RDF>""" g = rdflib.Graph() - g.parse(data=rdfxml) + g.parse(data=rdfxml, format="xml") l1 = list(g)[0][2] assert l1.datatype == RDF.XMLLiteral |