Merge branch 'master' into improve_graph_parse

author: Nicholas Car <nicholas.car@surroundaustralia.com> 2020-08-27 13:13:45 +1000
committer: GitHub <noreply@github.com> 2020-08-27 13:13:45 +1000
commit: 3afffcd19d3a5d240e83b3a59b53e3ee1120c165 (patch)
tree: 42ba0191f0a8f645cbc5b60aefd8a3cbfc383a8b /rdflib/plugins
parent: 3e42f5eea742563cdeab7d655fe55f7d0e25ea16 (diff)
parent: 94295389204175783c2f369c2826f0ba55a2d42c (diff)
download: rdflib-improve_graph_parse.tar.gz
5 files changed, 143 insertions, 137 deletions
diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py
index c427f153..d866977d 100755
--- a/rdflib/plugins/parsers/notation3.py
+++ b/rdflib/plugins/parsers/notation3.py
@@ -139,10 +139,13 @@ def join(here, there):
         return here + frag
 
     # join('mid:foo@example', '../foo') bzzt
-    if here[bcolonl + 1: bcolonl + 2] != "/":
-        raise ValueError("Base <%s> has no slash after " "colon - with relative '%s'." % (here, there))
+    if here[bcolonl + 1 : bcolonl + 2] != "/":
+        raise ValueError(
+            "Base <%s> has no slash after "
+            "colon - with relative '%s'." % (here, there)
+        )
 
-    if here[bcolonl + 1: bcolonl + 3] == "//":
+    if here[bcolonl + 1 : bcolonl + 3] == "//":
         bpath = here.find("/", bcolonl + 3)
     else:
         bpath = bcolonl + 1
@@ -502,14 +505,14 @@ class SinkParser:
         """
 
         assert tok[0] not in _notNameChars  # not for punctuation
-        if argstr[i: i + 1] == "@":
+        if argstr[i : i + 1] == "@":
             i = i + 1
         else:
             if tok not in self.keywords:
                 return -1  # No, this has neither keywords declaration nor "@"
 
         if (
-            argstr[i: i + len(tok)] == tok
+            argstr[i : i + len(tok)] == tok
             and (argstr[i + len(tok)] in _notKeywordsChars)
             or (colon and argstr[i + len(tok)] == ":")
         ):
@@ -526,7 +529,7 @@ class SinkParser:
 
         assert tok[0] not in _notNameChars  # not for punctuation
 
-        if argstr[i: i + len(tok)].lower() == tok.lower() and (
+        if argstr[i : i + len(tok)].lower() == tok.lower() and (
             argstr[i + len(tok)] in _notQNameChars
         ):
             i = i + len(tok)
@@ -794,23 +797,23 @@ class SinkParser:
             res.append(("->", RDF_type))
             return j
 
-        if argstr[i: i + 2] == "<=":
+        if argstr[i : i + 2] == "<=":
             if self.turtle:
                 self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. ")
 
             res.append(("<-", self._store.newSymbol(Logic_NS + "implies")))
             return i + 2
 
-        if argstr[i: i + 1] == "=":
+        if argstr[i : i + 1] == "=":
             if self.turtle:
                 self.BadSyntax(argstr, i, "Found '=' in Turtle mode")
-            if argstr[i + 1: i + 2] == ">":
+            if argstr[i + 1 : i + 2] == ">":
                 res.append(("->", self._store.newSymbol(Logic_NS + "implies")))
                 return i + 2
             res.append(("->", DAML_sameAs))
             return i + 1
 
-        if argstr[i: i + 2] == ":=":
+        if argstr[i : i + 2] == ":=":
             if self.turtle:
                 self.BadSyntax(argstr, i, "Found ':=' in Turtle mode")
 
@@ -823,7 +826,7 @@ class SinkParser:
             res.append(("->", r[0]))
             return j
 
-        if argstr[i: i + 2] == ">-" or argstr[i: i + 2] == "<-":
+        if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-":
             self.BadSyntax(argstr, j, ">- ... -> syntax is obsolete.")
 
         return -1
@@ -844,8 +847,8 @@ class SinkParser:
         if j < 0:
             return j  # nope
 
-        while argstr[j: j + 1] in "!^":  # no spaces, must follow exactly (?)
-            ch = argstr[j: j + 1]
+        while argstr[j : j + 1] in "!^":  # no spaces, must follow exactly (?)
+            ch = argstr[j : j + 1]
             subj = res.pop()
             obj = self.blankNode(uri=self.here(j))
             j = self.node(argstr, j + 1, res)
@@ -879,7 +882,7 @@ class SinkParser:
         if j < 0:
             return j  # eof
         i = j
-        ch = argstr[i: i + 1]  # Quick 1-character checks first:
+        ch = argstr[i : i + 1]  # Quick 1-character checks first:
 
         if ch == "[":
             bnodeID = self.here(i)
@@ -887,7 +890,7 @@ class SinkParser:
             if j < 0:
                 self.BadSyntax(argstr, i, "EOF after '['")
             # Hack for "is" binding name to anon node
-            if argstr[j: j + 1] == "=":
+            if argstr[j : j + 1] == "=":
                 if self.turtle:
                     self.BadSyntax(
                         argstr, j, "Found '[=' or '[ =' when in turtle mode."
@@ -905,7 +908,7 @@ class SinkParser:
                         self.BadSyntax(
                             argstr, i, "EOF when objectList expected after [ = "
                         )
-                    if argstr[j: j + 1] == ";":
+                    if argstr[j : j + 1] == ";":
                         j = j + 1
                 else:
                     self.BadSyntax(argstr, i, "objectList expected after [= ")
@@ -922,7 +925,7 @@ class SinkParser:
                 self.BadSyntax(
                     argstr, i, "EOF when ']' expected after [ <propertyList>"
                 )
-            if argstr[j: j + 1] != "]":
+            if argstr[j : j + 1] != "]":
                 self.BadSyntax(argstr, j, "']' expected")
             res.append(subj)
             return j + 1
@@ -931,7 +934,7 @@ class SinkParser:
             # if self.turtle:
             #     self.BadSyntax(argstr, i,
             #                     "found '{' while in Turtle mode, Formulas not supported!")
-            ch2 = argstr[i + 1: i + 2]
+            ch2 = argstr[i + 1 : i + 2]
             if ch2 == "$":
                 # a set
                 i += 1
@@ -942,12 +945,12 @@ class SinkParser:
                     i = self.skipSpace(argstr, j)
                     if i < 0:
                         self.BadSyntax(argstr, i, "needed '$}', found end.")
-                    if argstr[i: i + 2] == "$}":
+                    if argstr[i : i + 2] == "$}":
                         j = i + 2
                         break
 
                     if not first_run:
-                        if argstr[i: i + 1] == ",":
+                        if argstr[i : i + 1] == ",":
                             i += 1
                         else:
                             self.BadSyntax(argstr, i, "expected: ','")
@@ -982,7 +985,7 @@ class SinkParser:
                     if i < 0:
                         self.BadSyntax(argstr, i, "needed '}', found end.")
 
-                    if argstr[i: i + 1] == "}":
+                    if argstr[i : i + 1] == "}":
                         j = i + 1
                         break
 
@@ -1001,7 +1004,7 @@ class SinkParser:
 
         if ch == "(":
             thing_type = self._store.newList
-            ch2 = argstr[i + 1: i + 2]
+            ch2 = argstr[i + 1 : i + 2]
             if ch2 == "$":
                 thing_type = self._store.newSet
                 i += 1
@@ -1012,7 +1015,7 @@ class SinkParser:
                 i = self.skipSpace(argstr, j)
                 if i < 0:
                     self.BadSyntax(argstr, i, "needed ')', found end.")
-                if argstr[i: i + 1] == ")":
+                if argstr[i : i + 1] == ")":
                     j = i + 1
                     break
 
@@ -1065,7 +1068,7 @@ class SinkParser:
                     break
                 i = j + 1
 
-            if argstr[j: j + 2] == ":-":
+            if argstr[j : j + 2] == ":-":
                 if self.turtle:
                     self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode")
                 i = j + 2
@@ -1095,7 +1098,7 @@ class SinkParser:
             j = self.skipSpace(argstr, i)
             if j < 0:
                 self.BadSyntax(argstr, j, "EOF found in list of objects")
-            if argstr[i: i + 1] != ";":
+            if argstr[i : i + 1] != ";":
                 return i
             i = i + 1  # skip semicolon and continue
 
@@ -1116,7 +1119,7 @@ class SinkParser:
             j = self.skipSpace(argstr, i)
             if j < 0:
                 return j  # eof
-            ch = argstr[j: j + 1]
+            ch = argstr[j : j + 1]
             if ch != ",":
                 if ch != ".":
                     return -1
@@ -1133,7 +1136,7 @@ class SinkParser:
             j = self.skipSpace(argstr, i)
             if j < 0:
                 self.BadSyntax(argstr, j, "EOF found after object")
-            if argstr[j: j + 1] != ",":
+            if argstr[j : j + 1] != ",":
                 return j  # Found something else!
             i = self.object(argstr, j + 1, res)
             if i < 0:
@@ -1143,11 +1146,11 @@ class SinkParser:
         j = self.skipSpace(argstr, i)
         if j < 0:
             return j  # eof
-        if argstr[j: j + 1] == ".":
+        if argstr[j : j + 1] == ".":
             return j + 1  # skip
-        if argstr[j: j + 1] == "}":
+        if argstr[j : j + 1] == "}":
             return j  # don't skip it
-        if argstr[j: j + 1] == "]":
+        if argstr[j : j + 1] == "]":
             return j
         self.BadSyntax(argstr, j, "expected '.' or '}' or ']' at end of statement")
 
@@ -1212,7 +1215,7 @@ class SinkParser:
                         assert (
                             ":" in uref
                         ), "With no base URI, cannot deal with relative URIs"
-                    if argstr[i - 1: i] == "#" and not uref[-1:] == "#":
+                    if argstr[i - 1 : i] == "#" and not uref[-1:] == "#":
                         uref = uref + "#"  # She meant it! Weirdness in urlparse?
                     symb = self._store.newSymbol(uref)
                     if symb in self._variables:
@@ -1261,7 +1264,7 @@ class SinkParser:
         if j < 0:
             return -1
 
-        if argstr[j: j + 1] != "?":
+        if argstr[j : j + 1] != "?":
             return -1
         j = j + 1
         i = j
@@ -1419,7 +1422,7 @@ class SinkParser:
                 i = j
 
             if argstr[i] in self.string_delimiters:
-                if argstr[i: i + 3] == argstr[i] * 3:
+                if argstr[i : i + 3] == argstr[i] * 3:
                     delim = argstr[i] * 3
                 else:
                     delim = argstr[i]
@@ -1467,7 +1470,7 @@ class SinkParser:
                 # return -1  ## or fall through?
 
             if argstr[i] in self.string_delimiters:
-                if argstr[i: i + 3] == argstr[i] * 3:
+                if argstr[i : i + 3] == argstr[i] * 3:
                     delim = argstr[i] * 3
                 else:
                     delim = argstr[i]
@@ -1476,7 +1479,7 @@ class SinkParser:
                 dt = None
                 j, s = self.strconst(argstr, i, delim)
                 lang = None
-                if argstr[j: j + 1] == "@":  # Language?
+                if argstr[j : j + 1] == "@":  # Language?
                     m = langcode.match(argstr, j + 1)
                     if m is None:
                         raise BadSyntax(
@@ -1487,9 +1490,9 @@ class SinkParser:
                             "Bad language code syntax on string " + "literal, after @",
                         )
                     i = m.end()
-                    lang = argstr[j + 1: i]
+                    lang = argstr[j + 1 : i]
                     j = i
-                if argstr[j: j + 2] == "^^":
+                if argstr[j : j + 2] == "^^":
                     res2 = []
                     j = self.uri_ref2(argstr, j + 2, res2)  # Read datatype URI
                     dt = res2[0]
@@ -1522,15 +1525,15 @@ class SinkParser:
                 if (
                     delim == delim3
                 ):  # done when delim is """ or ''' and, respectively ...
-                    if argstr[j: j + 5] == delim5:  # ... we have "" or '' before
+                    if argstr[j : j + 5] == delim5:  # ... we have "" or '' before
                         i = j + 5
                         ustr = ustr + delim2
                         return i, ustr
-                    if argstr[j: j + 4] == delim4:  # ... we have " or ' before
+                    if argstr[j : j + 4] == delim4:  # ... we have " or ' before
                         i = j + 4
                         ustr = ustr + delim1
                         return i, ustr
-                    if argstr[j: j + 3] == delim3:  # current " or ' is part of delim
+                    if argstr[j : j + 3] == delim3:  # current " or ' is part of delim
                         i = j + 3
                         return i, ustr
 
@@ -1542,8 +1545,8 @@ class SinkParser:
             m = interesting.search(argstr, j)  # was argstr[j:].
             # Note for pos param to work, MUST be compiled  ... re bug?
             assert m, "Quote expected in string at ^ in %s^%s" % (
-                argstr[j - 20: j],
-                argstr[j: j + 20],
+                argstr[j - 20 : j],
+                argstr[j : j + 20],
             )  # at least need a quote
 
             i = m.start()
@@ -1589,7 +1592,7 @@ class SinkParser:
 
             elif ch == "\\":
                 j = i + 1
-                ch = argstr[j: j + 1]  # Will be empty if string ends
+                ch = argstr[j : j + 1]  # Will be empty if string ends
                 if not ch:
                     raise BadSyntax(
                         self._thisDoc,
@@ -1620,14 +1623,14 @@ class SinkParser:
                 self._thisDoc, startline, argstr, i, "unterminated string literal(3)"
             )
         try:
-            return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i: i + n])
+            return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n])
         except:
             raise BadSyntax(
                 self._thisDoc,
                 startline,
                 argstr,
                 i,
-                "bad string literal hex escape: " + argstr[i: i + n],
+                "bad string literal hex escape: " + argstr[i : i + n],
             )
 
     def uEscape(self, argstr, i, startline):
@@ -1672,7 +1675,7 @@ class BadSyntax(SyntaxError):
             self._why,
             pre,
             argstr[st:i],
-            argstr[i: i + 60],
+            argstr[i : i + 60],
             post,
         )
 
@@ -1896,8 +1899,11 @@ class TurtleParser(Parser):
 
         baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "")
         p = SinkParser(sink, baseURI=baseURI, turtle=turtle)
-
-        p.loadStream(source.getByteStream())
+        # N3 parser prefers str stream
+        stream = source.getCharacterStream()
+        if not stream:
+            stream = source.getByteStream()
+        p.loadStream(stream)
 
         for prefix, namespace in p._bindings.items():
             graph.bind(prefix, namespace)
diff --git a/rdflib/plugins/parsers/nquads.py b/rdflib/plugins/parsers/nquads.py
index a3bfbc6e..2a3a9136 100644
--- a/rdflib/plugins/parsers/nquads.py
+++ b/rdflib/plugins/parsers/nquads.py
@@ -31,7 +31,7 @@ from codecs import getreader
 from rdflib import ConjunctiveGraph
 
 # Build up from the NTriples parser:
-from rdflib.plugins.parsers.ntriples import NTriplesParser
+from rdflib.plugins.parsers.ntriples import W3CNTriplesParser
 from rdflib.plugins.parsers.ntriples import ParseError
 from rdflib.plugins.parsers.ntriples import r_tail
 from rdflib.plugins.parsers.ntriples import r_wspace
@@ -39,7 +39,7 @@ from rdflib.plugins.parsers.ntriples import r_wspace
 __all__ = ["NQuadsParser"]
 
 
-class NQuadsParser(NTriplesParser):
+class NQuadsParser(W3CNTriplesParser):
     def parse(self, inputsource, sink, bnode_context=None, **kwargs):
         """
         Parse inputsource as an N-Quads file.
@@ -57,13 +57,14 @@ class NQuadsParser(NTriplesParser):
         )
         self.sink = ConjunctiveGraph(store=sink.store, identifier=sink.identifier)
 
-        source = inputsource.getByteStream()
+        source = inputsource.getCharacterStream()
+        if not source:
+            source = inputsource.getByteStream()
+            source = getreader("utf-8")(source)
 
         if not hasattr(source, "read"):
             raise ParseError("Item to parse must be a file-like object.")
 
-        source = getreader("utf-8")(source)
-
         self.file = source
         self.buffer = ""
         while True:
diff --git a/rdflib/plugins/parsers/nt.py b/rdflib/plugins/parsers/nt.py
deleted file mode 100644
index c37a1aa0..00000000
--- a/rdflib/plugins/parsers/nt.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from rdflib.parser import Parser
-from rdflib.plugins.parsers.ntriples import NTriplesParser
-
-__all__ = ["NTSink", "NTParser"]
-
-
-class NTSink(object):
-    def __init__(self, graph):
-        self.graph = graph
-
-    def triple(self, s, p, o):
-        self.graph.add((s, p, o))
-
-
-class NTParser(Parser):
-    """parser for the ntriples format, often stored with the .nt extension
-
-    See http://www.w3.org/TR/rdf-testcases/#ntriples"""
-
-    def parse(self, source, sink, **kwargs):
-        '''
-        Parse the NT format
-
-        :type source: `rdflib.parser.InputSource`
-        :param source: the source of NT-formatted data
-        :type sink: `rdflib.graph.Graph`
-        :param sink: where to send parsed triples
-        :param kwargs: Additional arguments to pass to `.NTriplesParser.parse`
-        '''
-        f = source.getByteStream()  # TODO getCharacterStream?
-        parser = NTriplesParser(NTSink(sink))
-        parser.parse(f, **kwargs)
-        f.close()
diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py
index 33a4a4e6..d43a240c 100644
--- a/rdflib/plugins/parsers/ntriples.py
+++ b/rdflib/plugins/parsers/ntriples.py
@@ -1,9 +1,6 @@
-#!/usr/bin/env python
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+#!/usr/bin/env python3
 
-__doc__ = """
+__doc__ = """\
 N-Triples Parser
 License: GPL 2, W3C, BSD, or MIT
 Author: Sean B. Palmer, inamidst.com
@@ -15,14 +12,13 @@ import codecs
 from rdflib.term import URIRef as URI
 from rdflib.term import BNode as bNode
 from rdflib.term import Literal
-
-
-from rdflib.compat import cast_bytes
 from rdflib.compat import decodeUnicodeEscape
+from rdflib.exceptions import ParserError as ParseError
+from rdflib.parser import Parser
 
-from io import BytesIO
+from io import StringIO, TextIOBase, BytesIO
 
-__all__ = ["unquote", "uriquote", "Sink", "NTriplesParser"]
+__all__ = ["unquote", "uriquote", "W3CNTriplesParser", "NTGraphSink", "NTParser"]
 
 uriref = r'<([^:]+:[^\s"<>]*)>'
 literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"'
@@ -40,15 +36,7 @@ bufsiz = 2048
 validate = False
 
 
-class Node(str):
-    pass
-
-
-class ParseError(Exception):
-    pass
-
-
-class Sink(object):
+class DummySink(object):
     def __init__(self):
         self.length = 0
 
@@ -78,7 +66,7 @@ def unquote(s):
         while s:
             m = r_safe.match(s)
             if m:
-                s = s[m.end():]
+                s = s[m.end() :]
                 result.append(m.group(1))
                 continue
 
@@ -90,7 +78,7 @@ def unquote(s):
 
             m = r_uniquot.match(s)
             if m:
-                s = s[m.end():]
+                s = s[m.end() :]
                 u, U = m.groups()
                 codepoint = int(u or U, 16)
                 if codepoint > 0x10FFFF:
@@ -113,11 +101,10 @@ def uriquote(uri):
         return r_hibyte.sub(lambda m: "%%%02X" % ord(m.group(1)), uri)
 
 
-class NTriplesParser(object):
+class W3CNTriplesParser(object):
     """An N-Triples Parser.
-
+    This is a legacy-style Triples parser for NTriples provided by W3C
     Usage::
-
           p = NTriplesParser(sink=MySink())
           sink = p.parse(f) # file; use parsestring for a string
 
@@ -127,6 +114,8 @@ class NTriplesParser(object):
     `NTriplesParser`.
     """
 
+    __slots__ = ("_bnode_ids", "sink", "buffer", "file", "line")
+
     def __init__(self, sink=None, bnode_context=None):
         if bnode_context is not None:
             self._bnode_ids = bnode_context
@@ -136,7 +125,11 @@ class NTriplesParser(object):
         if sink is not None:
             self.sink = sink
         else:
-            self.sink = Sink()
+            self.sink = DummySink()
+
+        self.buffer = None
+        self.file = None
+        self.line = ""
 
     def parse(self, f, bnode_context=None):
         """
@@ -150,10 +143,13 @@ class NTriplesParser(object):
                               passed in to define a distinct context for a given call to
                               `parse`.
         """
+
         if not hasattr(f, "read"):
             raise ParseError("Item to parse must be a file-like object.")
-        # since N-Triples 1.1 files can and should be utf-8 encoded
-        f = codecs.getreader("utf-8")(f)
+
+        if not hasattr(f, "encoding") and not hasattr(f, "charbuffer"):
+            # someone still using a bytestream here?
+            f = codecs.getreader("utf-8")(f)
 
         self.file = f
         self.buffer = ""
@@ -164,16 +160,17 @@ class NTriplesParser(object):
             try:
                 self.parseline(bnode_context=bnode_context)
             except ParseError:
-                raise ParseError("Invalid line: %r" % self.line)
+                raise ParseError("Invalid line: {}".format(self.line))
         return self.sink
 
     def parsestring(self, s, **kwargs):
         """Parse s as an N-Triples string."""
-        if not isinstance(s, str):
+        if not isinstance(s, (str, bytes, bytearray)):
             raise ParseError("Item to parse must be a string instance.")
-        f = BytesIO()
-        f.write(cast_bytes(s))
-        f.seek(0)
+        if isinstance(s, (bytes, bytearray)):
+            f = codecs.getreader("utf-8")(BytesIO(s))
+        else:
+            f = StringIO(s)
         self.parse(f, **kwargs)
 
     def readline(self):
@@ -189,7 +186,7 @@ class NTriplesParser(object):
         while True:
             m = r_line.match(self.buffer)
             if m:  # the more likely prospect
-                self.buffer = self.buffer[m.end():]
+                self.buffer = self.buffer[m.end() :]
                 return m.group(1)
             else:
                 buffer = self.file.read(bufsiz)
@@ -211,12 +208,12 @@ class NTriplesParser(object):
         predicate = self.predicate()
         self.eat(r_wspaces)
 
-        object = self.object(bnode_context)
+        object_ = self.object(bnode_context)
         self.eat(r_tail)
 
         if self.line:
-            raise ParseError("Trailing garbage")
-        self.sink.triple(subject, predicate, object)
+            raise ParseError("Trailing garbage: {}".format(self.line))
+        self.sink.triple(subject, predicate, object_)
 
     def peek(self, token):
         return self.line.startswith(token)
@@ -227,7 +224,7 @@ class NTriplesParser(object):
             # print(dir(pattern))
             # print repr(self.line), type(self.line)
             raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line))
-        self.line = self.line[m.end():]
+        self.line = self.line[m.end() :]
         return m
 
     def subject(self, bnode_context=None):
@@ -295,13 +292,44 @@ class NTriplesParser(object):
         return False
 
 
-# # Obsolete, unused
-# def parseURI(uri):
-#     import urllib
-#     parser = NTriplesParser()
-#     u = urllib.urlopen(uri)
-#     sink = parser.parse(u)
-#     u.close()
-#     # for triple in sink:
-#     #     print triple
-#     print 'Length of input:', sink.length
+class NTGraphSink(object):
+    __slots__ = ("g",)
+
+    def __init__(self, graph):
+        self.g = graph
+
+    def triple(self, s, p, o):
+        self.g.add((s, p, o))
+
+
+class NTParser(Parser):
+    """parser for the ntriples format, often stored with the .nt extension
+
+    See http://www.w3.org/TR/rdf-testcases/#ntriples"""
+
+    __slots__ = set()
+
+    @classmethod
+    def parse(cls, source, sink, **kwargs):
+        """
+        Parse the NT format
+
+        :type source: `rdflib.parser.InputSource`
+        :param source: the source of NT-formatted data
+        :type sink: `rdflib.graph.Graph`
+        :param sink: where to send parsed triples
+        :param kwargs: Additional arguments to pass to `.NTriplesParser.parse`
+        """
+        f = source.getCharacterStream()
+        if not f:
+            b = source.getByteStream()
+            # TextIOBase includes: StringIO and TextIOWrapper
+            if isinstance(b, TextIOBase):
+                # f is not really a ByteStream, but a CharacterStream
+                f = b
+            else:
+                # since N-Triples 1.1 files can and should be utf-8 encoded
+                f = codecs.getreader("utf-8")(b)
+        parser = W3CNTriplesParser(NTGraphSink(sink))
+        parser.parse(f, **kwargs)
+        f.close()
diff --git a/rdflib/plugins/parsers/trig.py b/rdflib/plugins/parsers/trig.py
index 8f270de0..938fb259 100644
--- a/rdflib/plugins/parsers/trig.py
+++ b/rdflib/plugins/parsers/trig.py
@@ -82,7 +82,7 @@ class TrigSinkParser(SinkParser):
         if j < 0:
             self.BadSyntax(argstr, i, "EOF found when expected graph")
 
-        if argstr[j: j + 1] == "=":  # optional = for legacy support
+        if argstr[j : j + 1] == "=":  # optional = for legacy support
 
             i = self.skipSpace(argstr, j + 1)
             if i < 0:
@@ -90,7 +90,7 @@ class TrigSinkParser(SinkParser):
         else:
             i = j
 
-        if argstr[i: i + 1] != "{":
+        if argstr[i : i + 1] != "{":
             return -1  # the node wasn't part of a graph
 
         j = i + 1
@@ -106,7 +106,7 @@ class TrigSinkParser(SinkParser):
             if i < 0:
                 self.BadSyntax(argstr, i, "needed '}', found end.")
 
-            if argstr[i: i + 1] == "}":
+            if argstr[i : i + 1] == "}":
                 j = i + 1
                 break
 
@@ -153,7 +153,11 @@ class TrigParser(Parser):
         )
         p = TrigSinkParser(sink, baseURI=baseURI, turtle=True)
 
-        p.loadStream(source.getByteStream())
+        stream = source.getCharacterStream()  # try to get str stream first
+        if not stream:
+            # fallback to get the bytes stream
+            stream = source.getByteStream()
+        p.loadStream(stream)
 
         for prefix, namespace in p._bindings.items():
             conj_graph.bind(prefix, namespace)
author	Nicholas Car <nicholas.car@surroundaustralia.com>	2020-08-27 13:13:45 +1000
committer	GitHub <noreply@github.com>	2020-08-27 13:13:45 +1000
commit	3afffcd19d3a5d240e83b3a59b53e3ee1120c165 (patch)
tree	42ba0191f0a8f645cbc5b60aefd8a3cbfc383a8b /rdflib/plugins
parent	3e42f5eea742563cdeab7d655fe55f7d0e25ea16 (diff)
parent	94295389204175783c2f369c2826f0ba55a2d42c (diff)
download	rdflib-improve_graph_parse.tar.gz