fix: validation issues with examples (#2269)

I want to add examples for securing RDFLib network access using `sys.addaudithook` and `urllib.request.install_opener`, but I also want to validate the examples in our CI pipeline, so we can demonstrate to our users that they work. This change adds validation for all examples, so the security examples, which will be added in a separate PR, will then also be validated.
author: Iwan Aucamp <aucampia@gmail.com> 2023-03-13 01:18:21 +0100
committer: GitHub <noreply@github.com> 2023-03-13 01:18:21 +0100
commit: 35a35375d213afe83a825d7dfcc59008f8e2352c (patch)
tree: 1fa971ef3a9e897e19b5df5fc304fe9c02697e41
parent: 7a7cc1f929ff6fbc8c3238368e518a00af1025b6 (diff)
download: rdflib-35a35375d213afe83a825d7dfcc59008f8e2352c.tar.gz
17 files changed, 147 insertions, 61 deletions
diff --git a/examples/berkeleydb_example.py b/examples/berkeleydb_example.py
index 120370be..6430315a 100644
--- a/examples/berkeleydb_example.py
+++ b/examples/berkeleydb_example.py
@@ -16,10 +16,12 @@ Example 2: larger data
 * does not delete the DB at the end so you can see it on disk
 """
 import os
-from rdflib import ConjunctiveGraph, Namespace, Literal
-from rdflib.store import NO_STORE, VALID_STORE
 import tempfile
 
+from rdflib import ConjunctiveGraph, Literal, Namespace
+from rdflib.plugins.stores.berkeleydb import has_bsddb
+from rdflib.store import NO_STORE, VALID_STORE
+
 
 def example_1():
     """Creates a ConjunctiveGraph and performs some BerkeleyDB tasks with it"""
@@ -98,10 +100,10 @@ def example_2():
         9719
         ...
     """
-    from urllib.request import urlopen, Request
-    from urllib.error import HTTPError
-    import json
     import base64
+    import json
+    from urllib.error import HTTPError
+    from urllib.request import Request, urlopen
 
     g = ConjunctiveGraph("BerkeleyDB")
     g.open("gsg_vocabs", create=True)
@@ -129,5 +131,7 @@ def example_2():
 
 
 if __name__ == "__main__":
-    example_1()
-    example_2()
+    if has_bsddb:
+        # Only run the examples if BerkeleyDB is available
+        example_1()
+        example_2()
diff --git a/examples/conjunctive_graphs.py b/examples/conjunctive_graphs.py
index b2e6aacf..433a843f 100644
--- a/examples/conjunctive_graphs.py
+++ b/examples/conjunctive_graphs.py
@@ -8,12 +8,11 @@ This example shows how to create Named Graphs and work with the
 conjunction (union) of all the graphs.
 """
 
-from rdflib import Namespace, Literal, URIRef
-from rdflib.graph import Graph, ConjunctiveGraph
+from rdflib import Literal, Namespace, URIRef
+from rdflib.graph import ConjunctiveGraph, Graph
 from rdflib.plugins.stores.memory import Memory
 
 if __name__ == "__main__":
-
     LOVE = Namespace("http://love.com#")
     LOVERS = Namespace("http://love.com/lovers/")
 
@@ -59,7 +58,7 @@ if __name__ == "__main__":
 
     print("Query the conjunction of all graphs:")
     xx = None
-    for x in g[mary : LOVE.loves / LOVE.hasCuteName]:
+    for x in g[mary : LOVE.loves / LOVE.hasCuteName]:  # type: ignore[misc]
         xx = x
     print("Q: Who does Mary love?")
     print("A: Mary loves {}".format(xx))
diff --git a/examples/custom_datatype.py b/examples/custom_datatype.py
index c3f43820..4d402793 100644
--- a/examples/custom_datatype.py
+++ b/examples/custom_datatype.py
@@ -9,11 +9,9 @@ mappings between literal datatypes and Python objects
 """
 
 
-from rdflib import Graph, Literal, Namespace, XSD
-from rdflib import term
+from rdflib import XSD, Graph, Literal, Namespace, term
 
 if __name__ == "__main__":
-
     # Complex numbers are not registered by default
     # No custom constructor/serializer needed since
     # complex('(2+3j)') works fine
@@ -46,4 +44,5 @@ if __name__ == "__main__":
 
     # Compare with the original python complex object (should be True)
     # l2[2] is the object of the triple
+    assert isinstance(l2[2], Literal)
     print(l2[2].value == c)
diff --git a/examples/custom_eval.py b/examples/custom_eval.py
index 36998087..f8dfd390 100644
--- a/examples/custom_eval.py
+++ b/examples/custom_eval.py
@@ -16,12 +16,19 @@ i.e. in your setup.py::
     }
 """
 
-import rdflib
 
-from rdflib.plugins.sparql.evaluate import evalBGP
+from pathlib import Path
+
+import rdflib
 from rdflib.namespace import FOAF, RDF, RDFS
+from rdflib.plugins.sparql.evaluate import evalBGP
 
-inferredSubClass = RDFS.subClassOf * "*"  # any number of rdfs.subClassOf
+EXAMPLES_DIR = Path(__file__).parent
+
+
+inferred_sub_class = (
+    RDFS.subClassOf * "*"  # type: ignore[operator]
+)  # any number of rdfs.subClassOf
 
 
 def customEval(ctx, part):
@@ -36,7 +43,7 @@ def customEval(ctx, part):
             if t[1] == RDF.type:
                 bnode = rdflib.BNode()
                 triples.append((t[0], t[1], bnode))
-                triples.append((bnode, inferredSubClass, t[2]))
+                triples.append((bnode, inferred_sub_class, t[2]))
             else:
                 triples.append(t)
 
@@ -47,12 +54,11 @@ def customEval(ctx, part):
 
 
 if __name__ == "__main__":
-
     # add function directly, normally we would use setuptools and entry_points
     rdflib.plugins.sparql.CUSTOM_EVALS["exampleEval"] = customEval
 
     g = rdflib.Graph()
-    g.parse("foaf.n3")
+    g.parse(f"{EXAMPLES_DIR / 'foaf.n3'}")
 
     # Add the subClassStmt so that we can query for it!
     g.add((FOAF.Person, RDFS.subClassOf, FOAF.Agent))
@@ -60,11 +66,11 @@ if __name__ == "__main__":
     # Find all FOAF Agents
     for x in g.query(
         f"""
-        PREFIX foaf: <{FOAF}> 
-        
-        SELECT * 
+        PREFIX foaf: <{FOAF}>
+
+        SELECT *
         WHERE {{
-            ?s a foaf:Agent . 
+            ?s a foaf:Agent .
         }}
         """
     ):
diff --git a/examples/datasets.py b/examples/datasets.py
index e5d14aa1..8bf3c9d3 100644
--- a/examples/datasets.py
+++ b/examples/datasets.py
@@ -7,7 +7,18 @@ This example file shows how to decalre a Dataset, add content to it, serialise i
 and remove things from it.
 """
 
-from rdflib import Dataset, URIRef, Literal, Namespace
+from rdflib import Dataset, Literal, Namespace, URIRef
+
+# Note regarding `mypy: ignore_errors=true`:
+#
+# This example is using URIRef values as context identifiers. This is contrary
+# to the type hints, but it does work. Most likely the type hints are wrong.
+# Ideally we should just use `# type: ignore` comments for the lines that are
+# causing problems, but for some reason the error occurs on different lines with
+# different python versions, so the only option is to ignore errors for the
+# whole file.
+
+# mypy: ignore_errors=true
 
 #
 #   Create & Add
@@ -99,7 +110,7 @@ print()
 """
 print("Printing all triple from one Graph in the Dataset:")
 print("---")
-for triple in d.triples((None, None, None, graph_1)):
+for triple in d.triples((None, None, None, graph_1)):  # type: ignore[arg-type]
     print(triple)
 print("---")
 print()
diff --git a/examples/foafpaths.py b/examples/foafpaths.py
index 6bb21efd..db34fb31 100644
--- a/examples/foafpaths.py
+++ b/examples/foafpaths.py
@@ -26,13 +26,16 @@ See the docs for :mod:`rdflib.paths` for the details.
 This example shows how to get the name of friends (i.e values two steps away x knows y, y name z) with a single query.
 """
 
-from rdflib import URIRef, Graph
+from pathlib import Path
+
+from rdflib import Graph, URIRef
 from rdflib.namespace import FOAF
 
-if __name__ == "__main__":
+EXAMPLES_DIR = Path(__file__).parent
 
+if __name__ == "__main__":
     g = Graph()
-    g.parse("foaf.n3")
+    g.parse(f"{EXAMPLES_DIR / 'foaf.n3'}")
 
     tim = URIRef("http://www.w3.org/People/Berners-Lee/card#i")
 
diff --git a/examples/prepared_query.py b/examples/prepared_query.py
index 828ed052..035c6137 100644
--- a/examples/prepared_query.py
+++ b/examples/prepared_query.py
@@ -8,22 +8,27 @@ When executing, variables can be bound with the
 ``initBindings`` keyword parameter.
 """
 
+from pathlib import Path
+
 import rdflib
-from rdflib.plugins.sparql import prepareQuery
 from rdflib.namespace import FOAF
+from rdflib.plugins.sparql import prepareQuery
 
+EXAMPLES_DIR = Path(__file__).parent
 
 if __name__ == "__main__":
-
     q = prepareQuery(
         "SELECT ?name WHERE { ?person foaf:knows/foaf:name ?name . }",
         initNs={"foaf": FOAF},
     )
 
     g = rdflib.Graph()
-    g.parse("foaf.n3")
+    g.parse(f"{EXAMPLES_DIR / 'foaf.n3'}")
 
     tim = rdflib.URIRef("http://www.w3.org/People/Berners-Lee/card#i")
 
     for row in g.query(q, initBindings={"person": tim}):
+        # For select queries, the Result object is an iterable of ResultRow
+        # objects.
+        assert isinstance(row, rdflib.query.ResultRow)
         print(row.name)
diff --git a/examples/resource_example.py b/examples/resource_example.py
index 9085c32c..da93042f 100644
--- a/examples/resource_example.py
+++ b/examples/resource_example.py
@@ -7,7 +7,7 @@ where this resource is the subject.
 This example shows g.resource() in action.
 """
 
-from rdflib import Graph, RDF, RDFS, Literal
+from rdflib import RDF, RDFS, Graph, Literal
 from rdflib.namespace import FOAF
 
 if __name__ == "__main__":
diff --git a/examples/simple_example.py b/examples/simple_example.py
index 49f08408..c27f3181 100644
--- a/examples/simple_example.py
+++ b/examples/simple_example.py
@@ -1,8 +1,10 @@
-from rdflib import Graph, Literal, BNode, RDF
-from rdflib.namespace import FOAF, DC
+import os.path
+from tempfile import TemporaryDirectory
 
-if __name__ == "__main__":
+from rdflib import RDF, BNode, Graph, Literal
+from rdflib.namespace import DC, FOAF
 
+if __name__ == "__main__":
     store = Graph()
 
     # Bind a few prefix, namespace pairs for pretty output
@@ -29,9 +31,11 @@ if __name__ == "__main__":
         for mbox in store.objects(person, FOAF["mbox"]):
             print(mbox)
 
-    print("--- saving RDF to a file (donna_foaf.rdf) ---")
+    tmp_dir = TemporaryDirectory()
+    output_file = os.path.join(tmp_dir.name, "donna_foaf.rdf")
+    print(f"--- saving RDF to a file ({output_file}) ---")
     # Serialize the store as RDF/XML to the file donna_foaf.rdf.
-    store.serialize("donna_foaf.rdf", format="pretty-xml", max_depth=3)
+    store.serialize(f"{output_file}", format="pretty-xml", max_depth=3)
 
     # Let's show off the serializers
     print()
diff --git a/examples/slice.py b/examples/slice.py
index 47c77fad..6994613e 100644
--- a/examples/slice.py
+++ b/examples/slice.py
@@ -9,16 +9,20 @@ can be realised.
 See :meth:`rdflib.graph.Graph.__getitem__` for details
 """
 
-from rdflib import Graph, RDF
+from pathlib import Path
+
+from rdflib import RDF, Graph
 from rdflib.namespace import FOAF
 
-if __name__ == "__main__":
+EXAMPLES_DIR = Path(__file__).parent
 
+
+if __name__ == "__main__":
     graph = Graph()
-    graph.parse("foaf.n3", format="n3")
+    graph.parse(f"{EXAMPLES_DIR / 'foaf.n3'}", format="n3")
 
-    for person in graph[: RDF.type : FOAF.Person]:
-        friends = list(graph[person : FOAF.knows * "+" / FOAF.name])
+    for person in graph[: RDF.type : FOAF.Person]:  # type: ignore[misc]
+        friends = list(graph[person : FOAF.knows * "+" / FOAF.name])  # type: ignore[operator]
         if friends:
             print(f"{graph.value(person, FOAF.name)}'s circle of friends:")
             for name in friends:
diff --git a/examples/smushing.py b/examples/smushing.py
index 8cdb13b4..7c367a25 100644
--- a/examples/smushing.py
+++ b/examples/smushing.py
@@ -21,17 +21,25 @@ year, I would still give it the same stable subject URI that merges
 with my existing data.
 """
 
+from pathlib import Path
+
 from rdflib import Graph, Namespace
 from rdflib.namespace import FOAF
 
 STABLE = Namespace("http://example.com/person/mbox_sha1sum/")
 
+EXAMPLES_DIR = Path(__file__).parent
+
 if __name__ == "__main__":
     g = Graph()
-    g.parse("smushingdemo.n3", format="n3")
+    g.parse(f"{EXAMPLES_DIR / 'smushingdemo.n3'}", format="n3")
 
     newURI = {}  # old subject : stable uri
     for s, p, o in g.triples((None, FOAF["mbox_sha1sum"], None)):
+        # For this graph, all objects are Identifiers, which is a subclass of
+        # string. `n3` does allow for objects which are not Identifiers, like
+        # subgraphs.
+        assert isinstance(o, str)
         newURI[s] = STABLE[o]
 
     out = Graph()
diff --git a/examples/sparql_query_example.py b/examples/sparql_query_example.py
index 8379a634..0e9fc225 100644
--- a/examples/sparql_query_example.py
+++ b/examples/sparql_query_example.py
@@ -11,16 +11,26 @@ For variable names that are not valid python identifiers, dict access
 :attr:`~rdflib.query.Result.vars` contains the variables
 """
 
+import logging
+import sys
+from pathlib import Path
+
 import rdflib
 
+EXAMPLES_DIR = Path(__file__).parent
+
 if __name__ == "__main__":
+    logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
 
     g = rdflib.Graph()
-    g.parse("foaf.n3", format="n3")
+    g.parse(f"{EXAMPLES_DIR / 'foaf.n3'}", format="n3")
 
     # The QueryProcessor knows the FOAF prefix from the graph
     # which in turn knows it from reading the N3 RDF file
     for row in g.query("SELECT ?s WHERE { [] foaf:knows ?s .}"):
+        # For select queries, the Result object is an iterable of ResultRow
+        # objects.
+        assert isinstance(row, rdflib.query.ResultRow)
         print(row.s)
         # or row["s"]
         # or row[rdflib.Variable("s")]
diff --git a/examples/sparql_update_example.py b/examples/sparql_update_example.py
index 4236ce37..a9974996 100644
--- a/examples/sparql_update_example.py
+++ b/examples/sparql_update_example.py
@@ -2,12 +2,16 @@
 SPARQL Update statements can be applied with :meth:`rdflib.graph.Graph.update`
 """
 
+from pathlib import Path
+
 import rdflib
 
-if __name__ == "__main__":
+EXAMPLES_DIR = Path(__file__).parent
 
+
+if __name__ == "__main__":
     g = rdflib.Graph()
-    g.parse("foaf.n3", format="n3")
+    g.parse(f"{EXAMPLES_DIR / 'foaf.n3'}", format="n3")
 
     print(f"Initially there are {len(g)} triples in the graph")
 
@@ -15,8 +19,8 @@ if __name__ == "__main__":
         """
         PREFIX foaf: <http://xmlns.com/foaf/0.1/>
         PREFIX dbpedia: <http://dbpedia.org/resource/>
-        INSERT { 
-            ?s a dbpedia:Human . 
+        INSERT {
+            ?s a dbpedia:Human .
         }
         WHERE {
             ?s a foaf:Person .
diff --git a/examples/sparqlstore_example.py b/examples/sparqlstore_example.py
index 0fe23314..12fe4330 100644
--- a/examples/sparqlstore_example.py
+++ b/examples/sparqlstore_example.py
@@ -2,11 +2,11 @@
 Simple examples showing how to use the SPARQLStore
 """
 
-from rdflib import Graph, URIRef, Namespace
+from rdflib import Graph, Namespace, URIRef
 from rdflib.plugins.stores.sparqlstore import SPARQLStore
+from rdflib.term import Identifier
 
 if __name__ == "__main__":
-
     dbo = Namespace("http://dbpedia.org/ontology/")
 
     # EXAMPLE 1: using a Graph with the Store type string set to "SPARQLStore"
@@ -14,6 +14,7 @@ if __name__ == "__main__":
     graph.open("http://dbpedia.org/sparql")
 
     pop = graph.value(URIRef("http://dbpedia.org/resource/Berlin"), dbo.populationTotal)
+    assert isinstance(pop, Identifier)
 
     print(
         "According to DBPedia, Berlin has a population of {0:,}".format(
@@ -28,9 +29,9 @@ if __name__ == "__main__":
     for p in st.objects(
         URIRef("http://dbpedia.org/resource/Brisbane"), dbo.populationTotal
     ):
+        assert isinstance(p, Identifier)
         print(
-            "According to DBPedia, Brisbane has a population of "
-            "{0:,}".format(int(p), ",d")
+            "According to DBPedia, Brisbane has a population of " "{0}".format(int(p))
         )
     print()
 
@@ -50,7 +51,7 @@ if __name__ == "__main__":
 
     # EXAMPLE 4: using a SPARQL endpoint that requires Basic HTTP authentication
     # NOTE: this example won't run since the endpoint isn't live (or real)
-    s = SPARQLStore(
+    sparql_store = SPARQLStore(
         query_endpoint="http://fake-sparql-endpoint.com/repository/x",
         auth=("my_username", "my_password"),
     )
diff --git a/examples/swap_primer.py b/examples/swap_primer.py
index dad76211..fbcc52c3 100644
--- a/examples/swap_primer.py
+++ b/examples/swap_primer.py
@@ -5,11 +5,10 @@ example stuff in the Primer on N3:
 http://www.w3.org/2000/10/swap/Primer
 """
 
-from rdflib import ConjunctiveGraph, Namespace, Literal
-from rdflib.namespace import OWL, DC
+from rdflib import ConjunctiveGraph, Literal, Namespace
+from rdflib.namespace import DC, OWL
 
 if __name__ == "__main__":
-
     # Firstly, it doesn't have to be so complex.
     # Here we create a "Graph" of our work.
     # Think of it as a blank piece of graph paper!
diff --git a/pyproject.toml b/pyproject.toml
index 8e9348f3..24be1293 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -130,7 +130,6 @@ exclude = '''
     | _build
     | htmlcov
     | benchmarks
-    | examples       # No need to Black examples
     | test_reports
     | rdflib.egg-info
     | buck-out
@@ -168,7 +167,7 @@ log_cli_date_format = "%Y-%m-%dT%H:%M:%S"
 profile = "black"
 py_version = 37
 line_length = 88
-src_paths= ["rdflib", "test"]
+src_paths= ["rdflib", "test", "devtools", "examples"]
 supported_extensions = ["pyw", "pyi", "py"]
 skip = [
     '.eggs',         # exclude a few common directories in the
@@ -183,7 +182,6 @@ skip = [
     '_build',
     'htmlcov',
     'benchmarks',
-    'examples',       # No need to isort examples
     'test_reports',
     'rdflib.egg-info',
     'buck-out',
@@ -193,7 +191,7 @@ skip = [
 ]
 
 [tool.mypy]
-files = ['rdflib', 'test', 'devtools']
+files = ['rdflib', 'test', 'devtools', 'examples']
 python_version = "3.7"
 warn_unused_configs = true
 ignore_missing_imports = true
diff --git a/test/test_examples.py b/test/test_examples.py
new file mode 100644
index 00000000..d21d7cc0
--- /dev/null
+++ b/test/test_examples.py
@@ -0,0 +1,31 @@
+import subprocess
+import sys
+from pathlib import Path
+from typing import Iterable
+
+import pytest
+from _pytest.mark.structures import ParameterSet
+
+FILE_PATH = Path(__file__)
+
+EXAMPLES_DIR = FILE_PATH.parent.parent / "examples"
+
+
+def generate_example_cases() -> Iterable[ParameterSet]:
+    for example_file in EXAMPLES_DIR.glob("*.py"):
+        if example_file.name == "__init__.py":
+            # this is not an example ...
+            continue
+        yield pytest.param(example_file, id=f"{example_file.relative_to(EXAMPLES_DIR)}")
+
+
+@pytest.mark.parametrize(["example_file"], generate_example_cases())
+def test_example(example_file: Path) -> None:
+    """
+    The example runs without errors.
+    """
+    if example_file.name == "berkeleydb_example.py":
+        # this example requires a berkeleydb installation
+        pytest.skip("The BerkeleyDB example is not working correctly.")
+
+    subprocess.run([sys.executable, f"{example_file}"], check=True)
author	Iwan Aucamp <aucampia@gmail.com>	2023-03-13 01:18:21 +0100
committer	GitHub <noreply@github.com>	2023-03-13 01:18:21 +0100
commit	35a35375d213afe83a825d7dfcc59008f8e2352c (patch)
tree	1fa971ef3a9e897e19b5df5fc304fe9c02697e41
parent	7a7cc1f929ff6fbc8c3238368e518a00af1025b6 (diff)
download	rdflib-35a35375d213afe83a825d7dfcc59008f8e2352c.tar.gz