author     Gunnar Aastrand Grimnes <gromgull@gmail.com>  2013-05-03 21:12:44 +0200
committer  Gunnar Aastrand Grimnes <gromgull@gmail.com>  2013-05-03 21:12:44 +0200
commit     937edd34747dec528ec818e7893a1f2e3c0a84b3 (patch)
tree       8b872efe1bec7ea9600617800a905ab9e4c01368
parent     723137895125209c071ea0aac927b0153892d557 (diff)
download   rdflib-937edd34747dec528ec818e7893a1f2e3c0a84b3.tar.gz
cleanup - move stuff from rdfextras to sensible packages - entry_points for console scripts
-rw-r--r--  CHANGELOG  22
-rw-r--r--  examples/sparql_query_example.py  7
-rw-r--r--  rdflib/extras/cmdlineutils.py (renamed from rdflib/extras/utils/cmdlineutils.py)  2
-rw-r--r--  rdflib/extras/describer.py  5
-rw-r--r--  rdflib/extras/utils/__init__.py  10
-rw-r--r--  rdflib/extras/utils/graphutils.py  220
-rw-r--r--  rdflib/extras/utils/pathutils.py  107
-rw-r--r--  rdflib/extras/utils/termutils.py  234
-rw-r--r--  rdflib/graph.py  20
-rw-r--r--  rdflib/namespace.py  4
-rw-r--r--  rdflib/tools/csv2rdf.py (renamed from rdflib/extras/csv2rdf.py)  2
-rw-r--r--  rdflib/tools/graphisomorphism.py (renamed from rdflib/extras/graphisomorphism.py)  0
-rw-r--r--  rdflib/tools/rdf2dot.py (renamed from rdflib/extras/rdf2dot.py)  5
-rw-r--r--  rdflib/tools/rdfpipe.py (renamed from rdflib/extras/rdfpipe.py)  2
-rw-r--r--  rdflib/tools/rdfs2dot.py (renamed from rdflib/extras/rdfs2dot.py)  5
-rw-r--r--  rdflib/util.py  207
-rw-r--r--  rdflib/void.py  129
-rw-r--r--  setup.py  10
-rw-r--r--  test/test_dawg.py  44
19 files changed, 398 insertions, 637 deletions
diff --git a/CHANGELOG b/CHANGELOG
index c4a2ae92..115e0551 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -42,23 +42,23 @@
for row in graph.query('select ... ') :
print row.age, row["name"]
- * String operations on URIRefs return new URIRefs
- https://github.com/RDFLib/rdflib/pull/258
-
- >>> URIRef('http://example.org/')+'test
- rdflib.term.URIRef('http://example.org/test')
-
- >>> URIRef('http://example.org/persons/%d/id')%32
- rdflib.term.URIRef('http://example.org/persons/32/id')
-
* "Slicing" of Graphs and Resources as syntactic sugar:
https://github.com/RDFLib/rdflib/pull/271
- graph[bob:FOAF.knows/FOAF.name] -> generator over the names of Bobs friends
+ graph[bob:FOAF.knows/FOAF.name]
+ -> generator over the names of Bob's friends
+
+ * The SPARQLStore and SPARQLUpdateStore are now included in the RDFLib core
+
Minor Changes:
-
+
+ * String operations on URIRefs return new URIRefs:
+ >>> URIRef('http://example.org/')+'test'
+ rdflib.term.URIRef('http://example.org/test')
+
+ https://github.com/RDFLib/rdflib/pull/258
* Namespace is no longer a subclass of URIRef
* URIRefs and Literal language tags are validated on construction,
avoiding some "RDF-injection" issues
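
A minimal sketch of the slicing and URIRef-arithmetic entries from the CHANGELOG hunk above; the bob URI and the foaf.rdf file are hypothetical stand-ins, and the values in the comments are indicative only:

    from rdflib import Graph, URIRef
    from rdflib.namespace import FOAF

    g = Graph()
    g.parse("foaf.rdf")                                # any FOAF data
    bob = URIRef("http://example.org/people/bob")      # hypothetical subject

    # graph[s:p] yields objects; FOAF.knows/FOAF.name chains the two hops
    for name in g[bob:FOAF.knows / FOAF.name]:
        print name

    # string operations on URIRefs return new URIRefs
    print URIRef('http://example.org/') + 'test'       # http://example.org/test
    print URIRef('http://example.org/persons/%d/id') % 32
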
diff --git a/examples/sparql_query_example.py b/examples/sparql_query_example.py
index 285eabf1..293c4354 100644
--- a/examples/sparql_query_example.py
+++ b/examples/sparql_query_example.py
@@ -19,9 +19,14 @@ import rdflib
g = rdflib.Graph()
g.load("foaf.rdf")
+# the QueryProcessor knows the FOAF prefix from the graph
+# which in turn knows it from reading the RDF/XML file
for row in g.query(
- 'select ?s where { [] <http://xmlns.com/foaf/0.1/knows> ?s .}'):
+ 'select ?s where { [] foaf:knows ?s .}'):
print row.s
# or row["s"]
# or row[rdflib.Variable("s")]
+
+
+
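
The comment added above relies on the foaf: prefix travelling with the parsed RDF/XML file; a sketch of binding the prefix explicitly when the data does not declare it (same assumed foaf.rdf file as the example):

    import rdflib
    from rdflib.namespace import FOAF

    g = rdflib.Graph()
    g.load("foaf.rdf")
    g.bind("foaf", FOAF)   # make foaf: resolvable even without the file's prefixes
    for row in g.query('select ?s where { [] foaf:knows ?s .}'):
        print row.s
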
diff --git a/rdflib/extras/utils/cmdlineutils.py b/rdflib/extras/cmdlineutils.py
index 866798d9..a771d4d7 100644
--- a/rdflib/extras/utils/cmdlineutils.py
+++ b/rdflib/extras/cmdlineutils.py
@@ -4,7 +4,7 @@ import getopt
import rdflib
import codecs
-from rdflib.extras.utils.pathutils import guess_format
+from rdflib.util import guess_format
def _help():
diff --git a/rdflib/extras/describer.py b/rdflib/extras/describer.py
index 82162a02..fa8256ec 100644
--- a/rdflib/extras/describer.py
+++ b/rdflib/extras/describer.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-from __future__ import with_statement
+
from rdflib import py3compat
__doc__ = py3compat.format_doctest_out("""
A Describer is a stateful utility for creating RDF statements in a
@@ -15,11 +15,10 @@ Full example in the ``to_rdf`` method below::
>>> import datetime
>>> from rdflib.graph import Graph
- >>> from rdflib.namespace import Namespace, RDFS
+ >>> from rdflib.namespace import Namespace, RDFS, FOAF
>>>
>>> ORG_URI = "http://example.org/"
>>>
- >>> FOAF = Namespace("http://xmlns.com/foaf/0.1/")
>>> CV = Namespace("http://purl.org/captsolo/resume-rdf/0.2/cv#")
>>>
>>> class Person(object):
diff --git a/rdflib/extras/utils/__init__.py b/rdflib/extras/utils/__init__.py
deleted file mode 100644
index ff96ebc7..00000000
--- a/rdflib/extras/utils/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import pathutils
-import cmdlineutils
-import termutils
-import graphutils
-
-# tedious sop to flake
-assert pathutils
-assert cmdlineutils
-assert termutils
-assert graphutils
diff --git a/rdflib/extras/utils/graphutils.py b/rdflib/extras/utils/graphutils.py
deleted file mode 100644
index 986836c4..00000000
--- a/rdflib/extras/utils/graphutils.py
+++ /dev/null
@@ -1,220 +0,0 @@
-import collections
-import rdflib
-from rdflib import RDF
-
-"""
-RDF- and RDFlib-centric Graph utilities.
-"""
-
-
-def graph_to_dot(graph, dot):
- """
- Turns graph into dot (graphviz graph drawing format) using pydot.
-
- """
- import pydot
- nodes = {}
- for s, o in graph.subject_objects():
- for i in s, o:
- if i not in nodes.keys():
- nodes[i] = i
- for s, p, o in graph.triples((None, None, None)):
- dot.add_edge(pydot.Edge(nodes[s], nodes[o], label=p))
-
-
-def find_roots(graph, prop, roots=None):
- """
- Find the roots in some sort of transitive hierarchy.
-
- find_roots(graph, rdflib.RDFS.subClassOf)
- will return a set of all roots of the sub-class hierarchy
-
- Assumes triple of the form (child, prop, parent), i.e. the direction of
- RDFS.subClassOf or SKOS.broader
-
- """
-
- non_roots = set()
- if roots is None:
- roots = set()
- for x, y in graph.subject_objects(prop):
- non_roots.add(x)
- if x in roots:
- roots.remove(x)
- if y not in non_roots:
- roots.add(y)
- return roots
-
-
-def get_tree(graph,
- root,
- prop,
- mapper=lambda x: x,
- sortkey=None,
- done=None,
- dir='down'):
- """
- Return a nested list/tuple structure representing the tree
- built by the transitive property given, starting from the root given
-
- i.e.
-
- get_tree(graph,
- rdflib.URIRef("http://xmlns.com/foaf/0.1/Person"),
- rdflib.RDFS.subClassOf)
-
- will return the structure for the subClassTree below person.
-
- dir='down' assumes triple of the form (child, prop, parent),
- i.e. the direction of RDFS.subClassOf or SKOS.broader
- Any other dir traverses in the other direction
-
- """
-
- if done is None:
- done = set()
- if root in done:
- return
- done.add(root)
- tree = []
-
- if dir == 'down':
- branches = graph.subjects(prop, root)
- else:
- branches = graph.objects(root, prop)
-
- for branch in branches:
- t = get_tree(graph, branch, prop, mapper, sortkey, done, dir)
- if t:
- tree.append(t)
-
- return (mapper(root), sorted(tree, key=sortkey))
-
-VOID = rdflib.Namespace("http://rdfs.org/ns/void#")
-DCTERMS = rdflib.Namespace("http://purl.org/dc/terms/")
-FOAF = rdflib.Namespace("http://xmlns.com/foaf/0.1/")
-
-
-def generateVoID(g, dataset=None, res=None, distinctForPartitions=True):
- """
- Returns a new graph with a VoID description of the passed dataset
-
- For more info on Vocabulary of Interlinked Datasets (VoID), see:
- http://vocab.deri.ie/void
-
- This only makes two passes through the triples (once to detect the types
- of things)
-
- The tradeoff is that lots of temporary structures are built up in memory
- meaning lots of memory may be consumed :)
- I imagine at least a few copies of your original graph.
-
- the distinctForPartitions parameter controls whether
- distinctSubjects/objects are tracked for each class/propertyPartition
- this requires more memory again
-
- """
-
- typeMap = collections.defaultdict(set)
- classes = collections.defaultdict(set)
- for e, c in g.subject_objects(RDF.type):
- classes[c].add(e)
- typeMap[e].add(c)
-
- triples = 0
- subjects = set()
- objects = set()
- properties = set()
- classCount = collections.defaultdict(int)
- propCount = collections.defaultdict(int)
-
- classProps = collections.defaultdict(set)
- classObjects = collections.defaultdict(set)
- propSubjects = collections.defaultdict(set)
- propObjects = collections.defaultdict(set)
-
- for s, p, o in g:
-
- triples += 1
- subjects.add(s)
- properties.add(p)
- objects.add(o)
-
- # class partitions
- if s in typeMap:
- for c in typeMap[s]:
- classCount[c] += 1
- if distinctForPartitions:
- classObjects[c].add(o)
- classProps[c].add(p)
-
- # property partitions
- propCount[p] += 1
- if distinctForPartitions:
- propObjects[p].add(o)
- propSubjects[p].add(s)
-
- if not dataset:
- dataset = rdflib.URIRef("http://example.org/Dataset")
-
- if not res:
- res = rdflib.Graph()
-
- res.add((dataset, RDF.type, VOID.Dataset))
-
- # basic stats
- res.add((dataset, VOID.triples, rdflib.Literal(triples)))
- res.add((dataset, VOID.classes, rdflib.Literal(len(classes))))
-
- res.add((dataset, VOID.distinctObjects, rdflib.Literal(len(objects))))
- res.add((dataset, VOID.distinctSubjects, rdflib.Literal(len(subjects))))
- res.add((dataset, VOID.properties, rdflib.Literal(len(properties))))
-
- for i, c in enumerate(classes):
- part = rdflib.URIRef(dataset + "_class%d" % i)
- res.add((dataset, VOID.classPartition, part))
- res.add((part, RDF.type, VOID.Dataset))
-
- res.add((part, VOID.triples, rdflib.Literal(classCount[c])))
- res.add((part, VOID.classes, rdflib.Literal(1)))
-
- res.add((part, VOID["class"], c))
-
- res.add((part, VOID.entities, rdflib.Literal(len(classes[c]))))
- res.add((part, VOID.distinctSubjects, rdflib.Literal(len(classes[c]))))
-
- if distinctForPartitions:
- res.add(
- (part, VOID.properties, rdflib.Literal(len(classProps[c]))))
- res.add((part, VOID.distinctObjects,
- rdflib.Literal(len(classObjects[c]))))
-
- for i, p in enumerate(properties):
- part = rdflib.URIRef(dataset + "_property%d" % i)
- res.add((dataset, VOID.propertyPartition, part))
- res.add((part, RDF.type, VOID.Dataset))
-
- res.add((part, VOID.triples, rdflib.Literal(propCount[p])))
- res.add((part, VOID.properties, rdflib.Literal(1)))
-
- res.add((part, VOID.property, p))
-
- if distinctForPartitions:
-
- entities = 0
- propClasses = set()
- for s in propSubjects[p]:
- if s in typeMap:
- entities += 1
- for c in typeMap[s]:
- propClasses.add(c)
-
- res.add((part, VOID.entities, rdflib.Literal(entities)))
- res.add((part, VOID.classes, rdflib.Literal(len(propClasses))))
-
- res.add((part, VOID.distinctSubjects,
- rdflib.Literal(len(propSubjects[p]))))
- res.add((part, VOID.distinctObjects,
- rdflib.Literal(len(propObjects[p]))))
-
- return res, dataset
diff --git a/rdflib/extras/utils/pathutils.py b/rdflib/extras/utils/pathutils.py
deleted file mode 100644
index a547411a..00000000
--- a/rdflib/extras/utils/pathutils.py
+++ /dev/null
@@ -1,107 +0,0 @@
-"""
-RDF- and RDFlib-centric file and URL path utilities.
-"""
-
-from os.path import splitext
-
-
-def uri_leaf(uri):
- """
- Get the "leaf" - fragment id or last segment - of a URI. Useful e.g. for
- getting a term from a "namespace like" URI. Examples:
-
- >>> uri_leaf('http://example.org/ns/things#item')
- 'item'
- >>> uri_leaf('http://example.org/ns/stuff/item')
- 'item'
- >>> uri_leaf('http://example.org/ns/stuff/')
- >>>
- >>> uri_leaf('urn:example.org:stuff')
- 'stuff'
- >>> uri_leaf('example.org')
- >>>
- """
- for char in ('#', '/', ':'):
- if uri.endswith(char):
- break
- # base, sep, leaf = uri.rpartition(char)
- if char in uri:
- sep = char
- leaf = uri.rsplit(char)[-1]
- else:
- sep = ''
- leaf = uri
- if sep and leaf:
- return leaf
-
-
-SUFFIX_FORMAT_MAP = {
- 'rdf': 'xml',
- 'rdfs': 'xml',
- 'owl': 'xml',
- 'n3': 'n3',
- 'ttl': 'n3',
- 'nt': 'nt',
- 'trix': 'trix',
- 'xhtml': 'rdfa',
- 'html': 'rdfa',
- 'svg': 'rdfa',
- 'nq': 'nquads',
- 'trig': 'trig'
-}
-
-
-def guess_format(fpath, fmap=None):
- """
- Guess RDF serialization based on file suffix. Uses
- ``SUFFIX_FORMAT_MAP`` unless ``fmap`` is provided. Examples:
-
- >>> guess_format('path/to/file.rdf')
- 'xml'
- >>> guess_format('path/to/file.owl')
- 'xml'
- >>> guess_format('path/to/file.ttl')
- 'n3'
- >>> guess_format('path/to/file.xhtml')
- 'rdfa'
- >>> guess_format('path/to/file.svg')
- 'rdfa'
- >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'})
- 'grddl'
-
- This also works with just the suffixes, with or without leading dot, and
- regardless of letter case::
-
- >>> guess_format('.rdf')
- 'xml'
- >>> guess_format('rdf')
- 'xml'
- >>> guess_format('RDF')
- 'xml'
- """
- fmap = fmap or SUFFIX_FORMAT_MAP
- return fmap.get(_get_ext(fpath)) or fmap.get(fpath.lower())
-
-
-def _get_ext(fpath, lower=True):
- """
- Gets the file extension from a file(path); stripped of leading '.' and in
- lower case. Examples:
-
- >>> _get_ext("path/to/file.txt")
- 'txt'
- >>> _get_ext("OTHER.PDF")
- 'pdf'
- >>> _get_ext("noext")
- ''
- >>> _get_ext(".rdf")
- 'rdf'
- """
- ext = splitext(fpath)[-1]
- if ext == '' and fpath.startswith("."):
- ext = fpath
- if lower:
- ext = ext.lower()
- if ext.startswith('.'):
- ext = ext[1:]
- return ext
diff --git a/rdflib/extras/utils/termutils.py b/rdflib/extras/utils/termutils.py
deleted file mode 100644
index d8a21bc1..00000000
--- a/rdflib/extras/utils/termutils.py
+++ /dev/null
@@ -1,234 +0,0 @@
-"""Convenience functions for working with Terms and Graphs."""
-from rdflib import BNode
-from rdflib import Graph
-from rdflib import Literal
-from rdflib import URIRef
-from rdflib import Variable
-from rdflib.term import Statement
-from rdflib.graph import QuotedGraph
-from rdflib.py3compat import format_doctest_out
-
-__all__ = ['SUBJECT', 'PREDICATE', 'OBJECT', 'CONTEXT', 'TERM_COMBINATIONS',
- 'REVERSE_TERM_COMBINATIONS', 'TERM_INSTANTIATION_DICT',
- 'GRAPH_TERM_DICT', 'normalizeGraph', 'term2Letter',
- 'constructGraph', 'triplePattern2termCombinations',
- 'type2TermCombination', 'statement2TermCombination',
- 'escape_quotes']
-
-SUBJECT = 0
-PREDICATE = 1
-OBJECT = 2
-CONTEXT = 3
-TERM_COMBINATIONS = dict(
- [(term, index) for index, term, in enumerate(
- [
- 'UUUU', 'UUUB', 'UUUF', 'UUVU', 'UUVB', 'UUVF', 'UUBU', 'UUBB', 'UUBF',
- 'UULU', 'UULB', 'UULF', 'UUFU', 'UUFB', 'UUFF',
- #
- 'UVUU', 'UVUB', 'UVUF', 'UVVU', 'UVVB', 'UVVF', 'UVBU', 'UVBB', 'UVBF',
- 'UVLU', 'UVLB', 'UVLF', 'UVFU', 'UVFB', 'UVFF',
- #
- 'VUUU', 'VUUB', 'VUUF', 'VUVU', 'VUVB', 'VUVF', 'VUBU', 'VUBB', 'VUBF',
- 'VULU', 'VULB', 'VULF', 'VUFU', 'VUFB', 'VUFF',
- #
- 'VVUU', 'VVUB', 'VVUF', 'VVVU', 'VVVB', 'VVVF', 'VVBU', 'VVBB', 'VVBF',
- 'VVLU', 'VVLB', 'VVLF', 'VVFU', 'VVFB', 'VVFF',
- #
- 'BUUU', 'BUUB', 'BUUF', 'BUVU', 'BUVB', 'BUVF', 'BUBU', 'BUBB', 'BUBF',
- 'BULU', 'BULB', 'BULF', 'BUFU', 'BUFB', 'BUFF',
- #
- 'BVUU', 'BVUB', 'BVUF', 'BVVU', 'BVVB', 'BVVF', 'BVBU', 'BVBB', 'BVBF',
- 'BVLU', 'BVLB', 'BVLF', 'BVFU', 'BVFB', 'BVFF',
- #
- 'FUUU', 'FUUB', 'FUUF', 'FUVU', 'FUVB', 'FUVF', 'FUBU', 'FUBB', 'FUBF',
- 'FULU', 'FULB', 'FULF', 'FUFU', 'FUFB', 'FUFF',
- #
- 'FVUU', 'FVUB', 'FVUF', 'FVVU', 'FVVB', 'FVVF', 'FVBU', 'FVBB', 'FVBF',
- 'FVLU', 'FVLB', 'FVLF', 'FVFU', 'FVFB', 'FVFF',
- #
- # 'sUUU', 'sUUB', 'sUUF', 'sUVU', 'sUVB', 'sUVF', 'sUBU', 'sUBB', 'sUBF',
- # 'sULU', 'sULB', 'sULF', 'sUFU', 'sUFB', 'sUFF',
- #
- # 'sVUU', 'sVUB', 'sVUF', 'sVVU', 'sVVB', 'sVVF', 'sVBU', 'sVBB', 'sVBF',
- # 'sVLU', 'sVLB', 'sVLF', 'sVFU', 'sVFB', 'sVFF'
- ])])
-
-REVERSE_TERM_COMBINATIONS = dict(
- [(value, key) for key, value in TERM_COMBINATIONS.items()])
-
-TERM_INSTANTIATION_DICT = {
- 'U': URIRef,
- 'B': BNode,
- 'V': Variable,
- 'L': Literal
-}
-
-GRAPH_TERM_DICT = {
- 'F': (QuotedGraph, URIRef),
- 'U': (Graph, URIRef),
- 'B': (Graph, BNode)
-}
-
-
-@format_doctest_out
-def normalizeGraph(graph):
- """Takes an instance of a ``Graph`` and returns the instance's identifier
- and ``type``.
-
- Types are ``U`` for a :class:`~rdflib.graph.Graph`, ``F`` for
- a :class:`~rdflib.graph.QuotedGraph` and ``B`` for a
- :class:`~rdflib.graph.ConjunctiveGraph`
-
- >>> from rdflib import plugin
- >>> from rdflib.graph import Graph, ConjunctiveGraph, QuotedGraph
- >>> from rdflib.store import Store
- >>> from rdflib import URIRef, Namespace
- >>> from rdflib.extras.utils.termutils import normalizeGraph
- >>> memstore = plugin.get('IOMemory', Store)()
- >>> g = Graph(memstore, URIRef("http://purl.org/net/bel-epa/gjh"))
- >>> normalizeGraph(g)
- (rdflib.term.URIRef(%(u)s'http://purl.org/net/bel-epa/gjh'), 'U')
- >>> g = ConjunctiveGraph(memstore, Namespace("http://rdflib.net/ns"))
- >>> normalizeGraph(g) #doctest: +ELLIPSIS
- (rdflib.term.URIRef(%(u)s'http://rdflib.net/ns'), 'U')
- >>> g = QuotedGraph(memstore, Namespace("http://rdflib.net/ns"))
- >>> normalizeGraph(g)
- (rdflib.term.URIRef(%(u)s'http://rdflib.net/ns'), 'F')
-
- """
- if isinstance(graph, QuotedGraph):
- return graph.identifier, 'F'
- else:
- return graph.identifier, term2Letter(graph.identifier)
-
-
-@format_doctest_out
-def term2Letter(term):
- """Relate a given term to one of several key types:
-
- * :class:`~rdflib.term.BNode`,
- * :class:`~rdflib.term.Literal`,
- * :class:`~rdflib.term.Statement` (Deprecated)
- * :class:`~rdflib.term.URIRef`,
- * :class:`~rdflib.term.Variable`
- * :class:`~rdflib.graph.Graph`
- * :class:`~rdflib.graph.QuotedGraph`
-
- >>> import rdflib
- >>> from rdflib import plugin
- >>> from rdflib import URIRef, Namespace
- >>> from rdflib.term import BNode, Literal, Variable
- >>> # from rdflib.term import Statement
- >>> from rdflib.graph import Graph, ConjunctiveGraph, QuotedGraph
- >>> from rdflib.store import Store
- >>> from rdflib.extras.utils.termutils import term2Letter
- >>> term2Letter(URIRef('http://purl.org/net/bel-epa.com/'))
- 'U'
- >>> term2Letter(BNode())
- 'B'
- >>> term2Letter(Literal(%(u)s''))
- 'L'
- >>> term2Letter(Variable(%(u)s'x'))
- 'V'
- >>> term2Letter(Graph())
- 'B'
- >>> term2Letter(QuotedGraph("IOMemory", None))
- 'F'
- >>> term2Letter(None)
- 'L'
- >>> # term2Letter(Statement((None, None, None), None)) # Deprecated
-
- """
- if isinstance(term, URIRef):
- return 'U'
- elif isinstance(term, BNode):
- return 'B'
- elif isinstance(term, Literal):
- return 'L'
- elif isinstance(term, QuotedGraph):
- return 'F'
- elif isinstance(term, Variable):
- return 'V'
- elif isinstance(term, Statement):
- return 's'
- elif isinstance(term, Graph):
- return term2Letter(term.identifier)
- elif term is None:
- return 'L'
- else:
- raise Exception(
- ("The given term (%s) is not an instance of any " +
- "of the known types (URIRef, BNode, Literal, QuotedGraph, " +
- "or Variable). It is a %s")
- % (term, type(term)))
-
-
-def constructGraph(key):
- """Given a key (one of 'F', 'U' or 'B'), returns
- a tuple containing a ``Graph`` and an appropriate referent.
-
- >>> from rdflib.extras.utils.termutils import constructGraph
- >>> constructGraph('F')
- (<class 'rdflib.graph.QuotedGraph'>, <class 'rdflib.term.URIRef'>)
- >>> constructGraph('U')
- (<class 'rdflib.graph.Graph'>, <class 'rdflib.term.URIRef'>)
- >>> constructGraph('B')
- (<class 'rdflib.graph.Graph'>, <class 'rdflib.term.BNode'>)
-
- """
- return GRAPH_TERM_DICT[key]
-
-
-def triplePattern2termCombinations((s, p, o)):
- """
- Maps a triple pattern to term combinations (non-functioning)
-
- """
- combinations = []
- # combinations.update(TERM_COMBINATIONS)
- if isinstance(o, Literal):
- for key, val in TERM_COMBINATIONS.items():
- if key[OBJECT] == 'O':
- combinations.append(val)
- return combinations
-
-
-def type2TermCombination(member, klass, context):
- """
- Maps a type to a TermCombo
-
- """
- try:
- rt = TERM_COMBINATIONS['%sU%s%s' %
- (term2Letter(member),
- term2Letter(klass),
- normalizeGraph(context)[-1])]
- return rt
- except:
- raise Exception("Unable to persist" +
- "classification triple: %s %s %s %s" %
- (member, 'rdf:type', klass, context))
-
-
-def statement2TermCombination(subject, predicate, obj, context):
- """
- Maps a statement to a Term Combo
-
- """
- return TERM_COMBINATIONS['%s%s%s%s' %
- (term2Letter(subject), term2Letter(predicate),
- term2Letter(obj), normalizeGraph(context)[-1])]
-
-
-def escape_quotes(qstr):
- """
- #FIXME: This *may* prove to be a performance bottleneck and should
- perhaps be implemented in C (as it was in 4Suite RDF)
-
- Ported from Ft.Lib.DbUtil
- """
- if qstr is None:
- return ''
- tmp = qstr.replace("\\", "\\\\")
- tmp = tmp.replace("'", "\\'")
- return tmp
diff --git a/rdflib/graph.py b/rdflib/graph.py
index 0acb3879..37d17896 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -874,6 +874,9 @@ class Graph(Node):
If override is True will bind namespace to given prefix if namespace
was already bound to a different prefix.
+
+ for example: graph.bind('foaf', 'http://xmlns.com/foaf/0.1/')
+
"""
return self.namespace_manager.bind(
prefix, namespace, override=override)
@@ -1009,10 +1012,25 @@ class Graph(Node):
self.parse(source, publicID, format)
def query(self, query_object, processor='sparql',
- result='sparql', initNs={}, initBindings={},
+ result='sparql', initNs=None, initBindings=None,
use_store_provided=True, **kwargs):
"""
+ Query this graph.
+
+ A form of 'prepared query' can be realised by providing
+ initial variable bindings with initBindings.
+
+ Initial namespaces are used to resolve prefixes in the query;
+ if none are given, the namespaces from the graph's namespace
+ manager are used.
+
+ An rdflib.query.QueryResult object is returned.
+
"""
+
+ initBindings = initBindings or {}
+ initNs = initNs or dict(self.namespaces())
+
if hasattr(self.store, "query") and use_store_provided:
try:
return self.store.query(
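
A sketch of what the reworked query() signature allows; initNs and initBindings come from the diff above, while the query string, file name and FOAF data are assumptions for illustration:

    import rdflib
    from rdflib import Variable
    from rdflib.namespace import FOAF

    g = rdflib.Graph()
    g.parse("foaf.rdf")

    # initNs supplies prefixes not bound on the graph; initBindings pre-binds
    # ?person, giving a rough 'prepared query' that is reused in the loop.
    q = "select ?name where { ?person foaf:name ?name . }"
    for person in g.subjects(FOAF.knows, None):
        for row in g.query(q,
                           initNs={'foaf': FOAF},
                           initBindings={Variable('person'): person}):
            print row.name
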
diff --git a/rdflib/namespace.py b/rdflib/namespace.py
index 55455dfc..ac78042b 100644
--- a/rdflib/namespace.py
+++ b/rdflib/namespace.py
@@ -214,6 +214,10 @@ SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')
DOAP = Namespace('http://usefulinc.com/ns/doap#')
FOAF = Namespace('http://xmlns.com/foaf/0.1/')
DC = Namespace('http://purl.org/dc/elements/1.1/')
+DCTERMS = Namespace('http://purl.org/dc/terms/')
+VOID = Namespace('http://rdfs.org/ns/void#')
+
+
class NamespaceManager(object):
"""
diff --git a/rdflib/extras/csv2rdf.py b/rdflib/tools/csv2rdf.py
index 04d4db0e..97b589ee 100644
--- a/rdflib/extras/csv2rdf.py
+++ b/rdflib/tools/csv2rdf.py
@@ -16,7 +16,7 @@ from rdflib import RDF, RDFS
from rdflib.namespace import split_uri
HELP = """
-toRDF.py \
+csv2rdf.py \
-b <instance-base> \
-p <property-base> \
[-c <classname>] \
diff --git a/rdflib/extras/graphisomorphism.py b/rdflib/tools/graphisomorphism.py
index 74c85bc9..74c85bc9 100644
--- a/rdflib/extras/graphisomorphism.py
+++ b/rdflib/tools/graphisomorphism.py
diff --git a/rdflib/extras/rdf2dot.py b/rdflib/tools/rdf2dot.py
index f865f479..104fd990 100644
--- a/rdflib/extras/rdf2dot.py
+++ b/rdflib/tools/rdf2dot.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
import rdflib
-import rdflib.extras
-import rdflib.extras.utils.cmdlineutils
+import rdflib.extras.cmdlineutils
import sys
import cgi
@@ -118,7 +117,7 @@ language to stdout
def main():
- rdflib.extras.utils.cmdlineutils.main(rdf2dot, _help)
+ rdflib.extras.cmdlineutils.main(rdf2dot, _help)
if __name__ == '__main__':
main()
diff --git a/rdflib/extras/rdfpipe.py b/rdflib/tools/rdfpipe.py
index 1b6adaa5..fc57b4b6 100644
--- a/rdflib/extras/rdfpipe.py
+++ b/rdflib/tools/rdfpipe.py
@@ -17,7 +17,7 @@ from rdflib.namespace import RDF, RDFS, OWL, XSD
from rdflib.parser import Parser
from rdflib.serializer import Serializer
-from rdflib.extras.utils.pathutils import guess_format
+from rdflib.util import guess_format
STORE_CONNECTION = ''
diff --git a/rdflib/extras/rdfs2dot.py b/rdflib/tools/rdfs2dot.py
index c2c1e17c..ca3bd66c 100644
--- a/rdflib/extras/rdfs2dot.py
+++ b/rdflib/tools/rdfs2dot.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
-import rdflib.extras
-import rdflib.extras.utils
+import rdflib.extras.cmdlineutils
import sys
import itertools
@@ -98,7 +97,7 @@ DOT language to stdout
def main():
- rdflib.extras.utils.cmdlineutils.main(rdfs2dot, _help)
+ rdflib.extras.cmdlineutils.main(rdfs2dot, _help)
if __name__ == '__main__':
main()
diff --git a/rdflib/util.py b/rdflib/util.py
index 99f7741d..305c4dc1 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -37,6 +37,8 @@ from time import localtime
from time import time
from time import timezone
+from os.path import splitext
+from StringIO import StringIO
from rdflib.exceptions import ContextTypeError
from rdflib.exceptions import ObjectTypeError
@@ -52,7 +54,8 @@ from rdflib.py3compat import sign
__all__ = [
'list2set', 'first', 'uniq', 'more_than', 'to_term', 'from_n3',
'date_time', 'parse_date_time', 'check_context', 'check_subject',
- 'check_predicate', 'check_object', 'check_statement', 'check_pattern']
+ 'check_predicate', 'check_object', 'check_statement', 'check_pattern',
+ 'guess_format', 'pprint_query_results', 'find_roots', 'get_tree']
def list2set(seq):
@@ -317,6 +320,208 @@ def parse_date_time(val):
return t
+
+
+
+SUFFIX_FORMAT_MAP = {
+ 'rdf': 'xml',
+ 'rdfs': 'xml',
+ 'owl': 'xml',
+ 'n3': 'n3',
+ 'ttl': 'n3',
+ 'nt': 'nt',
+ 'trix': 'trix',
+ 'xhtml': 'rdfa',
+ 'html': 'rdfa',
+ 'svg': 'rdfa',
+ 'nq': 'nquads',
+ 'trig': 'trig'
+}
+
+
+def guess_format(fpath, fmap=None):
+ """
+ Guess RDF serialization based on file suffix. Uses
+ ``SUFFIX_FORMAT_MAP`` unless ``fmap`` is provided. Examples:
+
+ >>> guess_format('path/to/file.rdf')
+ 'xml'
+ >>> guess_format('path/to/file.owl')
+ 'xml'
+ >>> guess_format('path/to/file.ttl')
+ 'n3'
+ >>> guess_format('path/to/file.xhtml')
+ 'rdfa'
+ >>> guess_format('path/to/file.svg')
+ 'rdfa'
+ >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'})
+ 'grddl'
+
+ This also works with just the suffixes, with or without leading dot, and
+ regardless of letter case::
+
+ >>> guess_format('.rdf')
+ 'xml'
+ >>> guess_format('rdf')
+ 'xml'
+ >>> guess_format('RDF')
+ 'xml'
+ """
+ fmap = fmap or SUFFIX_FORMAT_MAP
+ return fmap.get(_get_ext(fpath)) or fmap.get(fpath.lower())
+
+
+def _get_ext(fpath, lower=True):
+ """
+ Gets the file extension from a file(path); stripped of leading '.' and in
+ lower case. Examples:
+
+ >>> _get_ext("path/to/file.txt")
+ 'txt'
+ >>> _get_ext("OTHER.PDF")
+ 'pdf'
+ >>> _get_ext("noext")
+ ''
+ >>> _get_ext(".rdf")
+ 'rdf'
+ """
+ ext = splitext(fpath)[-1]
+ if ext == '' and fpath.startswith("."):
+ ext = fpath
+ if lower:
+ ext = ext.lower()
+ if ext.startswith('.'):
+ ext = ext[1:]
+ return ext
+
+
+def find_roots(graph, prop, roots=None):
+ """
+ Find the roots in some sort of transitive hierarchy.
+
+ find_roots(graph, rdflib.RDFS.subClassOf)
+ will return a set of all roots of the sub-class hierarchy
+
+ Assumes triple of the form (child, prop, parent), i.e. the direction of
+ RDFS.subClassOf or SKOS.broader
+
+ """
+
+ non_roots = set()
+ if roots is None:
+ roots = set()
+ for x, y in graph.subject_objects(prop):
+ non_roots.add(x)
+ if x in roots:
+ roots.remove(x)
+ if y not in non_roots:
+ roots.add(y)
+ return roots
+
+
+def get_tree(graph,
+ root,
+ prop,
+ mapper=lambda x: x,
+ sortkey=None,
+ done=None,
+ dir='down'):
+ """
+ Return a nested list/tuple structure representing the tree
+ built by the transitive property given, starting from the root given
+
+ i.e.
+
+ get_tree(graph,
+ rdflib.URIRef("http://xmlns.com/foaf/0.1/Person"),
+ rdflib.RDFS.subClassOf)
+
+ will return the structure for the subClassTree below person.
+
+ dir='down' assumes triple of the form (child, prop, parent),
+ i.e. the direction of RDFS.subClassOf or SKOS.broader
+ Any other dir traverses in the other direction
+
+ """
+
+ if done is None:
+ done = set()
+ if root in done:
+ return
+ done.add(root)
+ tree = []
+
+ if dir == 'down':
+ branches = graph.subjects(prop, root)
+ else:
+ branches = graph.objects(root, prop)
+
+ for branch in branches:
+ t = get_tree(graph, branch, prop, mapper, sortkey, done, dir)
+ if t:
+ tree.append(t)
+
+ return (mapper(root), sorted(tree, key=sortkey))
+
+
+def pprint_query_results(res, namespace_manager=None, stream=None):
+ """
+ Write a text table of SELECT query results to ``stream``, or return it
+ as a string if no stream is given.
+ """
+
+ def termString(t):
+ if t is None:
+ return "-"
+ if namespace_manager:
+ if isinstance(t, URIRef):
+ return namespace_manager.normalizeUri(t)
+ elif isinstance(t, BNode):
+ return t.n3()
+ elif isinstance(t, Literal):
+ return t._literal_n3(qname_callback=namespace_manager.normalizeUri)
+ else:
+ return t.n3()
+
+ def c(s, w):
+ """
+ center the string s in a string of width w
+ """
+ h = (w - len(s)) // 2
+ return " " * h + s + " " * h
+
+ if res.type != 'SELECT':
+ raise Exception("Can only pretty print SELECT results!")
+
+ if not res:
+ return "(no results)\n"
+ else:
+ if stream:
+ out = stream
+ else:
+ out = StringIO()
+ # keys = r.vars
+ # for r in b:
+ # keys.update(r.keys())
+
+ keys = sorted(res.vars)
+ maxlen = [0] * len(keys)
+ b = [[termString(r[k]) for k in keys] for r in res]
+ for r in b:
+ for i in range(len(keys)):
+ maxlen[i] = max(maxlen[i], 1 + len(r[i]))
+
+ out.write(
+ "|".join([c(k, maxlen[i]) for i, k in enumerate(keys)]) + "\n")
+ out.write("-" * sum(maxlen) + "\n")
+ for r in sorted(b):
+ out.write("|".join(
+ [t + " " * (i - len(t) - 1) for i, t in zip(maxlen, r)]) + "\n")
+
+ if not stream:
+ return out.getvalue()
+
+
def test():
import doctest
doctest.testmod()
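
A sketch of the helpers that now live in rdflib.util, using an assumed foaf.rdf file; the class URI and query are illustrative only:

    import rdflib
    from rdflib.namespace import FOAF, RDFS
    from rdflib.util import find_roots, get_tree, pprint_query_results

    g = rdflib.Graph()
    g.parse("foaf.rdf")

    # roots of the subClassOf hierarchy, and the tree hanging below one class
    roots = find_roots(g, RDFS.subClassOf)
    tree = get_tree(g, FOAF.Agent, RDFS.subClassOf)   # nested (node, [subtrees]) tuples
    print roots
    print tree

    # text table of SELECT results; returns a string when no stream is given
    res = g.query(
        "select ?s ?name where { ?s <http://xmlns.com/foaf/0.1/name> ?name . }")
    print pprint_query_results(res, namespace_manager=g.namespace_manager)
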
diff --git a/rdflib/void.py b/rdflib/void.py
new file mode 100644
index 00000000..8a23f569
--- /dev/null
+++ b/rdflib/void.py
@@ -0,0 +1,129 @@
+import collections
+
+from rdflib import URIRef, Graph, Literal
+from rdflib.namespace import VOID, RDF
+
+
+def generateVoID(g, dataset=None, res=None, distinctForPartitions=True):
+ """
+ Returns a new graph with a VoID description of the passed dataset
+
+ For more info on Vocabulary of Interlinked Datasets (VoID), see:
+ http://vocab.deri.ie/void
+
+ This only makes two passes through the triples (once to detect the types
+ of things)
+
+ The tradeoff is that lots of temporary structures are built up in memory
+ meaning lots of memory may be consumed :)
+ I imagine at least a few copies of your original graph.
+
+ the distinctForPartitions parameter controls whether
+ distinctSubjects/objects are tracked for each class/propertyPartition
+ this requires more memory again
+
+ """
+
+ typeMap = collections.defaultdict(set)
+ classes = collections.defaultdict(set)
+ for e, c in g.subject_objects(RDF.type):
+ classes[c].add(e)
+ typeMap[e].add(c)
+
+ triples = 0
+ subjects = set()
+ objects = set()
+ properties = set()
+ classCount = collections.defaultdict(int)
+ propCount = collections.defaultdict(int)
+
+ classProps = collections.defaultdict(set)
+ classObjects = collections.defaultdict(set)
+ propSubjects = collections.defaultdict(set)
+ propObjects = collections.defaultdict(set)
+
+ for s, p, o in g:
+
+ triples += 1
+ subjects.add(s)
+ properties.add(p)
+ objects.add(o)
+
+ # class partitions
+ if s in typeMap:
+ for c in typeMap[s]:
+ classCount[c] += 1
+ if distinctForPartitions:
+ classObjects[c].add(o)
+ classProps[c].add(p)
+
+ # property partitions
+ propCount[p] += 1
+ if distinctForPartitions:
+ propObjects[p].add(o)
+ propSubjects[p].add(s)
+
+ if not dataset:
+ dataset = URIRef("http://example.org/Dataset")
+
+ if not res:
+ res = Graph()
+
+ res.add((dataset, RDF.type, VOID.Dataset))
+
+ # basic stats
+ res.add((dataset, VOID.triples, Literal(triples)))
+ res.add((dataset, VOID.classes, Literal(len(classes))))
+
+ res.add((dataset, VOID.distinctObjects, Literal(len(objects))))
+ res.add((dataset, VOID.distinctSubjects, Literal(len(subjects))))
+ res.add((dataset, VOID.properties, Literal(len(properties))))
+
+ for i, c in enumerate(classes):
+ part = URIRef(dataset + "_class%d" % i)
+ res.add((dataset, VOID.classPartition, part))
+ res.add((part, RDF.type, VOID.Dataset))
+
+ res.add((part, VOID.triples, Literal(classCount[c])))
+ res.add((part, VOID.classes, Literal(1)))
+
+ res.add((part, VOID["class"], c))
+
+ res.add((part, VOID.entities, Literal(len(classes[c]))))
+ res.add((part, VOID.distinctSubjects, Literal(len(classes[c]))))
+
+ if distinctForPartitions:
+ res.add(
+ (part, VOID.properties, Literal(len(classProps[c]))))
+ res.add((part, VOID.distinctObjects,
+ Literal(len(classObjects[c]))))
+
+ for i, p in enumerate(properties):
+ part = URIRef(dataset + "_property%d" % i)
+ res.add((dataset, VOID.propertyPartition, part))
+ res.add((part, RDF.type, VOID.Dataset))
+
+ res.add((part, VOID.triples, Literal(propCount[p])))
+ res.add((part, VOID.properties, Literal(1)))
+
+ res.add((part, VOID.property, p))
+
+ if distinctForPartitions:
+
+ entities = 0
+ propClasses = set()
+ for s in propSubjects[p]:
+ if s in typeMap:
+ entities += 1
+ for c in typeMap[s]:
+ propClasses.add(c)
+
+ res.add((part, VOID.entities, Literal(entities)))
+ res.add((part, VOID.classes, Literal(len(propClasses))))
+
+ res.add((part, VOID.distinctSubjects,
+ Literal(len(propSubjects[p]))))
+ res.add((part, VOID.distinctObjects,
+ Literal(len(propObjects[p]))))
+
+ return res, dataset
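
A sketch of calling the relocated VoID generator; the dataset URI and the input file are hypothetical:

    import rdflib
    from rdflib.void import generateVoID

    g = rdflib.Graph()
    g.parse("foaf.rdf")

    dataset_uri = rdflib.URIRef("http://example.org/mydataset")
    void_graph, dataset = generateVoID(g, dataset=dataset_uri)
    print void_graph.serialize(format="n3")
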
diff --git a/setup.py b/setup.py
index a8792d37..d7cfd154 100644
--- a/setup.py
+++ b/setup.py
@@ -136,5 +136,15 @@ setup(
'rdflib/plugins/sparql/results',
'rdflib/plugins/stores',
],
+ entry_points = {
+ 'console_scripts': [
+ 'rdfpipe = rdflib.tools.rdfpipe:main',
+ 'csv2rdf = rdflib.tools.csv2rdf:main',
+ 'rdf2dot = rdflib.tools.rdf2dot:main',
+ 'rdfs2dot = rdflib.tools.rdfs2dot:main',
'graphisomorphism = rdflib.tools.graphisomorphism:main',
+ ],
+ },
+
**kwargs
)
diff --git a/test/test_dawg.py b/test/test_dawg.py
index 748ef191..d7ea1a71 100644
--- a/test/test_dawg.py
+++ b/test/test_dawg.py
@@ -43,6 +43,8 @@ from rdflib.plugins.sparql.update import evalUpdate
from rdflib.py3compat import decodeStringEscape
+from rdflib.util import pprint_query_results
+
from nose.tools import nottest, eq_ as eq
from nose import SkipTest
@@ -141,44 +143,6 @@ def _fmt(f):
return "turtle"
-def _bindingsTable(res):
-
- def termString(t):
- if t == None:
- return "-"
- return repr(t).replace('rdflib.term.', '').replace(
- "datatype=URIRef(u'http://www.w3.org/2001/XMLSchema#",
- 'datatype=xsd:').replace("datatype=URIRef(u'", "datatype=")
-
- def c(s, w):
- """
- center the string s in w wide string
- """
- h = (w - len(s)) // 2
- return " " * h + s + " " * h
-
- if not res:
- return "(no results)\n"
- else:
- out = StringIO()
- # keys = r.vars
- # for r in b:
- # keys.update(r.keys())
-
- keys = sorted(res.vars)
- maxlen = [0] * len(keys)
- b = [[termString(r[k]) for k in keys] for r in res]
- for r in b:
- for i in range(len(keys)):
- maxlen[i] = max(maxlen[i], 1 + len(r[i]))
-
- out.write(
- "|".join([c(k, maxlen[i]) for i, k in enumerate(keys)]) + "\n")
- out.write("-" * sum(maxlen) + "\n")
- for r in sorted(b):
- out.write("|".join(
- [t + " " * (i - len(t) - 1) for i, t in zip(maxlen, r)]) + "\n")
- return out.getvalue()
def bindingsCompatible(a, b):
@@ -476,8 +440,8 @@ def query_test(t):
set(res),
set(res2)
), 'Bindings do not match: \n%s\n!=\n%s' % (
- _bindingsTable(res),
- _bindingsTable(res2))
+ pprint_query_results(res, namespace_manager=g.namespace_manager),
+ pprint_query_results(res2, namespace_manager=g.namespace_manager))
elif res.type == 'ASK':
eq(res.askAnswer,
res2.askAnswer, "Ask answer does not match: %r != %r" % (