cleanup - move stuff from rdfextras to sensible packages - entry_points for console scripts

author: Gunnar Aastrand Grimnes <gromgull@gmail.com> 2013-05-03 21:12:44 +0200
committer: Gunnar Aastrand Grimnes <gromgull@gmail.com> 2013-05-03 21:12:44 +0200
commit: 937edd34747dec528ec818e7893a1f2e3c0a84b3 (patch)
tree: 8b872efe1bec7ea9600617800a905ab9e4c01368
parent: 723137895125209c071ea0aac927b0153892d557 (diff)
download: rdflib-937edd34747dec528ec818e7893a1f2e3c0a84b3.tar.gz
19 files changed, 398 insertions, 637 deletions
diff --git a/CHANGELOG b/CHANGELOG
index c4a2ae92..115e0551 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -42,23 +42,23 @@
             for row in graph.query('select ... ') :
                 print row.age, row["name"]
 
-    * String operations on URIRefs return new URIRefs
-	  https://github.com/RDFLib/rdflib/pull/258
- 
-	  >>> URIRef('http://example.org/')+'test
-      rdflib.term.URIRef('http://example.org/test')
-
-	  >>> URIRef('http://example.org/persons/%d/id')%32
-	  rdflib.term.URIRef('http://example.org/persons/32/id')	
-
 	* "Slicing" of Graphs and Resources as syntactic sugar: 
 	  https://github.com/RDFLib/rdflib/pull/271
 
-		graph[bob:FOAF.knows/FOAF.name] -> generator over the names of Bobs friends
+		graph[bob:FOAF.knows/FOAF.name] 
+			-> generator over the names of Bob's friends
+		
+	* The SPARQLStore and SPARQLUpdateStore are now included in the RDFLib core
+	
     
 
     Minor Changes:
-	
+
+    * String operations on URIRefs return new URIRefs:
+	  >>> URIRef('http://example.org/')+'test'
+      rdflib.term.URIRef('http://example.org/test')
+
+	  https://github.com/RDFLib/rdflib/pull/258 
 	* Namespace is no longer a subclass of URIRef
     * URIRefs and Literal language tags are validated on construction,
       avoiding some "RDF-injection" issues
diff --git a/examples/sparql_query_example.py b/examples/sparql_query_example.py
index 285eabf1..293c4354 100644
--- a/examples/sparql_query_example.py
+++ b/examples/sparql_query_example.py
@@ -19,9 +19,14 @@ import rdflib
 g = rdflib.Graph()
 g.load("foaf.rdf")
 
+# the QueryProcessor knows the FOAF prefix from the graph
+# which in turn knows it from reading the RDF/XML file
 for row in g.query(
-        'select ?s where { [] <http://xmlns.com/foaf/0.1/knows> ?s .}'):
+        'select ?s where { [] foaf:knows ?s .}'):
     print row.s 
     # or row["s"]
     # or row[rdflib.Variable("s")]
     
+
+
+
diff --git a/rdflib/extras/utils/cmdlineutils.py b/rdflib/extras/cmdlineutils.py
index 866798d9..a771d4d7 100644
--- a/rdflib/extras/utils/cmdlineutils.py
+++ b/rdflib/extras/cmdlineutils.py
@@ -4,7 +4,7 @@ import getopt
 import rdflib
 import codecs
 
-from rdflib.extras.utils.pathutils import guess_format
+from rdflib.util import guess_format
 
 
 def _help():
diff --git a/rdflib/extras/describer.py b/rdflib/extras/describer.py
index 82162a02..fa8256ec 100644
--- a/rdflib/extras/describer.py
+++ b/rdflib/extras/describer.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-from __future__ import with_statement
+
 from rdflib import py3compat
 __doc__ = py3compat.format_doctest_out("""
 A Describer is a stateful utility for creating RDF statements in a
@@ -15,11 +15,10 @@ Full example in the ``to_rdf`` method below::
 
     >>> import datetime
     >>> from rdflib.graph import Graph
-    >>> from rdflib.namespace import Namespace, RDFS
+    >>> from rdflib.namespace import Namespace, RDFS, FOAF
     >>>
     >>> ORG_URI = "http://example.org/"
     >>>
-    >>> FOAF = Namespace("http://xmlns.com/foaf/0.1/")
     >>> CV = Namespace("http://purl.org/captsolo/resume-rdf/0.2/cv#")
     >>>
     >>> class Person(object):
diff --git a/rdflib/extras/utils/__init__.py b/rdflib/extras/utils/__init__.py
deleted file mode 100644
index ff96ebc7..00000000
--- a/rdflib/extras/utils/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import pathutils
-import cmdlineutils
-import termutils
-import graphutils
-
-# tedious sop to flake
-assert pathutils
-assert cmdlineutils
-assert termutils
-assert graphutils
diff --git a/rdflib/extras/utils/graphutils.py b/rdflib/extras/utils/graphutils.py
deleted file mode 100644
index 986836c4..00000000
--- a/rdflib/extras/utils/graphutils.py
+++ /dev/null
@@ -1,220 +0,0 @@
-import collections
-import rdflib
-from rdflib import RDF
-
-"""
-RDF- and RDFlib-centric Graph utilities.
-"""
-
-
-def graph_to_dot(graph, dot):
-    """
-    Turns graph into dot (graphviz graph drawing format) using pydot.
-
-    """
-    import pydot
-    nodes = {}
-    for s, o in graph.subject_objects():
-        for i in s, o:
-            if i not in nodes.keys():
-                nodes[i] = i
-    for s, p, o in graph.triples((None, None, None)):
-        dot.add_edge(pydot.Edge(nodes[s], nodes[o], label=p))
-
-
-def find_roots(graph, prop, roots=None):
-    """
-    Find the roots in some sort of transitive hierarchy.
-
-    find_roots(graph, rdflib.RDFS.subClassOf)
-    will return a set of all roots of the sub-class hierarchy
-
-    Assumes triple of the form (child, prop, parent), i.e. the direction of
-    RDFS.subClassOf or SKOS.broader
-
-    """
-
-    non_roots = set()
-    if roots is None:
-        roots = set()
-    for x, y in graph.subject_objects(prop):
-        non_roots.add(x)
-        if x in roots:
-            roots.remove(x)
-        if y not in non_roots:
-            roots.add(y)
-    return roots
-
-
-def get_tree(graph,
-             root,
-             prop,
-             mapper=lambda x: x,
-             sortkey=None,
-             done=None,
-             dir='down'):
-    """
-    Return a nested list/tuple structure representing the tree
-    built by the transitive property given, starting from the root given
-
-    i.e.
-
-    get_tree(graph,
-       rdflib.URIRef("http://xmlns.com/foaf/0.1/Person"),
-       rdflib.RDFS.subClassOf)
-
-    will return the structure for the subClassTree below person.
-
-    dir='down' assumes triple of the form (child, prop, parent),
-    i.e. the direction of RDFS.subClassOf or SKOS.broader
-    Any other dir traverses in the other direction
-
-    """
-
-    if done is None:
-        done = set()
-    if root in done:
-        return
-    done.add(root)
-    tree = []
-
-    if dir == 'down':
-        branches = graph.subjects(prop, root)
-    else:
-        branches = graph.objects(root, prop)
-
-    for branch in branches:
-        t = get_tree(graph, branch, prop, mapper, sortkey, done, dir)
-        if t:
-            tree.append(t)
-
-    return (mapper(root), sorted(tree, key=sortkey))
-
-VOID = rdflib.Namespace("http://rdfs.org/ns/void#")
-DCTERMS = rdflib.Namespace("http://purl.org/dc/terms/")
-FOAF = rdflib.Namespace("http://xmlns.com/foaf/0.1/")
-
-
-def generateVoID(g, dataset=None, res=None, distinctForPartitions=True):
-    """
-    Returns a new graph with a VoID description of the passed dataset
-
-    For more info on Vocabulary of Interlinked Datasets (VoID), see:
-    http://vocab.deri.ie/void
-
-    This only makes two passes through the triples (once to detect the types
-    of things)
-
-    The tradeoff is that lots of temporary structures are built up in memory
-    meaning lots of memory may be consumed :)
-    I imagine at least a few copies of your original graph.
-
-    the distinctForPartitions parameter controls whether
-    distinctSubjects/objects are tracked for each class/propertyPartition
-    this requires more memory again
-
-    """
-
-    typeMap = collections.defaultdict(set)
-    classes = collections.defaultdict(set)
-    for e, c in g.subject_objects(RDF.type):
-        classes[c].add(e)
-        typeMap[e].add(c)
-
-    triples = 0
-    subjects = set()
-    objects = set()
-    properties = set()
-    classCount = collections.defaultdict(int)
-    propCount = collections.defaultdict(int)
-
-    classProps = collections.defaultdict(set)
-    classObjects = collections.defaultdict(set)
-    propSubjects = collections.defaultdict(set)
-    propObjects = collections.defaultdict(set)
-
-    for s, p, o in g:
-
-        triples += 1
-        subjects.add(s)
-        properties.add(p)
-        objects.add(o)
-
-        # class partitions
-        if s in typeMap:
-            for c in typeMap[s]:
-                classCount[c] += 1
-                if distinctForPartitions:
-                    classObjects[c].add(o)
-                    classProps[c].add(p)
-
-        # property partitions
-        propCount[p] += 1
-        if distinctForPartitions:
-            propObjects[p].add(o)
-            propSubjects[p].add(s)
-
-    if not dataset:
-        dataset = rdflib.URIRef("http://example.org/Dataset")
-
-    if not res:
-        res = rdflib.Graph()
-
-    res.add((dataset, RDF.type, VOID.Dataset))
-
-    # basic stats
-    res.add((dataset, VOID.triples, rdflib.Literal(triples)))
-    res.add((dataset, VOID.classes, rdflib.Literal(len(classes))))
-
-    res.add((dataset, VOID.distinctObjects, rdflib.Literal(len(objects))))
-    res.add((dataset, VOID.distinctSubjects, rdflib.Literal(len(subjects))))
-    res.add((dataset, VOID.properties, rdflib.Literal(len(properties))))
-
-    for i, c in enumerate(classes):
-        part = rdflib.URIRef(dataset + "_class%d" % i)
-        res.add((dataset, VOID.classPartition, part))
-        res.add((part, RDF.type, VOID.Dataset))
-
-        res.add((part, VOID.triples, rdflib.Literal(classCount[c])))
-        res.add((part, VOID.classes, rdflib.Literal(1)))
-
-        res.add((part, VOID["class"], c))
-
-        res.add((part, VOID.entities, rdflib.Literal(len(classes[c]))))
-        res.add((part, VOID.distinctSubjects, rdflib.Literal(len(classes[c]))))
-
-        if distinctForPartitions:
-            res.add(
-                (part, VOID.properties, rdflib.Literal(len(classProps[c]))))
-            res.add((part, VOID.distinctObjects,
-                    rdflib.Literal(len(classObjects[c]))))
-
-    for i, p in enumerate(properties):
-        part = rdflib.URIRef(dataset + "_property%d" % i)
-        res.add((dataset, VOID.propertyPartition, part))
-        res.add((part, RDF.type, VOID.Dataset))
-
-        res.add((part, VOID.triples, rdflib.Literal(propCount[p])))
-        res.add((part, VOID.properties, rdflib.Literal(1)))
-
-        res.add((part, VOID.property, p))
-
-        if distinctForPartitions:
-
-            entities = 0
-            propClasses = set()
-            for s in propSubjects[p]:
-                if s in typeMap:
-                    entities += 1
-                for c in typeMap[s]:
-                    propClasses.add(c)
-
-            res.add((part, VOID.entities, rdflib.Literal(entities)))
-            res.add((part, VOID.classes, rdflib.Literal(len(propClasses))))
-
-            res.add((part, VOID.distinctSubjects,
-                    rdflib.Literal(len(propSubjects[p]))))
-            res.add((part, VOID.distinctObjects,
-                    rdflib.Literal(len(propObjects[p]))))
-
-    return res, dataset
diff --git a/rdflib/extras/utils/pathutils.py b/rdflib/extras/utils/pathutils.py
deleted file mode 100644
index a547411a..00000000
--- a/rdflib/extras/utils/pathutils.py
+++ /dev/null
@@ -1,107 +0,0 @@
-"""
-RDF- and RDFlib-centric file and URL path utilities.
-"""
-
-from os.path import splitext
-
-
-def uri_leaf(uri):
-    """
-    Get the "leaf" - fragment id or last segment - of a URI. Useful e.g. for
-    getting a term from a "namespace like" URI. Examples:
-
-        >>> uri_leaf('http://example.org/ns/things#item')
-        'item'
-        >>> uri_leaf('http://example.org/ns/stuff/item')
-        'item'
-        >>> uri_leaf('http://example.org/ns/stuff/')
-        >>>
-        >>> uri_leaf('urn:example.org:stuff')
-        'stuff'
-        >>> uri_leaf('example.org')
-        >>>
-    """
-    for char in ('#', '/', ':'):
-        if uri.endswith(char):
-            break
-        # base, sep, leaf = uri.rpartition(char)
-        if char in uri:
-            sep = char
-            leaf = uri.rsplit(char)[-1]
-        else:
-            sep = ''
-            leaf = uri
-        if sep and leaf:
-            return leaf
-
-
-SUFFIX_FORMAT_MAP = {
-    'rdf': 'xml',
-    'rdfs': 'xml',
-    'owl': 'xml',
-    'n3': 'n3',
-    'ttl': 'n3',
-    'nt': 'nt',
-    'trix': 'trix',
-    'xhtml': 'rdfa',
-    'html': 'rdfa',
-    'svg': 'rdfa',
-    'nq': 'nquads',
-    'trig': 'trig'
-}
-
-
-def guess_format(fpath, fmap=None):
-    """
-    Guess RDF serialization based on file suffix. Uses
-    ``SUFFIX_FORMAT_MAP`` unless ``fmap`` is provided. Examples:
-
-        >>> guess_format('path/to/file.rdf')
-        'xml'
-        >>> guess_format('path/to/file.owl')
-        'xml'
-        >>> guess_format('path/to/file.ttl')
-        'n3'
-        >>> guess_format('path/to/file.xhtml')
-        'rdfa'
-        >>> guess_format('path/to/file.svg')
-        'rdfa'
-        >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'})
-        'grddl'
-
-    This also works with just the suffixes, with or without leading dot, and
-    regardless of letter case::
-
-        >>> guess_format('.rdf')
-        'xml'
-        >>> guess_format('rdf')
-        'xml'
-        >>> guess_format('RDF')
-        'xml'
-    """
-    fmap = fmap or SUFFIX_FORMAT_MAP
-    return fmap.get(_get_ext(fpath)) or fmap.get(fpath.lower())
-
-
-def _get_ext(fpath, lower=True):
-    """
-    Gets the file extension from a file(path); stripped of leading '.' and in
-    lower case. Examples:
-
-        >>> _get_ext("path/to/file.txt")
-        'txt'
-        >>> _get_ext("OTHER.PDF")
-        'pdf'
-        >>> _get_ext("noext")
-        ''
-        >>> _get_ext(".rdf")
-        'rdf'
-    """
-    ext = splitext(fpath)[-1]
-    if ext == '' and fpath.startswith("."):
-        ext = fpath
-    if lower:
-        ext = ext.lower()
-    if ext.startswith('.'):
-        ext = ext[1:]
-    return ext
diff --git a/rdflib/extras/utils/termutils.py b/rdflib/extras/utils/termutils.py
deleted file mode 100644
index d8a21bc1..00000000
--- a/rdflib/extras/utils/termutils.py
+++ /dev/null
@@ -1,234 +0,0 @@
-"""Convenience functions for working with Terms and Graphs."""
-from rdflib import BNode
-from rdflib import Graph
-from rdflib import Literal
-from rdflib import URIRef
-from rdflib import Variable
-from rdflib.term import Statement
-from rdflib.graph import QuotedGraph
-from rdflib.py3compat import format_doctest_out
-
-__all__ = ['SUBJECT', 'PREDICATE', 'OBJECT', 'CONTEXT', 'TERM_COMBINATIONS',
-           'REVERSE_TERM_COMBINATIONS', 'TERM_INSTANTIATION_DICT',
-           'GRAPH_TERM_DICT', 'normalizeGraph', 'term2Letter',
-           'constructGraph', 'triplePattern2termCombinations',
-           'type2TermCombination', 'statement2TermCombination',
-           'escape_quotes']
-
-SUBJECT = 0
-PREDICATE = 1
-OBJECT = 2
-CONTEXT = 3
-TERM_COMBINATIONS = dict(
-    [(term, index) for index, term, in enumerate(
-     [
-     'UUUU', 'UUUB', 'UUUF', 'UUVU', 'UUVB', 'UUVF', 'UUBU', 'UUBB', 'UUBF',
-     'UULU', 'UULB', 'UULF', 'UUFU', 'UUFB', 'UUFF',
-     #
-     'UVUU', 'UVUB', 'UVUF', 'UVVU', 'UVVB', 'UVVF', 'UVBU', 'UVBB', 'UVBF',
-     'UVLU', 'UVLB', 'UVLF', 'UVFU', 'UVFB', 'UVFF',
-     #
-     'VUUU', 'VUUB', 'VUUF', 'VUVU', 'VUVB', 'VUVF', 'VUBU', 'VUBB', 'VUBF',
-     'VULU', 'VULB', 'VULF', 'VUFU', 'VUFB', 'VUFF',
-     #
-     'VVUU', 'VVUB', 'VVUF', 'VVVU', 'VVVB', 'VVVF', 'VVBU', 'VVBB', 'VVBF',
-     'VVLU', 'VVLB', 'VVLF', 'VVFU', 'VVFB', 'VVFF',
-     #
-     'BUUU', 'BUUB', 'BUUF', 'BUVU', 'BUVB', 'BUVF', 'BUBU', 'BUBB', 'BUBF',
-     'BULU', 'BULB', 'BULF', 'BUFU', 'BUFB', 'BUFF',
-     #
-     'BVUU', 'BVUB', 'BVUF', 'BVVU', 'BVVB', 'BVVF', 'BVBU', 'BVBB', 'BVBF',
-     'BVLU', 'BVLB', 'BVLF', 'BVFU', 'BVFB', 'BVFF',
-     #
-     'FUUU', 'FUUB', 'FUUF', 'FUVU', 'FUVB', 'FUVF', 'FUBU', 'FUBB', 'FUBF',
-     'FULU', 'FULB', 'FULF', 'FUFU', 'FUFB', 'FUFF',
-     #
-     'FVUU', 'FVUB', 'FVUF', 'FVVU', 'FVVB', 'FVVF', 'FVBU', 'FVBB', 'FVBF',
-     'FVLU', 'FVLB', 'FVLF', 'FVFU', 'FVFB', 'FVFF',
-     #
-     # 'sUUU', 'sUUB', 'sUUF', 'sUVU', 'sUVB', 'sUVF', 'sUBU', 'sUBB', 'sUBF',
-     # 'sULU', 'sULB', 'sULF', 'sUFU', 'sUFB', 'sUFF',
-     #
-     # 'sVUU', 'sVUB', 'sVUF', 'sVVU', 'sVVB', 'sVVF', 'sVBU', 'sVBB', 'sVBF',
-     # 'sVLU', 'sVLB', 'sVLF', 'sVFU', 'sVFB', 'sVFF'
-     ])])
-
-REVERSE_TERM_COMBINATIONS = dict(
-    [(value, key) for key, value in TERM_COMBINATIONS.items()])
-
-TERM_INSTANTIATION_DICT = {
-    'U': URIRef,
-    'B': BNode,
-    'V': Variable,
-    'L': Literal
-}
-
-GRAPH_TERM_DICT = {
-    'F': (QuotedGraph, URIRef),
-    'U': (Graph, URIRef),
-    'B': (Graph, BNode)
-}
-
-
-@format_doctest_out
-def normalizeGraph(graph):
-    """Takes an instance of a ``Graph`` and returns the instance's identifier
-    and  ``type``.
-
-    Types are ``U`` for a :class:`~rdflib.graph.Graph`, ``F`` for
-    a :class:`~rdflib.graph.QuotedGraph` and ``B`` for a
-    :class:`~rdflib.graph.ConjunctiveGraph`
-
-    >>> from rdflib import plugin
-    >>> from rdflib.graph import Graph, ConjunctiveGraph, QuotedGraph
-    >>> from rdflib.store import Store
-    >>> from rdflib import URIRef, Namespace
-    >>> from rdflib.extras.utils.termutils import normalizeGraph
-    >>> memstore = plugin.get('IOMemory', Store)()
-    >>> g = Graph(memstore, URIRef("http://purl.org/net/bel-epa/gjh"))
-    >>> normalizeGraph(g)
-    (rdflib.term.URIRef(%(u)s'http://purl.org/net/bel-epa/gjh'), 'U')
-    >>> g = ConjunctiveGraph(memstore, Namespace("http://rdflib.net/ns"))
-    >>> normalizeGraph(g)  #doctest: +ELLIPSIS
-    (rdflib.term.URIRef(%(u)s'http://rdflib.net/ns'), 'U')
-    >>> g = QuotedGraph(memstore, Namespace("http://rdflib.net/ns"))
-    >>> normalizeGraph(g)
-    (rdflib.term.URIRef(%(u)s'http://rdflib.net/ns'), 'F')
-
-    """
-    if isinstance(graph, QuotedGraph):
-        return graph.identifier, 'F'
-    else:
-        return graph.identifier, term2Letter(graph.identifier)
-
-
-@format_doctest_out
-def term2Letter(term):
-    """Relate a given term to one of several key types:
-
-    * :class:`~rdflib.term.BNode`,
-    * :class:`~rdflib.term.Literal`,
-    * :class:`~rdflib.term.Statement` (Deprecated)
-    * :class:`~rdflib.term.URIRef`,
-    * :class:`~rdflib.term.Variable`
-    * :class:`~rdflib.graph.Graph`
-    * :class:`~rdflib.graph.QuotedGraph`
-
-    >>> import rdflib
-    >>> from rdflib import plugin
-    >>> from rdflib import URIRef, Namespace
-    >>> from rdflib.term import BNode, Literal, Variable
-    >>> # from rdflib.term import Statement
-    >>> from rdflib.graph import Graph, ConjunctiveGraph, QuotedGraph
-    >>> from rdflib.store import Store
-    >>> from rdflib.extras.utils.termutils import term2Letter
-    >>> term2Letter(URIRef('http://purl.org/net/bel-epa.com/'))
-    'U'
-    >>> term2Letter(BNode())
-    'B'
-    >>> term2Letter(Literal(%(u)s''))
-    'L'
-    >>> term2Letter(Variable(%(u)s'x'))
-    'V'
-    >>> term2Letter(Graph())
-    'B'
-    >>> term2Letter(QuotedGraph("IOMemory", None))
-    'F'
-    >>> term2Letter(None)
-    'L'
-    >>> # term2Letter(Statement((None, None, None), None)) # Deprecated
-
-    """
-    if isinstance(term, URIRef):
-        return 'U'
-    elif isinstance(term, BNode):
-        return 'B'
-    elif isinstance(term, Literal):
-        return 'L'
-    elif isinstance(term, QuotedGraph):
-        return 'F'
-    elif isinstance(term, Variable):
-        return 'V'
-    elif isinstance(term, Statement):
-        return 's'
-    elif isinstance(term, Graph):
-        return term2Letter(term.identifier)
-    elif term is None:
-        return 'L'
-    else:
-        raise Exception(
-            ("The given term (%s) is not an instance of any " +
-             "of the known types (URIRef, BNode, Literal, QuotedGraph, " +
-             "or Variable).  It is a %s")
-            % (term, type(term)))
-
-
-def constructGraph(key):
-    """Given a key (one of 'F', 'U' or 'B'), returns
-    a tuple containing a ``Graph`` and an appropriate referent.
-
-    >>> from rdflib.extras.utils.termutils import constructGraph
-    >>> constructGraph('F')
-    (<class 'rdflib.graph.QuotedGraph'>, <class 'rdflib.term.URIRef'>)
-    >>> constructGraph('U')
-    (<class 'rdflib.graph.Graph'>, <class 'rdflib.term.URIRef'>)
-    >>> constructGraph('B')
-    (<class 'rdflib.graph.Graph'>, <class 'rdflib.term.BNode'>)
-
-    """
-    return GRAPH_TERM_DICT[key]
-
-
-def triplePattern2termCombinations((s, p, o)):
-    """
-    Maps a triple pattern to term combinations (non-functioning)
-
-    """
-    combinations = []
-    # combinations.update(TERM_COMBINATIONS)
-    if isinstance(o, Literal):
-        for key, val in TERM_COMBINATIONS.items():
-            if key[OBJECT] == 'O':
-                combinations.append(val)
-    return combinations
-
-
-def type2TermCombination(member, klass, context):
-    """
-    Maps a type to a TermCombo
-
-    """
-    try:
-        rt = TERM_COMBINATIONS['%sU%s%s' %
-                               (term2Letter(member),
-                                term2Letter(klass),
-                                normalizeGraph(context)[-1])]
-        return rt
-    except:
-        raise Exception("Unable to persist" +
-                        "classification triple: %s %s %s %s" %
-                        (member, 'rdf:type', klass, context))
-
-
-def statement2TermCombination(subject, predicate, obj, context):
-    """
-    Maps a statement to a Term Combo
-
-    """
-    return TERM_COMBINATIONS['%s%s%s%s' %
-                             (term2Letter(subject), term2Letter(predicate),
-                              term2Letter(obj), normalizeGraph(context)[-1])]
-
-
-def escape_quotes(qstr):
-    """
-    #FIXME:  This *may* prove to be a performance bottleneck and should
-             perhaps be implemented in C (as it was in 4Suite RDF)
-
-    Ported from Ft.Lib.DbUtil
-    """
-    if qstr is None:
-        return ''
-    tmp = qstr.replace("\\", "\\\\")
-    tmp = tmp.replace("'", "\\'")
-    return tmp
diff --git a/rdflib/graph.py b/rdflib/graph.py
index 0acb3879..37d17896 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -874,6 +874,9 @@ class Graph(Node):
 
         If override is True will bind namespace to given prefix if namespace
         was already bound to a different prefix.
+
+        for example:  graph.bind('foaf', 'http://xmlns.com/foaf/0.1/')
+        
         """
         return self.namespace_manager.bind(
             prefix, namespace, override=override)
@@ -1009,10 +1012,25 @@ class Graph(Node):
         self.parse(source, publicID, format)
 
     def query(self, query_object, processor='sparql',
-              result='sparql', initNs={}, initBindings={},
+              result='sparql', initNs=None, initBindings=None,
               use_store_provided=True, **kwargs):
         """
+        Query this graph. 
+        
+        A type of 'prepared queries' can be realised by providing
+        initial variable bindings with initBindings
+
+        Initial namespaces are used to resolve prefixes used in the query, 
+        if none are given, the namespaces from the graph's namespace manager
+        are used. 
+
+        A rdflib.query.QueryResult object is returned
+        
         """
+
+        initBindings = initBindings or {}
+        initNs = initNs or dict(self.namespaces())
+
         if hasattr(self.store, "query") and use_store_provided:
             try:
                 return self.store.query(
diff --git a/rdflib/namespace.py b/rdflib/namespace.py
index 55455dfc..ac78042b 100644
--- a/rdflib/namespace.py
+++ b/rdflib/namespace.py
@@ -214,6 +214,10 @@ SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')
 DOAP = Namespace('http://usefulinc.com/ns/doap#')
 FOAF = Namespace('http://xmlns.com/foaf/0.1/')
 DC = Namespace('http://purl.org/dc/elements/1.1/')
+DCTERMS = Namespace('http://purl.org/dc/terms/')
+VOID = Namespace('http://rdfs.org/ns/void#')
+
+
 
 class NamespaceManager(object):
     """
diff --git a/rdflib/extras/csv2rdf.py b/rdflib/tools/csv2rdf.py
index 04d4db0e..97b589ee 100644
--- a/rdflib/extras/csv2rdf.py
+++ b/rdflib/tools/csv2rdf.py
@@ -16,7 +16,7 @@ from rdflib import RDF, RDFS
 from rdflib.namespace import split_uri
 
 HELP = """
-toRDF.py \
+csv2rdf.py \
     -b <instance-base> \
     -p <property-base> \
     [-c <classname>] \
diff --git a/rdflib/extras/graphisomorphism.py b/rdflib/tools/graphisomorphism.py
index 74c85bc9..74c85bc9 100644
--- a/rdflib/extras/graphisomorphism.py
+++ b/rdflib/tools/graphisomorphism.py
diff --git a/rdflib/extras/rdf2dot.py b/rdflib/tools/rdf2dot.py
index f865f479..104fd990 100644
--- a/rdflib/extras/rdf2dot.py
+++ b/rdflib/tools/rdf2dot.py
@@ -1,8 +1,7 @@
 #!/usr/bin/env python
 
 import rdflib
-import rdflib.extras
-import rdflib.extras.utils.cmdlineutils
+import rdflib.extras.cmdlineutils
 
 import sys
 import cgi
@@ -118,7 +117,7 @@ language to stdout
 
 
 def main():
-    rdflib.extras.utils.cmdlineutils.main(rdf2dot, _help)
+    rdflib.extras.cmdlineutils.main(rdf2dot, _help)
 
 if __name__ == '__main__':
     main()
diff --git a/rdflib/extras/rdfpipe.py b/rdflib/tools/rdfpipe.py
index 1b6adaa5..fc57b4b6 100644
--- a/rdflib/extras/rdfpipe.py
+++ b/rdflib/tools/rdfpipe.py
@@ -17,7 +17,7 @@ from rdflib.namespace import RDF, RDFS, OWL, XSD
 from rdflib.parser import Parser
 from rdflib.serializer import Serializer
 
-from rdflib.extras.utils.pathutils import guess_format
+from rdflib.util import guess_format
 
 
 STORE_CONNECTION = ''
diff --git a/rdflib/extras/rdfs2dot.py b/rdflib/tools/rdfs2dot.py
index c2c1e17c..ca3bd66c 100644
--- a/rdflib/extras/rdfs2dot.py
+++ b/rdflib/tools/rdfs2dot.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 
-import rdflib.extras
-import rdflib.extras.utils
+import rdflib.extras.cmdlineutils
 
 import sys
 import itertools
@@ -98,7 +97,7 @@ DOT language to stdout
 
 
 def main():
-    rdflib.extras.utils.cmdlineutils.main(rdfs2dot, _help)
+    rdflib.extras.cmdlineutils.main(rdfs2dot, _help)
 
 if __name__ == '__main__':
     main()
diff --git a/rdflib/util.py b/rdflib/util.py
index 99f7741d..305c4dc1 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -37,6 +37,8 @@ from time import localtime
 from time import time
 from time import timezone
 
+from os.path import splitext
+from StringIO import StringIO
 
 from rdflib.exceptions import ContextTypeError
 from rdflib.exceptions import ObjectTypeError
@@ -52,7 +54,8 @@ from rdflib.py3compat import sign
 __all__ = [
     'list2set', 'first', 'uniq', 'more_than', 'to_term', 'from_n3',
     'date_time', 'parse_date_time', 'check_context', 'check_subject',
-    'check_predicate', 'check_object', 'check_statement', 'check_pattern']
+    'check_predicate', 'check_object', 'check_statement', 'check_pattern',
+    'guess_format', 'pprint_query_results', 'find_roots', 'get_tree']
 
 
 def list2set(seq):
@@ -317,6 +320,208 @@ def parse_date_time(val):
     return t
 
 
+
+
+
+SUFFIX_FORMAT_MAP = {
+    'rdf': 'xml',
+    'rdfs': 'xml',
+    'owl': 'xml',
+    'n3': 'n3',
+    'ttl': 'n3',
+    'nt': 'nt',
+    'trix': 'trix',
+    'xhtml': 'rdfa',
+    'html': 'rdfa',
+    'svg': 'rdfa',
+    'nq': 'nquads',
+    'trig': 'trig'
+}
+
+
+def guess_format(fpath, fmap=None):
+    """
+    Guess RDF serialization based on file suffix. Uses
+    ``SUFFIX_FORMAT_MAP`` unless ``fmap`` is provided. Examples:
+
+        >>> guess_format('path/to/file.rdf')
+        'xml'
+        >>> guess_format('path/to/file.owl')
+        'xml'
+        >>> guess_format('path/to/file.ttl')
+        'n3'
+        >>> guess_format('path/to/file.xhtml')
+        'rdfa'
+        >>> guess_format('path/to/file.svg')
+        'rdfa'
+        >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'})
+        'grddl'
+
+    This also works with just the suffixes, with or without leading dot, and
+    regardless of letter case::
+
+        >>> guess_format('.rdf')
+        'xml'
+        >>> guess_format('rdf')
+        'xml'
+        >>> guess_format('RDF')
+        'xml'
+    """
+    fmap = fmap or SUFFIX_FORMAT_MAP
+    return fmap.get(_get_ext(fpath)) or fmap.get(fpath.lower())
+
+
+def _get_ext(fpath, lower=True):
+    """
+    Gets the file extension from a file(path); stripped of leading '.' and in
+    lower case. Examples:
+
+        >>> _get_ext("path/to/file.txt")
+        'txt'
+        >>> _get_ext("OTHER.PDF")
+        'pdf'
+        >>> _get_ext("noext")
+        ''
+        >>> _get_ext(".rdf")
+        'rdf'
+    """
+    ext = splitext(fpath)[-1]
+    if ext == '' and fpath.startswith("."):
+        ext = fpath
+    if lower:
+        ext = ext.lower()
+    if ext.startswith('.'):
+        ext = ext[1:]
+    return ext
+
+
+def find_roots(graph, prop, roots=None):
+    """
+    Find the roots in some sort of transitive hierarchy.
+
+    find_roots(graph, rdflib.RDFS.subClassOf)
+    will return a set of all roots of the sub-class hierarchy
+
+    Assumes triple of the form (child, prop, parent), i.e. the direction of
+    RDFS.subClassOf or SKOS.broader
+
+    """
+
+    non_roots = set()
+    if roots is None:
+        roots = set()
+    for x, y in graph.subject_objects(prop):
+        non_roots.add(x)
+        if x in roots:
+            roots.remove(x)
+        if y not in non_roots:
+            roots.add(y)
+    return roots
+
+
+def get_tree(graph,
+             root,
+             prop,
+             mapper=lambda x: x,
+             sortkey=None,
+             done=None,
+             dir='down'):
+    """
+    Return a nested list/tuple structure representing the tree
+    built by the transitive property given, starting from the root given
+
+    i.e.
+
+    get_tree(graph,
+       rdflib.URIRef("http://xmlns.com/foaf/0.1/Person"),
+       rdflib.RDFS.subClassOf)
+
+    will return the structure for the subClassTree below person.
+
+    dir='down' assumes triple of the form (child, prop, parent),
+    i.e. the direction of RDFS.subClassOf or SKOS.broader
+    Any other dir traverses in the other direction
+
+    """
+
+    if done is None:
+        done = set()
+    if root in done:
+        return
+    done.add(root)
+    tree = []
+
+    if dir == 'down':
+        branches = graph.subjects(prop, root)
+    else:
+        branches = graph.objects(root, prop)
+
+    for branch in branches:
+        t = get_tree(graph, branch, prop, mapper, sortkey, done, dir)
+        if t:
+            tree.append(t)
+
+    return (mapper(root), sorted(tree, key=sortkey))
+
+
+def pprint_query_results(res, namespace_manager = None, stream = None):
+    """
+    Format the rows of a SELECT query result as a plain-text table.
+
+    URIRefs and Literals are rendered through ``namespace_manager``
+    when one is given, everything else as plain N3. The table is written
+    to ``stream`` when given, otherwise it is returned as a string.
+    Raises an Exception if ``res`` is not a SELECT result.
+    """
+
+    def termString(t):
+        if t is None:
+            return "-"
+        if namespace_manager:
+            if isinstance(t, URIRef):
+                return namespace_manager.normalizeUri(t)
+            if isinstance(t, Literal):
+                return t._literal_n3(
+                    qname_callback=namespace_manager.normalizeUri)
+        # BNodes and any other term fall back to plain N3 instead of None
+        return t.n3()
+
+    def c(s, w):
+        """
+        center the string s in w wide string, extra space goes right
+        """
+        pad = max(w - len(s), 0)
+        return " " * (pad // 2) + s + " " * (pad - pad // 2)
+
+    if res.type != 'SELECT':
+        raise Exception("Can only pretty print SELECT results!")
+
+    out = stream if stream else StringIO()
+
+    if not res:
+        # honour ``stream`` for empty results as well
+        out.write("(no results)\n")
+    else:
+        keys = sorted(res.vars)
+        b = [[termString(r[k]) for k in keys] for r in res]
+
+        # column width: widest cell or the header, whichever is longer
+        width = [len(k) for k in keys]
+        for r in b:
+            for i, t in enumerate(r):
+                width[i] = max(width[i], len(t))
+
+        out.write(
+            "|".join([c(k, w) for k, w in zip(keys, width)]) + "\n")
+        out.write("-" * (sum(width) + len(width)) + "\n")
+        for r in sorted(b):
+            out.write(
+                "|".join([t.ljust(w) for t, w in zip(r, width)]) + "\n")
+
+    if not stream:
+        return out.getvalue()
+
+
 def test():
     import doctest
     doctest.testmod()
diff --git a/rdflib/void.py b/rdflib/void.py
new file mode 100644
index 00000000..8a23f569
--- /dev/null
+++ b/rdflib/void.py
@@ -0,0 +1,129 @@
+import collections
+
+from rdflib import URIRef, Graph, Literal
+from rdflib.namespace import VOID, RDF
+
+
+def generateVoID(g, dataset=None, res=None, distinctForPartitions=True):
+    """
+    Returns a new graph with a VoID description of the passed dataset
+
+    For more info on Vocabulary of Interlinked Datasets (VoID), see:
+    http://vocab.deri.ie/void
+
+    This only makes two passes through the triples (once to detect the types
+    of things)
+
+    The tradeoff is that lots of temporary structures are built up in memory
+    meaning lots of memory may be consumed :)
+    I imagine at least a few copies of your original graph.
+
+    the distinctForPartitions parameter controls whether
+    distinctSubjects/objects are tracked for each class/propertyPartition
+    this requires more memory again
+
+    """
+
+    typeMap = collections.defaultdict(set)
+    classes = collections.defaultdict(set)
+    for e, c in g.subject_objects(RDF.type):
+        classes[c].add(e)
+        typeMap[e].add(c)
+
+    triples = 0
+    subjects = set()
+    objects = set()
+    properties = set()
+    classCount = collections.defaultdict(int)
+    propCount = collections.defaultdict(int)
+
+    classProps = collections.defaultdict(set)
+    classObjects = collections.defaultdict(set)
+    propSubjects = collections.defaultdict(set)
+    propObjects = collections.defaultdict(set)
+
+    for s, p, o in g:
+
+        triples += 1
+        subjects.add(s)
+        properties.add(p)
+        objects.add(o)
+
+        # class partitions
+        if s in typeMap:
+            for c in typeMap[s]:
+                classCount[c] += 1
+                if distinctForPartitions:
+                    classObjects[c].add(o)
+                    classProps[c].add(p)
+
+        # property partitions
+        propCount[p] += 1
+        if distinctForPartitions:
+            propObjects[p].add(o)
+            propSubjects[p].add(s)
+
+    if not dataset:
+        dataset = URIRef("http://example.org/Dataset")
+
+    if not res:
+        res = Graph()
+
+    res.add((dataset, RDF.type, VOID.Dataset))
+
+    # basic stats
+    res.add((dataset, VOID.triples, Literal(triples)))
+    res.add((dataset, VOID.classes, Literal(len(classes))))
+
+    res.add((dataset, VOID.distinctObjects, Literal(len(objects))))
+    res.add((dataset, VOID.distinctSubjects, Literal(len(subjects))))
+    res.add((dataset, VOID.properties, Literal(len(properties))))
+
+    for i, c in enumerate(classes):
+        part = URIRef(dataset + "_class%d" % i)
+        res.add((dataset, VOID.classPartition, part))
+        res.add((part, RDF.type, VOID.Dataset))
+
+        res.add((part, VOID.triples, Literal(classCount[c])))
+        res.add((part, VOID.classes, Literal(1)))
+
+        res.add((part, VOID["class"], c))
+
+        res.add((part, VOID.entities, Literal(len(classes[c]))))
+        res.add((part, VOID.distinctSubjects, Literal(len(classes[c]))))
+
+        if distinctForPartitions:
+            res.add(
+                (part, VOID.properties, Literal(len(classProps[c]))))
+            res.add((part, VOID.distinctObjects,
+                    Literal(len(classObjects[c]))))
+
+    for i, p in enumerate(properties):
+        part = URIRef(dataset + "_property%d" % i)
+        res.add((dataset, VOID.propertyPartition, part))
+        res.add((part, RDF.type, VOID.Dataset))
+
+        res.add((part, VOID.triples, Literal(propCount[p])))
+        res.add((part, VOID.properties, Literal(1)))
+
+        res.add((part, VOID.property, p))
+
+        if distinctForPartitions:
+
+            entities = 0
+            propClasses = set()
+            for s in propSubjects[p]:
+                if s in typeMap:
+                    entities += 1
+                for c in typeMap[s]:
+                    propClasses.add(c)
+
+            res.add((part, VOID.entities, Literal(entities)))
+            res.add((part, VOID.classes, Literal(len(propClasses))))
+
+            res.add((part, VOID.distinctSubjects,
+                    Literal(len(propSubjects[p]))))
+            res.add((part, VOID.distinctObjects,
+                    Literal(len(propObjects[p]))))
+
+    return res, dataset
diff --git a/setup.py b/setup.py
index a8792d37..d7cfd154 100644
--- a/setup.py
+++ b/setup.py
@@ -136,5 +136,15 @@ setup(
               'rdflib/plugins/sparql/results',
               'rdflib/plugins/stores',
               ],
+    entry_points = {
+        'console_scripts': [
+            'rdfpipe = rdflib.tools.rdfpipe:main',
+            'csv2rdf = rdflib.tools.csv2rdf:main',
+            'rdf2dot = rdflib.tools.rdf2dot:main',
+            'rdfs2dot = rdflib.tools.rdfs2dot:main',
+            'graphisomorpishm = rdflib.tools.graphisomorphism:main',
+            ],
+        },
+    
     **kwargs
     )
diff --git a/test/test_dawg.py b/test/test_dawg.py
index 748ef191..d7ea1a71 100644
--- a/test/test_dawg.py
+++ b/test/test_dawg.py
@@ -43,6 +43,8 @@ from rdflib.plugins.sparql.update import evalUpdate
 
 from rdflib.py3compat import decodeStringEscape
 
+from rdflib.util import pprint_query_results
+
 from nose.tools import nottest, eq_ as eq
 from nose import SkipTest
 
@@ -141,44 +143,6 @@ def _fmt(f):
     return "turtle"
 
 
-def _bindingsTable(res):
-
-    def termString(t):
-        if t == None:
-            return "-"
-        return repr(t).replace('rdflib.term.', '').replace(
-            "datatype=URIRef(u'http://www.w3.org/2001/XMLSchema#",
-            'datatype=xsd:').replace("datatype=URIRef(u'", "datatype=")
-
-    def c(s, w):
-        """
-        center the string s in w wide string
-        """
-        h = (w - len(s)) // 2
-        return " " * h + s + " " * h
-
-    if not res:
-        return "(no results)\n"
-    else:
-        out = StringIO()
-        # keys = r.vars
-        # for r in b:
-        #     keys.update(r.keys())
-
-        keys = sorted(res.vars)
-        maxlen = [0] * len(keys)
-        b = [[termString(r[k]) for k in keys] for r in res]
-        for r in b:
-            for i in range(len(keys)):
-                maxlen[i] = max(maxlen[i], 1 + len(r[i]))
-
-        out.write(
-            "|".join([c(k, maxlen[i]) for i, k in enumerate(keys)]) + "\n")
-        out.write("-" * sum(maxlen) + "\n")
-        for r in sorted(b):
-            out.write("|".join(
-                [t + " " * (i - len(t) - 1) for i, t in zip(maxlen, r)]) + "\n")
-        return out.getvalue()
 
 
 def bindingsCompatible(a, b):
@@ -476,8 +440,8 @@ def query_test(t):
                     set(res),
                     set(res2)
                 ), 'Bindings do not match: \n%s\n!=\n%s' % (
-                    _bindingsTable(res),
-                    _bindingsTable(res2))
+                    pprint_query_results(res, namespace_manager=g.namespace_manager),
+                    pprint_query_results(res2, namespace_manager=g.namespace_manager))
             elif res.type == 'ASK':
                 eq(res.askAnswer,
                    res2.askAnswer, "Ask answer does not match: %r != %r" % (
author	Gunnar Aastrand Grimnes <gromgull@gmail.com>	2013-05-03 21:12:44 +0200
committer	Gunnar Aastrand Grimnes <gromgull@gmail.com>	2013-05-03 21:12:44 +0200
commit	937edd34747dec528ec818e7893a1f2e3c0a84b3 (patch)
tree	8b872efe1bec7ea9600617800a905ab9e4c01368
parent	723137895125209c071ea0aac927b0153892d557 (diff)
download	rdflib-937edd34747dec528ec818e7893a1f2e3c0a84b3.tar.gz