diff options
author | Gunnar Aastrand Grimnes <gromgull@gmail.com> | 2013-05-03 21:12:44 +0200 |
---|---|---|
committer | Gunnar Aastrand Grimnes <gromgull@gmail.com> | 2013-05-03 21:12:44 +0200 |
commit | 937edd34747dec528ec818e7893a1f2e3c0a84b3 (patch) | |
tree | 8b872efe1bec7ea9600617800a905ab9e4c01368 | |
parent | 723137895125209c071ea0aac927b0153892d557 (diff) | |
download | rdflib-937edd34747dec528ec818e7893a1f2e3c0a84b3.tar.gz |
cleanup - move stuff from rdfextras to sensible packages - entry_points for console scripts
-rw-r--r-- | CHANGELOG | 22 | ||||
-rw-r--r-- | examples/sparql_query_example.py | 7 | ||||
-rw-r--r-- | rdflib/extras/cmdlineutils.py (renamed from rdflib/extras/utils/cmdlineutils.py) | 2 | ||||
-rw-r--r-- | rdflib/extras/describer.py | 5 | ||||
-rw-r--r-- | rdflib/extras/utils/__init__.py | 10 | ||||
-rw-r--r-- | rdflib/extras/utils/graphutils.py | 220 | ||||
-rw-r--r-- | rdflib/extras/utils/pathutils.py | 107 | ||||
-rw-r--r-- | rdflib/extras/utils/termutils.py | 234 | ||||
-rw-r--r-- | rdflib/graph.py | 20 | ||||
-rw-r--r-- | rdflib/namespace.py | 4 | ||||
-rw-r--r-- | rdflib/tools/csv2rdf.py (renamed from rdflib/extras/csv2rdf.py) | 2 | ||||
-rw-r--r-- | rdflib/tools/graphisomorphism.py (renamed from rdflib/extras/graphisomorphism.py) | 0 | ||||
-rw-r--r-- | rdflib/tools/rdf2dot.py (renamed from rdflib/extras/rdf2dot.py) | 5 | ||||
-rw-r--r-- | rdflib/tools/rdfpipe.py (renamed from rdflib/extras/rdfpipe.py) | 2 | ||||
-rw-r--r-- | rdflib/tools/rdfs2dot.py (renamed from rdflib/extras/rdfs2dot.py) | 5 | ||||
-rw-r--r-- | rdflib/util.py | 207 | ||||
-rw-r--r-- | rdflib/void.py | 129 | ||||
-rw-r--r-- | setup.py | 10 | ||||
-rw-r--r-- | test/test_dawg.py | 44 |
19 files changed, 398 insertions, 637 deletions
@@ -42,23 +42,23 @@ for row in graph.query('select ... ') : print row.age, row["name"] - * String operations on URIRefs return new URIRefs - https://github.com/RDFLib/rdflib/pull/258 - - >>> URIRef('http://example.org/')+'test - rdflib.term.URIRef('http://example.org/test') - - >>> URIRef('http://example.org/persons/%d/id')%32 - rdflib.term.URIRef('http://example.org/persons/32/id') - * "Slicing" of Graphs and Resources as syntactic sugar: https://github.com/RDFLib/rdflib/pull/271 - graph[bob:FOAF.knows/FOAF.name] -> generator over the names of Bobs friends + graph[bob:FOAF.knows/FOAF.name] + -> generator over the names of Bobs friends + + * The SPARQLStore and SPARQLUpdateStore are now included in the RDFLib core + Minor Changes: - + + * String operations on URIRefs return new URIRefs: + >>> URIRef('http://example.org/')+'test + rdflib.term.URIRef('http://example.org/test') + + https://github.com/RDFLib/rdflib/pull/258 * Namespace is no longer a subclass of URIRef * URIRefs and Literal language tags are validated on construction, avoiding some "RDF-injection" issues diff --git a/examples/sparql_query_example.py b/examples/sparql_query_example.py index 285eabf1..293c4354 100644 --- a/examples/sparql_query_example.py +++ b/examples/sparql_query_example.py @@ -19,9 +19,14 @@ import rdflib g = rdflib.Graph() g.load("foaf.rdf") +# the QueryProcessor knows the FOAF prefix from the graph +# which in turn knows it from reading the RDF/XML file for row in g.query( - 'select ?s where { [] <http://xmlns.com/foaf/0.1/knows> ?s .}'): + 'select ?s where { [] foaf:knows ?s .}'): print row.s # or row["s"] # or row[rdflib.Variable("s")] + + + diff --git a/rdflib/extras/utils/cmdlineutils.py b/rdflib/extras/cmdlineutils.py index 866798d9..a771d4d7 100644 --- a/rdflib/extras/utils/cmdlineutils.py +++ b/rdflib/extras/cmdlineutils.py @@ -4,7 +4,7 @@ import getopt import rdflib import codecs -from rdflib.extras.utils.pathutils import guess_format +from rdflib.util import guess_format def _help(): diff --git a/rdflib/extras/describer.py b/rdflib/extras/describer.py index 82162a02..fa8256ec 100644 --- a/rdflib/extras/describer.py +++ b/rdflib/extras/describer.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import with_statement + from rdflib import py3compat __doc__ = py3compat.format_doctest_out(""" A Describer is a stateful utility for creating RDF statements in a @@ -15,11 +15,10 @@ Full example in the ``to_rdf`` method below:: >>> import datetime >>> from rdflib.graph import Graph - >>> from rdflib.namespace import Namespace, RDFS + >>> from rdflib.namespace import Namespace, RDFS, FOAF >>> >>> ORG_URI = "http://example.org/" >>> - >>> FOAF = Namespace("http://xmlns.com/foaf/0.1/") >>> CV = Namespace("http://purl.org/captsolo/resume-rdf/0.2/cv#") >>> >>> class Person(object): diff --git a/rdflib/extras/utils/__init__.py b/rdflib/extras/utils/__init__.py deleted file mode 100644 index ff96ebc7..00000000 --- a/rdflib/extras/utils/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -import pathutils -import cmdlineutils -import termutils -import graphutils - -# tedious sop to flake -assert pathutils -assert cmdlineutils -assert termutils -assert graphutils diff --git a/rdflib/extras/utils/graphutils.py b/rdflib/extras/utils/graphutils.py deleted file mode 100644 index 986836c4..00000000 --- a/rdflib/extras/utils/graphutils.py +++ /dev/null @@ -1,220 +0,0 @@ -import collections -import rdflib -from rdflib import RDF - -""" -RDF- and RDFlib-centric Graph utilities. -""" - - -def graph_to_dot(graph, dot): - """ - Turns graph into dot (graphviz graph drawing format) using pydot. - - """ - import pydot - nodes = {} - for s, o in graph.subject_objects(): - for i in s, o: - if i not in nodes.keys(): - nodes[i] = i - for s, p, o in graph.triples((None, None, None)): - dot.add_edge(pydot.Edge(nodes[s], nodes[o], label=p)) - - -def find_roots(graph, prop, roots=None): - """ - Find the roots in some sort of transitive hierarchy. - - find_roots(graph, rdflib.RDFS.subClassOf) - will return a set of all roots of the sub-class hierarchy - - Assumes triple of the form (child, prop, parent), i.e. the direction of - RDFS.subClassOf or SKOS.broader - - """ - - non_roots = set() - if roots is None: - roots = set() - for x, y in graph.subject_objects(prop): - non_roots.add(x) - if x in roots: - roots.remove(x) - if y not in non_roots: - roots.add(y) - return roots - - -def get_tree(graph, - root, - prop, - mapper=lambda x: x, - sortkey=None, - done=None, - dir='down'): - """ - Return a nested list/tuple structure representing the tree - built by the transitive property given, starting from the root given - - i.e. - - get_tree(graph, - rdflib.URIRef("http://xmlns.com/foaf/0.1/Person"), - rdflib.RDFS.subClassOf) - - will return the structure for the subClassTree below person. - - dir='down' assumes triple of the form (child, prop, parent), - i.e. the direction of RDFS.subClassOf or SKOS.broader - Any other dir traverses in the other direction - - """ - - if done is None: - done = set() - if root in done: - return - done.add(root) - tree = [] - - if dir == 'down': - branches = graph.subjects(prop, root) - else: - branches = graph.objects(root, prop) - - for branch in branches: - t = get_tree(graph, branch, prop, mapper, sortkey, done, dir) - if t: - tree.append(t) - - return (mapper(root), sorted(tree, key=sortkey)) - -VOID = rdflib.Namespace("http://rdfs.org/ns/void#") -DCTERMS = rdflib.Namespace("http://purl.org/dc/terms/") -FOAF = rdflib.Namespace("http://xmlns.com/foaf/0.1/") - - -def generateVoID(g, dataset=None, res=None, distinctForPartitions=True): - """ - Returns a new graph with a VoID description of the passed dataset - - For more info on Vocabulary of Interlinked Datasets (VoID), see: - http://vocab.deri.ie/void - - This only makes two passes through the triples (once to detect the types - of things) - - The tradeoff is that lots of temporary structures are built up in memory - meaning lots of memory may be consumed :) - I imagine at least a few copies of your original graph. - - the distinctForPartitions parameter controls whether - distinctSubjects/objects are tracked for each class/propertyPartition - this requires more memory again - - """ - - typeMap = collections.defaultdict(set) - classes = collections.defaultdict(set) - for e, c in g.subject_objects(RDF.type): - classes[c].add(e) - typeMap[e].add(c) - - triples = 0 - subjects = set() - objects = set() - properties = set() - classCount = collections.defaultdict(int) - propCount = collections.defaultdict(int) - - classProps = collections.defaultdict(set) - classObjects = collections.defaultdict(set) - propSubjects = collections.defaultdict(set) - propObjects = collections.defaultdict(set) - - for s, p, o in g: - - triples += 1 - subjects.add(s) - properties.add(p) - objects.add(o) - - # class partitions - if s in typeMap: - for c in typeMap[s]: - classCount[c] += 1 - if distinctForPartitions: - classObjects[c].add(o) - classProps[c].add(p) - - # property partitions - propCount[p] += 1 - if distinctForPartitions: - propObjects[p].add(o) - propSubjects[p].add(s) - - if not dataset: - dataset = rdflib.URIRef("http://example.org/Dataset") - - if not res: - res = rdflib.Graph() - - res.add((dataset, RDF.type, VOID.Dataset)) - - # basic stats - res.add((dataset, VOID.triples, rdflib.Literal(triples))) - res.add((dataset, VOID.classes, rdflib.Literal(len(classes)))) - - res.add((dataset, VOID.distinctObjects, rdflib.Literal(len(objects)))) - res.add((dataset, VOID.distinctSubjects, rdflib.Literal(len(subjects)))) - res.add((dataset, VOID.properties, rdflib.Literal(len(properties)))) - - for i, c in enumerate(classes): - part = rdflib.URIRef(dataset + "_class%d" % i) - res.add((dataset, VOID.classPartition, part)) - res.add((part, RDF.type, VOID.Dataset)) - - res.add((part, VOID.triples, rdflib.Literal(classCount[c]))) - res.add((part, VOID.classes, rdflib.Literal(1))) - - res.add((part, VOID["class"], c)) - - res.add((part, VOID.entities, rdflib.Literal(len(classes[c])))) - res.add((part, VOID.distinctSubjects, rdflib.Literal(len(classes[c])))) - - if distinctForPartitions: - res.add( - (part, VOID.properties, rdflib.Literal(len(classProps[c])))) - res.add((part, VOID.distinctObjects, - rdflib.Literal(len(classObjects[c])))) - - for i, p in enumerate(properties): - part = rdflib.URIRef(dataset + "_property%d" % i) - res.add((dataset, VOID.propertyPartition, part)) - res.add((part, RDF.type, VOID.Dataset)) - - res.add((part, VOID.triples, rdflib.Literal(propCount[p]))) - res.add((part, VOID.properties, rdflib.Literal(1))) - - res.add((part, VOID.property, p)) - - if distinctForPartitions: - - entities = 0 - propClasses = set() - for s in propSubjects[p]: - if s in typeMap: - entities += 1 - for c in typeMap[s]: - propClasses.add(c) - - res.add((part, VOID.entities, rdflib.Literal(entities))) - res.add((part, VOID.classes, rdflib.Literal(len(propClasses)))) - - res.add((part, VOID.distinctSubjects, - rdflib.Literal(len(propSubjects[p])))) - res.add((part, VOID.distinctObjects, - rdflib.Literal(len(propObjects[p])))) - - return res, dataset diff --git a/rdflib/extras/utils/pathutils.py b/rdflib/extras/utils/pathutils.py deleted file mode 100644 index a547411a..00000000 --- a/rdflib/extras/utils/pathutils.py +++ /dev/null @@ -1,107 +0,0 @@ -""" -RDF- and RDFlib-centric file and URL path utilities. -""" - -from os.path import splitext - - -def uri_leaf(uri): - """ - Get the "leaf" - fragment id or last segment - of a URI. Useful e.g. for - getting a term from a "namespace like" URI. Examples: - - >>> uri_leaf('http://example.org/ns/things#item') - 'item' - >>> uri_leaf('http://example.org/ns/stuff/item') - 'item' - >>> uri_leaf('http://example.org/ns/stuff/') - >>> - >>> uri_leaf('urn:example.org:stuff') - 'stuff' - >>> uri_leaf('example.org') - >>> - """ - for char in ('#', '/', ':'): - if uri.endswith(char): - break - # base, sep, leaf = uri.rpartition(char) - if char in uri: - sep = char - leaf = uri.rsplit(char)[-1] - else: - sep = '' - leaf = uri - if sep and leaf: - return leaf - - -SUFFIX_FORMAT_MAP = { - 'rdf': 'xml', - 'rdfs': 'xml', - 'owl': 'xml', - 'n3': 'n3', - 'ttl': 'n3', - 'nt': 'nt', - 'trix': 'trix', - 'xhtml': 'rdfa', - 'html': 'rdfa', - 'svg': 'rdfa', - 'nq': 'nquads', - 'trig': 'trig' -} - - -def guess_format(fpath, fmap=None): - """ - Guess RDF serialization based on file suffix. Uses - ``SUFFIX_FORMAT_MAP`` unless ``fmap`` is provided. Examples: - - >>> guess_format('path/to/file.rdf') - 'xml' - >>> guess_format('path/to/file.owl') - 'xml' - >>> guess_format('path/to/file.ttl') - 'n3' - >>> guess_format('path/to/file.xhtml') - 'rdfa' - >>> guess_format('path/to/file.svg') - 'rdfa' - >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'}) - 'grddl' - - This also works with just the suffixes, with or without leading dot, and - regardless of letter case:: - - >>> guess_format('.rdf') - 'xml' - >>> guess_format('rdf') - 'xml' - >>> guess_format('RDF') - 'xml' - """ - fmap = fmap or SUFFIX_FORMAT_MAP - return fmap.get(_get_ext(fpath)) or fmap.get(fpath.lower()) - - -def _get_ext(fpath, lower=True): - """ - Gets the file extension from a file(path); stripped of leading '.' and in - lower case. Examples: - - >>> _get_ext("path/to/file.txt") - 'txt' - >>> _get_ext("OTHER.PDF") - 'pdf' - >>> _get_ext("noext") - '' - >>> _get_ext(".rdf") - 'rdf' - """ - ext = splitext(fpath)[-1] - if ext == '' and fpath.startswith("."): - ext = fpath - if lower: - ext = ext.lower() - if ext.startswith('.'): - ext = ext[1:] - return ext diff --git a/rdflib/extras/utils/termutils.py b/rdflib/extras/utils/termutils.py deleted file mode 100644 index d8a21bc1..00000000 --- a/rdflib/extras/utils/termutils.py +++ /dev/null @@ -1,234 +0,0 @@ -"""Convenience functions for working with Terms and Graphs.""" -from rdflib import BNode -from rdflib import Graph -from rdflib import Literal -from rdflib import URIRef -from rdflib import Variable -from rdflib.term import Statement -from rdflib.graph import QuotedGraph -from rdflib.py3compat import format_doctest_out - -__all__ = ['SUBJECT', 'PREDICATE', 'OBJECT', 'CONTEXT', 'TERM_COMBINATIONS', - 'REVERSE_TERM_COMBINATIONS', 'TERM_INSTANTIATION_DICT', - 'GRAPH_TERM_DICT', 'normalizeGraph', 'term2Letter', - 'constructGraph', 'triplePattern2termCombinations', - 'type2TermCombination', 'statement2TermCombination', - 'escape_quotes'] - -SUBJECT = 0 -PREDICATE = 1 -OBJECT = 2 -CONTEXT = 3 -TERM_COMBINATIONS = dict( - [(term, index) for index, term, in enumerate( - [ - 'UUUU', 'UUUB', 'UUUF', 'UUVU', 'UUVB', 'UUVF', 'UUBU', 'UUBB', 'UUBF', - 'UULU', 'UULB', 'UULF', 'UUFU', 'UUFB', 'UUFF', - # - 'UVUU', 'UVUB', 'UVUF', 'UVVU', 'UVVB', 'UVVF', 'UVBU', 'UVBB', 'UVBF', - 'UVLU', 'UVLB', 'UVLF', 'UVFU', 'UVFB', 'UVFF', - # - 'VUUU', 'VUUB', 'VUUF', 'VUVU', 'VUVB', 'VUVF', 'VUBU', 'VUBB', 'VUBF', - 'VULU', 'VULB', 'VULF', 'VUFU', 'VUFB', 'VUFF', - # - 'VVUU', 'VVUB', 'VVUF', 'VVVU', 'VVVB', 'VVVF', 'VVBU', 'VVBB', 'VVBF', - 'VVLU', 'VVLB', 'VVLF', 'VVFU', 'VVFB', 'VVFF', - # - 'BUUU', 'BUUB', 'BUUF', 'BUVU', 'BUVB', 'BUVF', 'BUBU', 'BUBB', 'BUBF', - 'BULU', 'BULB', 'BULF', 'BUFU', 'BUFB', 'BUFF', - # - 'BVUU', 'BVUB', 'BVUF', 'BVVU', 'BVVB', 'BVVF', 'BVBU', 'BVBB', 'BVBF', - 'BVLU', 'BVLB', 'BVLF', 'BVFU', 'BVFB', 'BVFF', - # - 'FUUU', 'FUUB', 'FUUF', 'FUVU', 'FUVB', 'FUVF', 'FUBU', 'FUBB', 'FUBF', - 'FULU', 'FULB', 'FULF', 'FUFU', 'FUFB', 'FUFF', - # - 'FVUU', 'FVUB', 'FVUF', 'FVVU', 'FVVB', 'FVVF', 'FVBU', 'FVBB', 'FVBF', - 'FVLU', 'FVLB', 'FVLF', 'FVFU', 'FVFB', 'FVFF', - # - # 'sUUU', 'sUUB', 'sUUF', 'sUVU', 'sUVB', 'sUVF', 'sUBU', 'sUBB', 'sUBF', - # 'sULU', 'sULB', 'sULF', 'sUFU', 'sUFB', 'sUFF', - # - # 'sVUU', 'sVUB', 'sVUF', 'sVVU', 'sVVB', 'sVVF', 'sVBU', 'sVBB', 'sVBF', - # 'sVLU', 'sVLB', 'sVLF', 'sVFU', 'sVFB', 'sVFF' - ])]) - -REVERSE_TERM_COMBINATIONS = dict( - [(value, key) for key, value in TERM_COMBINATIONS.items()]) - -TERM_INSTANTIATION_DICT = { - 'U': URIRef, - 'B': BNode, - 'V': Variable, - 'L': Literal -} - -GRAPH_TERM_DICT = { - 'F': (QuotedGraph, URIRef), - 'U': (Graph, URIRef), - 'B': (Graph, BNode) -} - - -@format_doctest_out -def normalizeGraph(graph): - """Takes an instance of a ``Graph`` and returns the instance's identifier - and ``type``. - - Types are ``U`` for a :class:`~rdflib.graph.Graph`, ``F`` for - a :class:`~rdflib.graph.QuotedGraph` and ``B`` for a - :class:`~rdflib.graph.ConjunctiveGraph` - - >>> from rdflib import plugin - >>> from rdflib.graph import Graph, ConjunctiveGraph, QuotedGraph - >>> from rdflib.store import Store - >>> from rdflib import URIRef, Namespace - >>> from rdflib.extras.utils.termutils import normalizeGraph - >>> memstore = plugin.get('IOMemory', Store)() - >>> g = Graph(memstore, URIRef("http://purl.org/net/bel-epa/gjh")) - >>> normalizeGraph(g) - (rdflib.term.URIRef(%(u)s'http://purl.org/net/bel-epa/gjh'), 'U') - >>> g = ConjunctiveGraph(memstore, Namespace("http://rdflib.net/ns")) - >>> normalizeGraph(g) #doctest: +ELLIPSIS - (rdflib.term.URIRef(%(u)s'http://rdflib.net/ns'), 'U') - >>> g = QuotedGraph(memstore, Namespace("http://rdflib.net/ns")) - >>> normalizeGraph(g) - (rdflib.term.URIRef(%(u)s'http://rdflib.net/ns'), 'F') - - """ - if isinstance(graph, QuotedGraph): - return graph.identifier, 'F' - else: - return graph.identifier, term2Letter(graph.identifier) - - -@format_doctest_out -def term2Letter(term): - """Relate a given term to one of several key types: - - * :class:`~rdflib.term.BNode`, - * :class:`~rdflib.term.Literal`, - * :class:`~rdflib.term.Statement` (Deprecated) - * :class:`~rdflib.term.URIRef`, - * :class:`~rdflib.term.Variable` - * :class:`~rdflib.graph.Graph` - * :class:`~rdflib.graph.QuotedGraph` - - >>> import rdflib - >>> from rdflib import plugin - >>> from rdflib import URIRef, Namespace - >>> from rdflib.term import BNode, Literal, Variable - >>> # from rdflib.term import Statement - >>> from rdflib.graph import Graph, ConjunctiveGraph, QuotedGraph - >>> from rdflib.store import Store - >>> from rdflib.extras.utils.termutils import term2Letter - >>> term2Letter(URIRef('http://purl.org/net/bel-epa.com/')) - 'U' - >>> term2Letter(BNode()) - 'B' - >>> term2Letter(Literal(%(u)s'')) - 'L' - >>> term2Letter(Variable(%(u)s'x')) - 'V' - >>> term2Letter(Graph()) - 'B' - >>> term2Letter(QuotedGraph("IOMemory", None)) - 'F' - >>> term2Letter(None) - 'L' - >>> # term2Letter(Statement((None, None, None), None)) # Deprecated - - """ - if isinstance(term, URIRef): - return 'U' - elif isinstance(term, BNode): - return 'B' - elif isinstance(term, Literal): - return 'L' - elif isinstance(term, QuotedGraph): - return 'F' - elif isinstance(term, Variable): - return 'V' - elif isinstance(term, Statement): - return 's' - elif isinstance(term, Graph): - return term2Letter(term.identifier) - elif term is None: - return 'L' - else: - raise Exception( - ("The given term (%s) is not an instance of any " + - "of the known types (URIRef, BNode, Literal, QuotedGraph, " + - "or Variable). It is a %s") - % (term, type(term))) - - -def constructGraph(key): - """Given a key (one of 'F', 'U' or 'B'), returns - a tuple containing a ``Graph`` and an appropriate referent. - - >>> from rdflib.extras.utils.termutils import constructGraph - >>> constructGraph('F') - (<class 'rdflib.graph.QuotedGraph'>, <class 'rdflib.term.URIRef'>) - >>> constructGraph('U') - (<class 'rdflib.graph.Graph'>, <class 'rdflib.term.URIRef'>) - >>> constructGraph('B') - (<class 'rdflib.graph.Graph'>, <class 'rdflib.term.BNode'>) - - """ - return GRAPH_TERM_DICT[key] - - -def triplePattern2termCombinations((s, p, o)): - """ - Maps a triple pattern to term combinations (non-functioning) - - """ - combinations = [] - # combinations.update(TERM_COMBINATIONS) - if isinstance(o, Literal): - for key, val in TERM_COMBINATIONS.items(): - if key[OBJECT] == 'O': - combinations.append(val) - return combinations - - -def type2TermCombination(member, klass, context): - """ - Maps a type to a TermCombo - - """ - try: - rt = TERM_COMBINATIONS['%sU%s%s' % - (term2Letter(member), - term2Letter(klass), - normalizeGraph(context)[-1])] - return rt - except: - raise Exception("Unable to persist" + - "classification triple: %s %s %s %s" % - (member, 'rdf:type', klass, context)) - - -def statement2TermCombination(subject, predicate, obj, context): - """ - Maps a statement to a Term Combo - - """ - return TERM_COMBINATIONS['%s%s%s%s' % - (term2Letter(subject), term2Letter(predicate), - term2Letter(obj), normalizeGraph(context)[-1])] - - -def escape_quotes(qstr): - """ - #FIXME: This *may* prove to be a performance bottleneck and should - perhaps be implemented in C (as it was in 4Suite RDF) - - Ported from Ft.Lib.DbUtil - """ - if qstr is None: - return '' - tmp = qstr.replace("\\", "\\\\") - tmp = tmp.replace("'", "\\'") - return tmp diff --git a/rdflib/graph.py b/rdflib/graph.py index 0acb3879..37d17896 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -874,6 +874,9 @@ class Graph(Node): If override is True will bind namespace to given prefix if namespace was already bound to a different prefix. + + for example: graph.bind('foaf', 'http://xmlns.com/foaf/0.1/') + """ return self.namespace_manager.bind( prefix, namespace, override=override) @@ -1009,10 +1012,25 @@ class Graph(Node): self.parse(source, publicID, format) def query(self, query_object, processor='sparql', - result='sparql', initNs={}, initBindings={}, + result='sparql', initNs=None, initBindings=None, use_store_provided=True, **kwargs): """ + Query this graph. + + A type of 'prepared queries' can be realised by providing + initial variable bindings with initBindings + + Initial namespaces are used to resolve prefixes used in the query, + if none are given, the namespaces from the graph's namespace manager + are used. + + A rdflib.query.QueryResult object is returned + """ + + initBindings = initBindings or {} + initNs = initNs or dict(self.namespaces()) + if hasattr(self.store, "query") and use_store_provided: try: return self.store.query( diff --git a/rdflib/namespace.py b/rdflib/namespace.py index 55455dfc..ac78042b 100644 --- a/rdflib/namespace.py +++ b/rdflib/namespace.py @@ -214,6 +214,10 @@ SKOS = Namespace('http://www.w3.org/2004/02/skos/core#') DOAP = Namespace('http://usefulinc.com/ns/doap#') FOAF = Namespace('http://xmlns.com/foaf/0.1/') DC = Namespace('http://purl.org/dc/elements/1.1/') +DCTERMS = Namespace('http://purl.org/dc/terms/') +VOID = Namespace('http://rdfs.org/ns/void#') + + class NamespaceManager(object): """ diff --git a/rdflib/extras/csv2rdf.py b/rdflib/tools/csv2rdf.py index 04d4db0e..97b589ee 100644 --- a/rdflib/extras/csv2rdf.py +++ b/rdflib/tools/csv2rdf.py @@ -16,7 +16,7 @@ from rdflib import RDF, RDFS from rdflib.namespace import split_uri HELP = """ -toRDF.py \ +csv2rdf.py \ -b <instance-base> \ -p <property-base> \ [-c <classname>] \ diff --git a/rdflib/extras/graphisomorphism.py b/rdflib/tools/graphisomorphism.py index 74c85bc9..74c85bc9 100644 --- a/rdflib/extras/graphisomorphism.py +++ b/rdflib/tools/graphisomorphism.py diff --git a/rdflib/extras/rdf2dot.py b/rdflib/tools/rdf2dot.py index f865f479..104fd990 100644 --- a/rdflib/extras/rdf2dot.py +++ b/rdflib/tools/rdf2dot.py @@ -1,8 +1,7 @@ #!/usr/bin/env python import rdflib -import rdflib.extras -import rdflib.extras.utils.cmdlineutils +import rdflib.extras.cmdlineutils import sys import cgi @@ -118,7 +117,7 @@ language to stdout def main(): - rdflib.extras.utils.cmdlineutils.main(rdf2dot, _help) + rdflib.extras.cmdlineutils.main(rdf2dot, _help) if __name__ == '__main__': main() diff --git a/rdflib/extras/rdfpipe.py b/rdflib/tools/rdfpipe.py index 1b6adaa5..fc57b4b6 100644 --- a/rdflib/extras/rdfpipe.py +++ b/rdflib/tools/rdfpipe.py @@ -17,7 +17,7 @@ from rdflib.namespace import RDF, RDFS, OWL, XSD from rdflib.parser import Parser from rdflib.serializer import Serializer -from rdflib.extras.utils.pathutils import guess_format +from rdflib.util import guess_format STORE_CONNECTION = '' diff --git a/rdflib/extras/rdfs2dot.py b/rdflib/tools/rdfs2dot.py index c2c1e17c..ca3bd66c 100644 --- a/rdflib/extras/rdfs2dot.py +++ b/rdflib/tools/rdfs2dot.py @@ -1,7 +1,6 @@ #!/usr/bin/env python -import rdflib.extras -import rdflib.extras.utils +import rdflib.extras.cmdlineutils import sys import itertools @@ -98,7 +97,7 @@ DOT language to stdout def main(): - rdflib.extras.utils.cmdlineutils.main(rdfs2dot, _help) + rdflib.extras.cmdlineutils.main(rdfs2dot, _help) if __name__ == '__main__': main() diff --git a/rdflib/util.py b/rdflib/util.py index 99f7741d..305c4dc1 100644 --- a/rdflib/util.py +++ b/rdflib/util.py @@ -37,6 +37,8 @@ from time import localtime from time import time from time import timezone +from os.path import splitext +from StringIO import StringIO from rdflib.exceptions import ContextTypeError from rdflib.exceptions import ObjectTypeError @@ -52,7 +54,8 @@ from rdflib.py3compat import sign __all__ = [ 'list2set', 'first', 'uniq', 'more_than', 'to_term', 'from_n3', 'date_time', 'parse_date_time', 'check_context', 'check_subject', - 'check_predicate', 'check_object', 'check_statement', 'check_pattern'] + 'check_predicate', 'check_object', 'check_statement', 'check_pattern', + 'guess_format', 'pprint_query_results', 'find_roots', 'get_tree'] def list2set(seq): @@ -317,6 +320,208 @@ def parse_date_time(val): return t + + + +SUFFIX_FORMAT_MAP = { + 'rdf': 'xml', + 'rdfs': 'xml', + 'owl': 'xml', + 'n3': 'n3', + 'ttl': 'n3', + 'nt': 'nt', + 'trix': 'trix', + 'xhtml': 'rdfa', + 'html': 'rdfa', + 'svg': 'rdfa', + 'nq': 'nquads', + 'trig': 'trig' +} + + +def guess_format(fpath, fmap=None): + """ + Guess RDF serialization based on file suffix. Uses + ``SUFFIX_FORMAT_MAP`` unless ``fmap`` is provided. Examples: + + >>> guess_format('path/to/file.rdf') + 'xml' + >>> guess_format('path/to/file.owl') + 'xml' + >>> guess_format('path/to/file.ttl') + 'n3' + >>> guess_format('path/to/file.xhtml') + 'rdfa' + >>> guess_format('path/to/file.svg') + 'rdfa' + >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'}) + 'grddl' + + This also works with just the suffixes, with or without leading dot, and + regardless of letter case:: + + >>> guess_format('.rdf') + 'xml' + >>> guess_format('rdf') + 'xml' + >>> guess_format('RDF') + 'xml' + """ + fmap = fmap or SUFFIX_FORMAT_MAP + return fmap.get(_get_ext(fpath)) or fmap.get(fpath.lower()) + + +def _get_ext(fpath, lower=True): + """ + Gets the file extension from a file(path); stripped of leading '.' and in + lower case. Examples: + + >>> _get_ext("path/to/file.txt") + 'txt' + >>> _get_ext("OTHER.PDF") + 'pdf' + >>> _get_ext("noext") + '' + >>> _get_ext(".rdf") + 'rdf' + """ + ext = splitext(fpath)[-1] + if ext == '' and fpath.startswith("."): + ext = fpath + if lower: + ext = ext.lower() + if ext.startswith('.'): + ext = ext[1:] + return ext + + +def find_roots(graph, prop, roots=None): + """ + Find the roots in some sort of transitive hierarchy. + + find_roots(graph, rdflib.RDFS.subClassOf) + will return a set of all roots of the sub-class hierarchy + + Assumes triple of the form (child, prop, parent), i.e. the direction of + RDFS.subClassOf or SKOS.broader + + """ + + non_roots = set() + if roots is None: + roots = set() + for x, y in graph.subject_objects(prop): + non_roots.add(x) + if x in roots: + roots.remove(x) + if y not in non_roots: + roots.add(y) + return roots + + +def get_tree(graph, + root, + prop, + mapper=lambda x: x, + sortkey=None, + done=None, + dir='down'): + """ + Return a nested list/tuple structure representing the tree + built by the transitive property given, starting from the root given + + i.e. + + get_tree(graph, + rdflib.URIRef("http://xmlns.com/foaf/0.1/Person"), + rdflib.RDFS.subClassOf) + + will return the structure for the subClassTree below person. + + dir='down' assumes triple of the form (child, prop, parent), + i.e. the direction of RDFS.subClassOf or SKOS.broader + Any other dir traverses in the other direction + + """ + + if done is None: + done = set() + if root in done: + return + done.add(root) + tree = [] + + if dir == 'down': + branches = graph.subjects(prop, root) + else: + branches = graph.objects(root, prop) + + for branch in branches: + t = get_tree(graph, branch, prop, mapper, sortkey, done, dir) + if t: + tree.append(t) + + return (mapper(root), sorted(tree, key=sortkey)) + + +def pprint_query_results(res, namespace_manager = None, stream = None): + + """ + return a text table of query results + """ + + def termString(t): + if t == None: + return "-" + if namespace_manager: + if isinstance(t, URIRef): + return namespace_manager.normalizeUri(t) + elif isinstance(t, BNode): + return t.n3() + elif isinstance(t, Literal): + return t._literal_n3(qname_callback=namespace_manager.normalizeUri) + else: + return t.n3() + + def c(s, w): + """ + center the string s in w wide string + """ + h = (w - len(s)) // 2 + return " " * h + s + " " * h + + if res.type!='SELECT': + raise Exception("Can only pretty print SELECT results!") + + if not res: + return "(no results)\n" + else: + if stream: + out = stream + else: + out = StringIO() + # keys = r.vars + # for r in b: + # keys.update(r.keys()) + + keys = sorted(res.vars) + maxlen = [0] * len(keys) + b = [[termString(r[k]) for k in keys] for r in res] + for r in b: + for i in range(len(keys)): + maxlen[i] = max(maxlen[i], 1 + len(r[i])) + + out.write( + "|".join([c(k, maxlen[i]) for i, k in enumerate(keys)]) + "\n") + out.write("-" * sum(maxlen) + "\n") + for r in sorted(b): + out.write("|".join( + [t + " " * (i - len(t) - 1) for i, t in zip(maxlen, r)]) + "\n") + + if not stream: + return out.getvalue() + + def test(): import doctest doctest.testmod() diff --git a/rdflib/void.py b/rdflib/void.py new file mode 100644 index 00000000..8a23f569 --- /dev/null +++ b/rdflib/void.py @@ -0,0 +1,129 @@ +import collections + +from rdflib import URIRef, Graph, Literal +from rdflib.namespace import VOID, RDF + + +def generateVoID(g, dataset=None, res=None, distinctForPartitions=True): + """ + Returns a new graph with a VoID description of the passed dataset + + For more info on Vocabulary of Interlinked Datasets (VoID), see: + http://vocab.deri.ie/void + + This only makes two passes through the triples (once to detect the types + of things) + + The tradeoff is that lots of temporary structures are built up in memory + meaning lots of memory may be consumed :) + I imagine at least a few copies of your original graph. + + the distinctForPartitions parameter controls whether + distinctSubjects/objects are tracked for each class/propertyPartition + this requires more memory again + + """ + + typeMap = collections.defaultdict(set) + classes = collections.defaultdict(set) + for e, c in g.subject_objects(RDF.type): + classes[c].add(e) + typeMap[e].add(c) + + triples = 0 + subjects = set() + objects = set() + properties = set() + classCount = collections.defaultdict(int) + propCount = collections.defaultdict(int) + + classProps = collections.defaultdict(set) + classObjects = collections.defaultdict(set) + propSubjects = collections.defaultdict(set) + propObjects = collections.defaultdict(set) + + for s, p, o in g: + + triples += 1 + subjects.add(s) + properties.add(p) + objects.add(o) + + # class partitions + if s in typeMap: + for c in typeMap[s]: + classCount[c] += 1 + if distinctForPartitions: + classObjects[c].add(o) + classProps[c].add(p) + + # property partitions + propCount[p] += 1 + if distinctForPartitions: + propObjects[p].add(o) + propSubjects[p].add(s) + + if not dataset: + dataset = URIRef("http://example.org/Dataset") + + if not res: + res = Graph() + + res.add((dataset, RDF.type, VOID.Dataset)) + + # basic stats + res.add((dataset, VOID.triples, Literal(triples))) + res.add((dataset, VOID.classes, Literal(len(classes)))) + + res.add((dataset, VOID.distinctObjects, Literal(len(objects)))) + res.add((dataset, VOID.distinctSubjects, Literal(len(subjects)))) + res.add((dataset, VOID.properties, Literal(len(properties)))) + + for i, c in enumerate(classes): + part = URIRef(dataset + "_class%d" % i) + res.add((dataset, VOID.classPartition, part)) + res.add((part, RDF.type, VOID.Dataset)) + + res.add((part, VOID.triples, Literal(classCount[c]))) + res.add((part, VOID.classes, Literal(1))) + + res.add((part, VOID["class"], c)) + + res.add((part, VOID.entities, Literal(len(classes[c])))) + res.add((part, VOID.distinctSubjects, Literal(len(classes[c])))) + + if distinctForPartitions: + res.add( + (part, VOID.properties, Literal(len(classProps[c])))) + res.add((part, VOID.distinctObjects, + Literal(len(classObjects[c])))) + + for i, p in enumerate(properties): + part = URIRef(dataset + "_property%d" % i) + res.add((dataset, VOID.propertyPartition, part)) + res.add((part, RDF.type, VOID.Dataset)) + + res.add((part, VOID.triples, Literal(propCount[p]))) + res.add((part, VOID.properties, Literal(1))) + + res.add((part, VOID.property, p)) + + if distinctForPartitions: + + entities = 0 + propClasses = set() + for s in propSubjects[p]: + if s in typeMap: + entities += 1 + for c in typeMap[s]: + propClasses.add(c) + + res.add((part, VOID.entities, Literal(entities))) + res.add((part, VOID.classes, Literal(len(propClasses)))) + + res.add((part, VOID.distinctSubjects, + Literal(len(propSubjects[p])))) + res.add((part, VOID.distinctObjects, + Literal(len(propObjects[p])))) + + return res, dataset @@ -136,5 +136,15 @@ setup( 'rdflib/plugins/sparql/results', 'rdflib/plugins/stores', ], + entry_points = { + 'console_scripts': [ + 'rdfpipe = rdflib.tools.rdfpipe:main', + 'csv2rdf = rdflib.tools.csv2rdf:main', + 'rdf2dot = rdflib.tools.rdf2dot:main', + 'rdfs2dot = rdflib.tools.rdfs2dot:main', + 'graphisomorpishm = rdflib.tools.graphisomorphism:main', + ], + }, + **kwargs ) diff --git a/test/test_dawg.py b/test/test_dawg.py index 748ef191..d7ea1a71 100644 --- a/test/test_dawg.py +++ b/test/test_dawg.py @@ -43,6 +43,8 @@ from rdflib.plugins.sparql.update import evalUpdate from rdflib.py3compat import decodeStringEscape +from rdflib.util import pprint_query_results + from nose.tools import nottest, eq_ as eq from nose import SkipTest @@ -141,44 +143,6 @@ def _fmt(f): return "turtle" -def _bindingsTable(res): - - def termString(t): - if t == None: - return "-" - return repr(t).replace('rdflib.term.', '').replace( - "datatype=URIRef(u'http://www.w3.org/2001/XMLSchema#", - 'datatype=xsd:').replace("datatype=URIRef(u'", "datatype=") - - def c(s, w): - """ - center the string s in w wide string - """ - h = (w - len(s)) // 2 - return " " * h + s + " " * h - - if not res: - return "(no results)\n" - else: - out = StringIO() - # keys = r.vars - # for r in b: - # keys.update(r.keys()) - - keys = sorted(res.vars) - maxlen = [0] * len(keys) - b = [[termString(r[k]) for k in keys] for r in res] - for r in b: - for i in range(len(keys)): - maxlen[i] = max(maxlen[i], 1 + len(r[i])) - - out.write( - "|".join([c(k, maxlen[i]) for i, k in enumerate(keys)]) + "\n") - out.write("-" * sum(maxlen) + "\n") - for r in sorted(b): - out.write("|".join( - [t + " " * (i - len(t) - 1) for i, t in zip(maxlen, r)]) + "\n") - return out.getvalue() def bindingsCompatible(a, b): @@ -476,8 +440,8 @@ def query_test(t): set(res), set(res2) ), 'Bindings do not match: \n%s\n!=\n%s' % ( - _bindingsTable(res), - _bindingsTable(res2)) + pprint_query_results(res, namespace_manager=g.namespace_manager), + pprint_query_results(res2, namespace_manager=g.namespace_manager)) elif res.type == 'ASK': eq(res.askAnswer, res2.askAnswer, "Ask answer does not match: %r != %r" % ( |