summary refs log tree commit diff
path: root/rdflib/term.py
diff options
context:
space:
mode:
author gromgull <gromgull@gmail.com> 2013-03-04 16:42:32 +0100
committer gromgull <gromgull@gmail.com> 2013-03-04 16:42:32 +0100
commit 0edfc2900eb618478da4b49cc435a89bca6afc0b (patch)
tree ec8936ed263adb89669deeaed29b85d9ea4efae3 /rdflib/term.py
parent 798d88d98f42fd6cf756ef2c297326efb0d70686 (diff)
parent 67559e74917f7d0265fbfbbec5289736973c6a0f (diff)
download rdflib-0edfc2900eb618478da4b49cc435a89bca6afc0b.tar.gz
merged from master
Diffstat (limited to 'rdflib/term.py')
-rw-r--r-- rdflib/term.py 211
1 files changed, 113 insertions, 98 deletions
diff --git a/rdflib/term.py b/rdflib/term.py
index 69312eb7..55cf78d5 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -1,6 +1,6 @@
"""
This module defines the different types of terms. Terms are the kinds of
-objects that can appear in a quoted/asserted triple. This includes those
+objects that can appear in a quoted/asserted triple. This includes those
that are core to RDF:
* Blank Nodes
@@ -12,7 +12,8 @@ Those that extend the RDF model into N3:
* Formulae
* Universal Quantifications (Variables)
-And those that are primarily for matching against 'Nodes' in the underlying Graph:
+And those that are primarily for matching against 'Nodes' in the
+underlying Graph:
* REGEX Expressions
* Date Ranges
@@ -22,7 +23,7 @@ And those that are primarily for matching against 'Nodes' in the underlying Grap
__all__ = [
'bind',
-
+
'Node',
'Identifier',
@@ -32,7 +33,7 @@ __all__ = [
'Variable',
'Statement',
- ]
+]
import logging
import warnings
@@ -47,9 +48,9 @@ from isodate import parse_time, parse_date, parse_datetime
from re import sub
-
try:
from hashlib import md5
+ assert md5
except ImportError:
from md5 import md5
@@ -59,6 +60,7 @@ from . import py3compat
b = py3compat.b
+
class Node(object):
"""
A Node in the Graph.
@@ -67,7 +69,7 @@ class Node(object):
__slots__ = ()
-class Identifier(Node, unicode): # we allow Identifiers to be Nodes in our Graph
+class Identifier(Node, unicode): # allow Identifiers to be Nodes in the Graph
"""
See http://www.w3.org/2002/07/rdf-identifer-terminology/
regarding choice of terminology.
@@ -103,7 +105,7 @@ class URIRef(Identifier):
if ends_in_hash:
if not value.endswith("#"):
value += "#"
- #if normalize and value and value != normalize("NFC", value):
+ # if normalize and value and value != normalize("NFC", value):
# raise Error("value must be in NFC normalized form.")
try:
rt = unicode.__new__(cls, value)
@@ -118,14 +120,16 @@ class URIRef(Identifier):
return "<%s>" % self
def concrete(self):
- warnings.warn("URIRef.concrete is deprecated.", category=DeprecationWarning, stacklevel=2)
+ warnings.warn("URIRef.concrete is deprecated.",
+ category=DeprecationWarning, stacklevel=2)
if "#" in self:
return URIRef("/".join(self.rsplit("#", 1)))
else:
return self
def abstract(self):
- warnings.warn("URIRef.abstract is deprecated.", category=DeprecationWarning, stacklevel=2)
+ warnings.warn("URIRef.abstract is deprecated.",
+ category=DeprecationWarning, stacklevel=2)
if "#" not in self:
scheme, netloc, path, params, query, fragment = urlparse(self)
if path:
@@ -138,7 +142,6 @@ class URIRef(Identifier):
else:
return self
-
def defrag(self):
if "#" in self:
url, frag = urldefrag(self)
@@ -152,13 +155,12 @@ class URIRef(Identifier):
def __getnewargs__(self):
return (unicode(self), )
-
def __ne__(self, other):
return not self.__eq__(other)
def __eq__(self, other):
if isinstance(other, URIRef):
- return unicode(self)==unicode(other)
+ return unicode(self) == unicode(other)
else:
return False
@@ -175,8 +177,7 @@ class URIRef(Identifier):
else:
clsName = self.__class__.__name__
- return """%s(%s)""" % (clsName, super(URIRef,self).__repr__())
-
+ return """%s(%s)""" % (clsName, super(URIRef, self).__repr__())
def md5_term_hash(self):
"""a string of hex that will be the same for two URIRefs that
@@ -185,7 +186,6 @@ class URIRef(Identifier):
Supported for backwards compatibility; new code should
probably just use __hash__
"""
- warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2)
d = md5(self.encode())
d.update(b("U"))
return d.hexdigest()
@@ -195,16 +195,17 @@ def _unique_id():
# Used to read: """Create a (hopefully) unique prefix"""
# now retained merely to leave interal API unchanged.
# From BNode.__new__() below ...
- #
+ #
# acceptable bnode value range for RDF/XML needs to be
# something that can be serialzed as a nodeID for N3
- #
+ #
# BNode identifiers must be valid NCNames" _:[A-Za-z][A-Za-z0-9]*
# http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
- return "N" # ensure that id starts with a letter
+ return "N" # ensure that id starts with a letter
-# Adapted from http://icodesnip.com/snippet/python/simple-universally-unique-id-uuid-or-guid
+# Adapted from http://icodesnip.com/snippet/python/
+# simple-universally-unique-id-uuid-or-guid
def bnode_uuid():
"""
Generates a uuid on behalf of Python 2.4
@@ -266,21 +267,19 @@ def _serial_number_generator():
class BNode(Identifier):
"""
- Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes
+ Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes
"""
__slots__ = ()
-
- def __new__(cls, value=None,
+ def __new__(cls, value=None,
_sn_gen=_serial_number_generator(), _prefix=_unique_id()):
"""
# only store implementations should pass in a value
"""
- if value==None:
- # so that BNode values do not
- # collide with ones created with a different instance of this module
- # at some other time.
+ if value is None:
+ # so that BNode values do not collide with ones created with
+ # a different instance of this module at some other time.
node_id = _sn_gen()
value = "%s%s" % (_prefix, node_id)
else:
@@ -288,7 +287,7 @@ class BNode(Identifier):
# for RDF/XML needs to be something that can be serialzed
# as a nodeID for N3 ?? Unless we require these
# constraints be enforced elsewhere?
- pass # assert is_ncname(unicode(value)), "BNode identifiers
+ pass # assert is_ncname(unicode(value)), "BNode identifiers
# must be valid NCNames" _:[A-Za-z][A-Za-z0-9]*
# http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
return Identifier.__new__(cls, value)
@@ -322,7 +321,7 @@ class BNode(Identifier):
True
"""
if isinstance(other, BNode):
- return unicode(self)==unicode(other)
+ return unicode(self) == unicode(other)
else:
return False
@@ -347,7 +346,6 @@ class BNode(Identifier):
Supported for backwards compatibility; new code should
probably just use __hash__
"""
- warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2)
d = md5(self.encode())
d.update(b("B"))
return d.hexdigest()
@@ -424,8 +422,8 @@ class Literal(Identifier):
>>> "2005" < lit2006
True
>>> x = Literal("2", datatype=XSD.integer)
- >>> x
- rdflib.term.Literal(%(u)s'2', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+ >>> x.n3()
+ %(u)s'"2"^^<http://www.w3.org/2001/XMLSchema#integer>'
>>> Literal(x) == x
True
>>> x = Literal("cake", lang="en")
@@ -448,8 +446,9 @@ class Literal(Identifier):
normalize=normalize if normalize is not None else NORMALIZE_LITERALS
if lang is not None and datatype is not None:
- raise TypeError("A Literal can only have one of lang or datatype, "
- "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal")
+ raise TypeError(
+ "A Literal can only have one of lang or datatype, "
+ "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal")
if datatype: datatype = URIRef(datatype)
@@ -508,11 +507,12 @@ class Literal(Identifier):
return Literal(self.value,datatype=self.datatype, lang=self.language)
@property
- def language(self): return self._language
-
- @property
- def datatype(self): return self._datatype
+ def language(self):
+ return self._language
+ @property
+ def datatype(self):
+ return self._datatype
def __reduce__(self):
return (Literal, (unicode(self), self.language, self.datatype),)
@@ -665,12 +665,13 @@ class Literal(Identifier):
def __le__(self, other):
"""
>>> from rdflib.namespace import XSD
- >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime)
+ >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime
+ ... ) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime)
True
"""
if other is None:
return False
- if self==other:
+ if self == other:
return True
else:
return self < other
@@ -713,7 +714,7 @@ class Literal(Identifier):
def __ge__(self, other):
if other is None:
return False
- if self==other:
+ if self == other:
return True
else:
return self > other
@@ -765,23 +766,25 @@ class Literal(Identifier):
>>> Literal('1', datatype=XSD.double) in a
False
-
- "Called for the key object for dictionary operations,
- and by the built-in function hash(). Should return
- a 32-bit integer usable as a hash value for
- dictionary operations. The only required property
- is that objects which compare equal have the same
- hash value; it is advised to somehow mix together
- (e.g., using exclusive or) the hash values for the
- components of the object that also play a part in
+
+ "Called for the key object for dictionary operations,
+ and by the built-in function hash(). Should return
+ a 32-bit integer usable as a hash value for
+ dictionary operations. The only required property
+ is that objects which compare equal have the same
+ hash value; it is advised to somehow mix together
+ (e.g., using exclusive or) the hash values for the
+ components of the object that also play a part in
comparison of objects." -- 3.4.1 Basic customization (Python)
"Two literals are equal if and only if all of the following hold:
- * The strings of the two lexical forms compare equal, character by character.
+ * The strings of the two lexical forms compare equal, character by
+ character.
* Either both or neither have language tags.
* The language tags, if any, compare equal.
* Either both or neither have datatype URIs.
- * The two datatype URIs, if any, compare equal, character by character."
+ * The two datatype URIs, if any, compare equal, character by
+ character."
-- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)
"""
@@ -791,7 +794,6 @@ class Literal(Identifier):
@py3compat.format_doctest_out
def __eq__(self, other):
"""
-
Literals are only equal to other literals.
"Two literals are equal if and only if all of the following hold:
@@ -1046,7 +1048,8 @@ class Literal(Identifier):
>>> Literal(0.123456789)._literal_n3(use_plain=True)
%(u)s'1.234568e-01'
- >>> Literal('0.123456789', datatype=XSD.decimal)._literal_n3(use_plain=True)
+ >>> Literal('0.123456789',
+ ... datatype=XSD.decimal)._literal_n3(use_plain=True)
%(u)s'0.123456789'
Using callback for datatype QNames::
@@ -1062,8 +1065,8 @@ class Literal(Identifier):
# this is a bit of a mess -
# in py >=2.6 the string.format function makes this easier
# we try to produce "pretty" output
- if self.datatype == _XSD_DOUBLE:
- return sub("\\.?0*e","e", u'%e' % float(self))
+ if self.datatype == _XSD_DOUBLE:
+ return sub("\\.?0*e", "e", u'%e' % float(self))
elif self.datatype == _XSD_DECIMAL:
s='%s'%self
if '.' not in s: s+='.0'
@@ -1095,9 +1098,9 @@ class Literal(Identifier):
def _quote_encode(self):
# This simpler encoding doesn't work; a newline gets encoded as "\\n",
# which is ok in sourcecode, but we want "\n".
- #encoded = self.encode('unicode-escape').replace(
+ # encoded = self.encode('unicode-escape').replace(
# '\\', '\\\\').replace('"','\\"')
- #encoded = self.replace.replace('\\', '\\\\').replace('"','\\"')
+ # encoded = self.replace.replace('\\', '\\\\').replace('"','\\"')
# NOTE: Could in theory chose quotes based on quotes appearing in the
# string, i.e. '"' and "'", but N3/turtle doesn't allow "'"(?).
@@ -1107,11 +1110,17 @@ class Literal(Identifier):
encoded = self.replace('\\', '\\\\')
if '"""' in self:
# is this ok?
- encoded = encoded.replace('"""','\\"\\"\\"')
- return '"""%s"""' % encoded.replace('\r','\\r')
+ encoded = encoded.replace('"""', '\\"\\"\\"')
+ if encoded[-1] == '"' and encoded[-2] != '\\':
+ encoded = encoded[:-1] + '\\' + '"'
+
+ return '"""%s"""' % encoded.replace('\r', '\\r')
else:
- return '"%s"' % self.replace('\n','\\n').replace('\\', '\\\\'
- ).replace('"', '\\"').replace('\r','\\r')
+ return '"%s"' % self.replace(
+ '\n', '\\n').replace(
+ '\\', '\\\\').replace(
+ '"', '\\"').replace(
+ '\r', '\\r')
if not py3compat.PY3:
def __str__(self):
@@ -1133,6 +1142,7 @@ class Literal(Identifier):
"""
Returns an appropriate python datatype derived from this RDF Literal
"""
+
if self.value is not None: return self.value
return self
@@ -1143,13 +1153,13 @@ class Literal(Identifier):
Supported for backwards compatibility; new code should
probably just use __hash__
"""
- warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2)
d = md5(self.encode())
d.update(b("L"))
return d.hexdigest()
+
# Cannot import Namespace/XSD because of circular dependencies
_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
@@ -1203,7 +1213,7 @@ def _castPythonToLiteral(obj):
Casts a python datatype to a tuple of the lexical value and a
datatype URI (or None)
"""
- for pType,(castFunc,dType) in _PythonToXSD:
+ for pType, (castFunc, dType) in _PythonToXSD:
if isinstance(obj, pType):
if castFunc:
return castFunc(obj), dType
@@ -1211,7 +1221,7 @@ def _castPythonToLiteral(obj):
return obj, dType
else:
return obj, None
- return obj, None # TODO: is this right for the fall through case?
+ return obj, None # TODO: is this right for the fall through case?
from decimal import Decimal
@@ -1240,57 +1250,61 @@ _PythonToXSD = [
]
XSDToPython = {
- URIRef(_XSD_PFX+'time') : parse_time,
- URIRef(_XSD_PFX+'date') : parse_date,
- URIRef(_XSD_PFX+'dateTime') : parse_datetime,
- URIRef(_XSD_PFX+'string') : None,
- URIRef(_XSD_PFX+'normalizedString') : None,
- URIRef(_XSD_PFX+'token') : None,
- URIRef(_XSD_PFX+'language') : None,
- URIRef(_XSD_PFX+'boolean') : lambda i:i.lower() in ['1','true'],
- URIRef(_XSD_PFX+'decimal') : Decimal,
- URIRef(_XSD_PFX+'integer') : long,
- URIRef(_XSD_PFX+'nonPositiveInteger') : int,
- URIRef(_XSD_PFX+'long') : long,
- URIRef(_XSD_PFX+'nonNegativeInteger') : int,
- URIRef(_XSD_PFX+'negativeInteger') : int,
- URIRef(_XSD_PFX+'int') : long,
- URIRef(_XSD_PFX+'unsignedLong') : long,
- URIRef(_XSD_PFX+'positiveInteger') : int,
- URIRef(_XSD_PFX+'short') : int,
- URIRef(_XSD_PFX+'unsignedInt') : long,
- URIRef(_XSD_PFX+'byte') : int,
- URIRef(_XSD_PFX+'unsignedShort') : int,
- URIRef(_XSD_PFX+'unsignedByte') : int,
- URIRef(_XSD_PFX+'float') : float,
- URIRef(_XSD_PFX+'double') : float,
- URIRef(_XSD_PFX+'base64Binary') : lambda s: base64.b64decode(py3compat.b(s)),
- URIRef(_XSD_PFX+'anyURI') : None,
+ URIRef(_XSD_PFX + 'time'): parse_time,
+ URIRef(_XSD_PFX + 'date'): parse_date,
+ URIRef(_XSD_PFX + 'dateTime'): parse_datetime,
+ URIRef(_XSD_PFX + 'string'): None,
+ URIRef(_XSD_PFX + 'normalizedString'): None,
+ URIRef(_XSD_PFX + 'token'): None,
+ URIRef(_XSD_PFX + 'language'): None,
+ URIRef(_XSD_PFX + 'boolean'): lambda i: i.lower() in ['1', 'true'],
+ URIRef(_XSD_PFX + 'decimal'): Decimal,
+ URIRef(_XSD_PFX + 'integer'): long,
+ URIRef(_XSD_PFX + 'nonPositiveInteger'): int,
+ URIRef(_XSD_PFX + 'long'): long,
+ URIRef(_XSD_PFX + 'nonNegativeInteger'): int,
+ URIRef(_XSD_PFX + 'negativeInteger'): int,
+ URIRef(_XSD_PFX + 'int'): long,
+ URIRef(_XSD_PFX + 'unsignedLong'): long,
+ URIRef(_XSD_PFX + 'positiveInteger'): int,
+ URIRef(_XSD_PFX + 'short'): int,
+ URIRef(_XSD_PFX + 'unsignedInt'): long,
+ URIRef(_XSD_PFX + 'byte'): int,
+ URIRef(_XSD_PFX + 'unsignedShort'): int,
+ URIRef(_XSD_PFX + 'unsignedByte'): int,
+ URIRef(_XSD_PFX + 'float'): float,
+ URIRef(_XSD_PFX + 'double'): float,
+ URIRef(
+ _XSD_PFX + 'base64Binary'): lambda s: base64.b64decode(py3compat.b(s)),
+ URIRef(_XSD_PFX + 'anyURI'): None,
}
_toPythonMapping = {}
_toPythonMapping.update(XSDToPython)
+
def bind(datatype, conversion_function):
"""
bind a datatype to a function for converting it into a Python
instance.
"""
if datatype in _toPythonMapping:
- _LOGGER.warning("datatype '%s' was already bound. Rebinding." %
+ _LOGGER.warning("datatype '%s' was already bound. Rebinding." %
datatype)
_toPythonMapping[datatype] = conversion_function
-
class Variable(Identifier):
"""
"""
__slots__ = ()
+
def __new__(cls, value):
- if len(value)==0: raise Exception("Attempted to create variable with empty string as name!")
- if value[0]=='?':
- value=value[1:]
+ if len(value) == 0:
+ raise Exception(
+ "Attempted to create variable with empty string as name!")
+ if value[0] == '?':
+ value = value[1:]
return unicode.__new__(cls, value)
def __repr__(self):
@@ -1312,7 +1326,6 @@ class Variable(Identifier):
Supported for backwards compatibility; new code should
probably just use __hash__
"""
- warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2)
d = md5(self.encode())
d.update(b("V"))
return d.hexdigest()
@@ -1321,7 +1334,10 @@ class Variable(Identifier):
class Statement(Node, tuple):
def __new__(cls, (subject, predicate, object), context):
- warnings.warn("Class Statement is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2)
+ warnings.warn(
+ "Class Statement is deprecated, and will be removed in " +
+ "the future. If you use this please let rdflib-dev know!",
+ category=DeprecationWarning, stacklevel=2)
return tuple.__new__(cls, ((subject, predicate, object), context))
def __reduce__(self):
@@ -1334,4 +1350,3 @@ class Statement(Node, tuple):
if __name__ == '__main__':
import doctest
doctest.testmod()
-