diff options
Diffstat (limited to 'rdflib/term.py')
-rw-r--r-- | rdflib/term.py | 211 |
1 files changed, 113 insertions, 98 deletions
diff --git a/rdflib/term.py b/rdflib/term.py index 69312eb7..55cf78d5 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -1,6 +1,6 @@ """ This module defines the different types of terms. Terms are the kinds of -objects that can appear in a quoted/asserted triple. This includes those +objects that can appear in a quoted/asserted triple. This includes those that are core to RDF: * Blank Nodes @@ -12,7 +12,8 @@ Those that extend the RDF model into N3: * Formulae * Universal Quantifications (Variables) -And those that are primarily for matching against 'Nodes' in the underlying Graph: +And those that are primarily for matching against 'Nodes' in the +underlying Graph: * REGEX Expressions * Date Ranges @@ -22,7 +23,7 @@ And those that are primarily for matching against 'Nodes' in the underlying Grap __all__ = [ 'bind', - + 'Node', 'Identifier', @@ -32,7 +33,7 @@ __all__ = [ 'Variable', 'Statement', - ] +] import logging import warnings @@ -47,9 +48,9 @@ from isodate import parse_time, parse_date, parse_datetime from re import sub - try: from hashlib import md5 + assert md5 except ImportError: from md5 import md5 @@ -59,6 +60,7 @@ from . import py3compat b = py3compat.b + class Node(object): """ A Node in the Graph. @@ -67,7 +69,7 @@ class Node(object): __slots__ = () -class Identifier(Node, unicode): # we allow Identifiers to be Nodes in our Graph +class Identifier(Node, unicode): # allow Identifiers to be Nodes in the Graph """ See http://www.w3.org/2002/07/rdf-identifer-terminology/ regarding choice of terminology. @@ -103,7 +105,7 @@ class URIRef(Identifier): if ends_in_hash: if not value.endswith("#"): value += "#" - #if normalize and value and value != normalize("NFC", value): + # if normalize and value and value != normalize("NFC", value): # raise Error("value must be in NFC normalized form.") try: rt = unicode.__new__(cls, value) @@ -118,14 +120,16 @@ class URIRef(Identifier): return "<%s>" % self def concrete(self): - warnings.warn("URIRef.concrete is deprecated.", category=DeprecationWarning, stacklevel=2) + warnings.warn("URIRef.concrete is deprecated.", + category=DeprecationWarning, stacklevel=2) if "#" in self: return URIRef("/".join(self.rsplit("#", 1))) else: return self def abstract(self): - warnings.warn("URIRef.abstract is deprecated.", category=DeprecationWarning, stacklevel=2) + warnings.warn("URIRef.abstract is deprecated.", + category=DeprecationWarning, stacklevel=2) if "#" not in self: scheme, netloc, path, params, query, fragment = urlparse(self) if path: @@ -138,7 +142,6 @@ class URIRef(Identifier): else: return self - def defrag(self): if "#" in self: url, frag = urldefrag(self) @@ -152,13 +155,12 @@ class URIRef(Identifier): def __getnewargs__(self): return (unicode(self), ) - def __ne__(self, other): return not self.__eq__(other) def __eq__(self, other): if isinstance(other, URIRef): - return unicode(self)==unicode(other) + return unicode(self) == unicode(other) else: return False @@ -175,8 +177,7 @@ class URIRef(Identifier): else: clsName = self.__class__.__name__ - return """%s(%s)""" % (clsName, super(URIRef,self).__repr__()) - + return """%s(%s)""" % (clsName, super(URIRef, self).__repr__()) def md5_term_hash(self): """a string of hex that will be the same for two URIRefs that @@ -185,7 +186,6 @@ class URIRef(Identifier): Supported for backwards compatibility; new code should probably just use __hash__ """ - warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2) d = md5(self.encode()) d.update(b("U")) return d.hexdigest() @@ -195,16 +195,17 @@ def _unique_id(): # Used to read: """Create a (hopefully) unique prefix""" # now retained merely to leave interal API unchanged. # From BNode.__new__() below ... - # + # # acceptable bnode value range for RDF/XML needs to be # something that can be serialzed as a nodeID for N3 - # + # # BNode identifiers must be valid NCNames" _:[A-Za-z][A-Za-z0-9]* # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID - return "N" # ensure that id starts with a letter + return "N" # ensure that id starts with a letter -# Adapted from http://icodesnip.com/snippet/python/simple-universally-unique-id-uuid-or-guid +# Adapted from http://icodesnip.com/snippet/python/ +# simple-universally-unique-id-uuid-or-guid def bnode_uuid(): """ Generates a uuid on behalf of Python 2.4 @@ -266,21 +267,19 @@ def _serial_number_generator(): class BNode(Identifier): """ - Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes + Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes """ __slots__ = () - - def __new__(cls, value=None, + def __new__(cls, value=None, _sn_gen=_serial_number_generator(), _prefix=_unique_id()): """ # only store implementations should pass in a value """ - if value==None: - # so that BNode values do not - # collide with ones created with a different instance of this module - # at some other time. + if value is None: + # so that BNode values do not collide with ones created with + # a different instance of this module at some other time. node_id = _sn_gen() value = "%s%s" % (_prefix, node_id) else: @@ -288,7 +287,7 @@ class BNode(Identifier): # for RDF/XML needs to be something that can be serialzed # as a nodeID for N3 ?? Unless we require these # constraints be enforced elsewhere? - pass # assert is_ncname(unicode(value)), "BNode identifiers + pass # assert is_ncname(unicode(value)), "BNode identifiers # must be valid NCNames" _:[A-Za-z][A-Za-z0-9]* # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID return Identifier.__new__(cls, value) @@ -322,7 +321,7 @@ class BNode(Identifier): True """ if isinstance(other, BNode): - return unicode(self)==unicode(other) + return unicode(self) == unicode(other) else: return False @@ -347,7 +346,6 @@ class BNode(Identifier): Supported for backwards compatibility; new code should probably just use __hash__ """ - warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2) d = md5(self.encode()) d.update(b("B")) return d.hexdigest() @@ -424,8 +422,8 @@ class Literal(Identifier): >>> "2005" < lit2006 True >>> x = Literal("2", datatype=XSD.integer) - >>> x - rdflib.term.Literal(%(u)s'2', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + >>> x.n3() + %(u)s'"2"^^<http://www.w3.org/2001/XMLSchema#integer>' >>> Literal(x) == x True >>> x = Literal("cake", lang="en") @@ -448,8 +446,9 @@ class Literal(Identifier): normalize=normalize if normalize is not None else NORMALIZE_LITERALS if lang is not None and datatype is not None: - raise TypeError("A Literal can only have one of lang or datatype, " - "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal") + raise TypeError( + "A Literal can only have one of lang or datatype, " + "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal") if datatype: datatype = URIRef(datatype) @@ -508,11 +507,12 @@ class Literal(Identifier): return Literal(self.value,datatype=self.datatype, lang=self.language) @property - def language(self): return self._language - - @property - def datatype(self): return self._datatype + def language(self): + return self._language + @property + def datatype(self): + return self._datatype def __reduce__(self): return (Literal, (unicode(self), self.language, self.datatype),) @@ -665,12 +665,13 @@ class Literal(Identifier): def __le__(self, other): """ >>> from rdflib.namespace import XSD - >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime) + >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime + ... ) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime) True """ if other is None: return False - if self==other: + if self == other: return True else: return self < other @@ -713,7 +714,7 @@ class Literal(Identifier): def __ge__(self, other): if other is None: return False - if self==other: + if self == other: return True else: return self > other @@ -765,23 +766,25 @@ class Literal(Identifier): >>> Literal('1', datatype=XSD.double) in a False - - "Called for the key object for dictionary operations, - and by the built-in function hash(). Should return - a 32-bit integer usable as a hash value for - dictionary operations. The only required property - is that objects which compare equal have the same - hash value; it is advised to somehow mix together - (e.g., using exclusive or) the hash values for the - components of the object that also play a part in + + "Called for the key object for dictionary operations, + and by the built-in function hash(). Should return + a 32-bit integer usable as a hash value for + dictionary operations. The only required property + is that objects which compare equal have the same + hash value; it is advised to somehow mix together + (e.g., using exclusive or) the hash values for the + components of the object that also play a part in comparison of objects." -- 3.4.1 Basic customization (Python) "Two literals are equal if and only if all of the following hold: - * The strings of the two lexical forms compare equal, character by character. + * The strings of the two lexical forms compare equal, character by + character. * Either both or neither have language tags. * The language tags, if any, compare equal. * Either both or neither have datatype URIs. - * The two datatype URIs, if any, compare equal, character by character." + * The two datatype URIs, if any, compare equal, character by + character." -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax) """ @@ -791,7 +794,6 @@ class Literal(Identifier): @py3compat.format_doctest_out def __eq__(self, other): """ - Literals are only equal to other literals. "Two literals are equal if and only if all of the following hold: @@ -1046,7 +1048,8 @@ class Literal(Identifier): >>> Literal(0.123456789)._literal_n3(use_plain=True) %(u)s'1.234568e-01' - >>> Literal('0.123456789', datatype=XSD.decimal)._literal_n3(use_plain=True) + >>> Literal('0.123456789', + ... datatype=XSD.decimal)._literal_n3(use_plain=True) %(u)s'0.123456789' Using callback for datatype QNames:: @@ -1062,8 +1065,8 @@ class Literal(Identifier): # this is a bit of a mess - # in py >=2.6 the string.format function makes this easier # we try to produce "pretty" output - if self.datatype == _XSD_DOUBLE: - return sub("\\.?0*e","e", u'%e' % float(self)) + if self.datatype == _XSD_DOUBLE: + return sub("\\.?0*e", "e", u'%e' % float(self)) elif self.datatype == _XSD_DECIMAL: s='%s'%self if '.' not in s: s+='.0' @@ -1095,9 +1098,9 @@ class Literal(Identifier): def _quote_encode(self): # This simpler encoding doesn't work; a newline gets encoded as "\\n", # which is ok in sourcecode, but we want "\n". - #encoded = self.encode('unicode-escape').replace( + # encoded = self.encode('unicode-escape').replace( # '\\', '\\\\').replace('"','\\"') - #encoded = self.replace.replace('\\', '\\\\').replace('"','\\"') + # encoded = self.replace.replace('\\', '\\\\').replace('"','\\"') # NOTE: Could in theory chose quotes based on quotes appearing in the # string, i.e. '"' and "'", but N3/turtle doesn't allow "'"(?). @@ -1107,11 +1110,17 @@ class Literal(Identifier): encoded = self.replace('\\', '\\\\') if '"""' in self: # is this ok? - encoded = encoded.replace('"""','\\"\\"\\"') - return '"""%s"""' % encoded.replace('\r','\\r') + encoded = encoded.replace('"""', '\\"\\"\\"') + if encoded[-1] == '"' and encoded[-2] != '\\': + encoded = encoded[:-1] + '\\' + '"' + + return '"""%s"""' % encoded.replace('\r', '\\r') else: - return '"%s"' % self.replace('\n','\\n').replace('\\', '\\\\' - ).replace('"', '\\"').replace('\r','\\r') + return '"%s"' % self.replace( + '\n', '\\n').replace( + '\\', '\\\\').replace( + '"', '\\"').replace( + '\r', '\\r') if not py3compat.PY3: def __str__(self): @@ -1133,6 +1142,7 @@ class Literal(Identifier): """ Returns an appropriate python datatype derived from this RDF Literal """ + if self.value is not None: return self.value return self @@ -1143,13 +1153,13 @@ class Literal(Identifier): Supported for backwards compatibility; new code should probably just use __hash__ """ - warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2) d = md5(self.encode()) d.update(b("L")) return d.hexdigest() + # Cannot import Namespace/XSD because of circular dependencies _XSD_PFX = 'http://www.w3.org/2001/XMLSchema#' @@ -1203,7 +1213,7 @@ def _castPythonToLiteral(obj): Casts a python datatype to a tuple of the lexical value and a datatype URI (or None) """ - for pType,(castFunc,dType) in _PythonToXSD: + for pType, (castFunc, dType) in _PythonToXSD: if isinstance(obj, pType): if castFunc: return castFunc(obj), dType @@ -1211,7 +1221,7 @@ def _castPythonToLiteral(obj): return obj, dType else: return obj, None - return obj, None # TODO: is this right for the fall through case? + return obj, None # TODO: is this right for the fall through case? from decimal import Decimal @@ -1240,57 +1250,61 @@ _PythonToXSD = [ ] XSDToPython = { - URIRef(_XSD_PFX+'time') : parse_time, - URIRef(_XSD_PFX+'date') : parse_date, - URIRef(_XSD_PFX+'dateTime') : parse_datetime, - URIRef(_XSD_PFX+'string') : None, - URIRef(_XSD_PFX+'normalizedString') : None, - URIRef(_XSD_PFX+'token') : None, - URIRef(_XSD_PFX+'language') : None, - URIRef(_XSD_PFX+'boolean') : lambda i:i.lower() in ['1','true'], - URIRef(_XSD_PFX+'decimal') : Decimal, - URIRef(_XSD_PFX+'integer') : long, - URIRef(_XSD_PFX+'nonPositiveInteger') : int, - URIRef(_XSD_PFX+'long') : long, - URIRef(_XSD_PFX+'nonNegativeInteger') : int, - URIRef(_XSD_PFX+'negativeInteger') : int, - URIRef(_XSD_PFX+'int') : long, - URIRef(_XSD_PFX+'unsignedLong') : long, - URIRef(_XSD_PFX+'positiveInteger') : int, - URIRef(_XSD_PFX+'short') : int, - URIRef(_XSD_PFX+'unsignedInt') : long, - URIRef(_XSD_PFX+'byte') : int, - URIRef(_XSD_PFX+'unsignedShort') : int, - URIRef(_XSD_PFX+'unsignedByte') : int, - URIRef(_XSD_PFX+'float') : float, - URIRef(_XSD_PFX+'double') : float, - URIRef(_XSD_PFX+'base64Binary') : lambda s: base64.b64decode(py3compat.b(s)), - URIRef(_XSD_PFX+'anyURI') : None, + URIRef(_XSD_PFX + 'time'): parse_time, + URIRef(_XSD_PFX + 'date'): parse_date, + URIRef(_XSD_PFX + 'dateTime'): parse_datetime, + URIRef(_XSD_PFX + 'string'): None, + URIRef(_XSD_PFX + 'normalizedString'): None, + URIRef(_XSD_PFX + 'token'): None, + URIRef(_XSD_PFX + 'language'): None, + URIRef(_XSD_PFX + 'boolean'): lambda i: i.lower() in ['1', 'true'], + URIRef(_XSD_PFX + 'decimal'): Decimal, + URIRef(_XSD_PFX + 'integer'): long, + URIRef(_XSD_PFX + 'nonPositiveInteger'): int, + URIRef(_XSD_PFX + 'long'): long, + URIRef(_XSD_PFX + 'nonNegativeInteger'): int, + URIRef(_XSD_PFX + 'negativeInteger'): int, + URIRef(_XSD_PFX + 'int'): long, + URIRef(_XSD_PFX + 'unsignedLong'): long, + URIRef(_XSD_PFX + 'positiveInteger'): int, + URIRef(_XSD_PFX + 'short'): int, + URIRef(_XSD_PFX + 'unsignedInt'): long, + URIRef(_XSD_PFX + 'byte'): int, + URIRef(_XSD_PFX + 'unsignedShort'): int, + URIRef(_XSD_PFX + 'unsignedByte'): int, + URIRef(_XSD_PFX + 'float'): float, + URIRef(_XSD_PFX + 'double'): float, + URIRef( + _XSD_PFX + 'base64Binary'): lambda s: base64.b64decode(py3compat.b(s)), + URIRef(_XSD_PFX + 'anyURI'): None, } _toPythonMapping = {} _toPythonMapping.update(XSDToPython) + def bind(datatype, conversion_function): """ bind a datatype to a function for converting it into a Python instance. """ if datatype in _toPythonMapping: - _LOGGER.warning("datatype '%s' was already bound. Rebinding." % + _LOGGER.warning("datatype '%s' was already bound. Rebinding." % datatype) _toPythonMapping[datatype] = conversion_function - class Variable(Identifier): """ """ __slots__ = () + def __new__(cls, value): - if len(value)==0: raise Exception("Attempted to create variable with empty string as name!") - if value[0]=='?': - value=value[1:] + if len(value) == 0: + raise Exception( + "Attempted to create variable with empty string as name!") + if value[0] == '?': + value = value[1:] return unicode.__new__(cls, value) def __repr__(self): @@ -1312,7 +1326,6 @@ class Variable(Identifier): Supported for backwards compatibility; new code should probably just use __hash__ """ - warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2) d = md5(self.encode()) d.update(b("V")) return d.hexdigest() @@ -1321,7 +1334,10 @@ class Variable(Identifier): class Statement(Node, tuple): def __new__(cls, (subject, predicate, object), context): - warnings.warn("Class Statement is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2) + warnings.warn( + "Class Statement is deprecated, and will be removed in " + + "the future. If you use this please let rdflib-dev know!", + category=DeprecationWarning, stacklevel=2) return tuple.__new__(cls, ((subject, predicate, object), context)) def __reduce__(self): @@ -1334,4 +1350,3 @@ class Statement(Node, tuple): if __name__ == '__main__': import doctest doctest.testmod() - |