merged from master

author: gromgull <gromgull@gmail.com> 2013-03-04 16:42:32 +0100
committer: gromgull <gromgull@gmail.com> 2013-03-04 16:42:32 +0100
commit: 0edfc2900eb618478da4b49cc435a89bca6afc0b (patch)
tree: ec8936ed263adb89669deeaed29b85d9ea4efae3 /rdflib/term.py
parent: 798d88d98f42fd6cf756ef2c297326efb0d70686 (diff)
parent: 67559e74917f7d0265fbfbbec5289736973c6a0f (diff)
download: rdflib-0edfc2900eb618478da4b49cc435a89bca6afc0b.tar.gz
1 files changed, 113 insertions, 98 deletions
diff --git a/rdflib/term.py b/rdflib/term.py
index 69312eb7..55cf78d5 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -1,6 +1,6 @@
 """
 This module defines the different types of terms. Terms are the kinds of
-objects that can appear in a quoted/asserted triple. This includes those 
+objects that can appear in a quoted/asserted triple. This includes those
 that are core to RDF:
 
 * Blank Nodes
@@ -12,7 +12,8 @@ Those that extend the RDF model into N3:
 * Formulae
 * Universal Quantifications (Variables)
 
-And those that are primarily for matching against 'Nodes' in the underlying Graph:
+And those that are primarily for matching against 'Nodes' in the
+underlying Graph:
 
 * REGEX Expressions
 * Date Ranges
@@ -22,7 +23,7 @@ And those that are primarily for matching against 'Nodes' in the underlying Grap
 
 __all__ = [
     'bind',
-    
+
     'Node',
     'Identifier',
 
@@ -32,7 +33,7 @@ __all__ = [
 
     'Variable',
     'Statement',
-    ]
+]
 
 import logging
 import warnings
@@ -47,9 +48,9 @@ from isodate import parse_time, parse_date, parse_datetime
 from re import sub
 
 
-
 try:
     from hashlib import md5
+    assert md5
 except ImportError:
     from md5 import md5
 
@@ -59,6 +60,7 @@ from . import py3compat
 
 b = py3compat.b
 
+
 class Node(object):
     """
     A Node in the Graph.
@@ -67,7 +69,7 @@ class Node(object):
     __slots__ = ()
 
 
-class Identifier(Node, unicode): # we allow Identifiers to be Nodes in our Graph
+class Identifier(Node, unicode):  # allow Identifiers to be Nodes in the Graph
     """
     See http://www.w3.org/2002/07/rdf-identifer-terminology/
     regarding choice of terminology.
@@ -103,7 +105,7 @@ class URIRef(Identifier):
             if ends_in_hash:
                 if not value.endswith("#"):
                     value += "#"
-        #if normalize and value and value != normalize("NFC", value):
+        # if normalize and value and value != normalize("NFC", value):
         #    raise Error("value must be in NFC normalized form.")
         try:
             rt = unicode.__new__(cls, value)
@@ -118,14 +120,16 @@ class URIRef(Identifier):
         return "<%s>" % self
 
     def concrete(self):
-        warnings.warn("URIRef.concrete is deprecated.", category=DeprecationWarning, stacklevel=2)
+        warnings.warn("URIRef.concrete is deprecated.",
+                      category=DeprecationWarning, stacklevel=2)
         if "#" in self:
             return URIRef("/".join(self.rsplit("#", 1)))
         else:
             return self
 
     def abstract(self):
-        warnings.warn("URIRef.abstract is deprecated.", category=DeprecationWarning, stacklevel=2)
+        warnings.warn("URIRef.abstract is deprecated.",
+                      category=DeprecationWarning, stacklevel=2)
         if "#" not in self:
             scheme, netloc, path, params, query, fragment = urlparse(self)
             if path:
@@ -138,7 +142,6 @@ class URIRef(Identifier):
         else:
             return self
 
-
     def defrag(self):
         if "#" in self:
             url, frag = urldefrag(self)
@@ -152,13 +155,12 @@ class URIRef(Identifier):
     def __getnewargs__(self):
         return (unicode(self), )
 
-
     def __ne__(self, other):
         return not self.__eq__(other)
 
     def __eq__(self, other):
         if isinstance(other, URIRef):
-            return unicode(self)==unicode(other)
+            return unicode(self) == unicode(other)
         else:
             return False
 
@@ -175,8 +177,7 @@ class URIRef(Identifier):
         else:
             clsName = self.__class__.__name__
 
-        return """%s(%s)""" % (clsName, super(URIRef,self).__repr__())
-        
+        return """%s(%s)""" % (clsName, super(URIRef, self).__repr__())
 
     def md5_term_hash(self):
         """a string of hex that will be the same for two URIRefs that
@@ -185,7 +186,6 @@ class URIRef(Identifier):
         Supported for backwards compatibility; new code should
         probably just use __hash__
         """
-        warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2)
         d = md5(self.encode())
         d.update(b("U"))
         return d.hexdigest()
@@ -195,16 +195,17 @@ def _unique_id():
     # Used to read: """Create a (hopefully) unique prefix"""
     # now retained merely to leave interal API unchanged.
     # From BNode.__new__() below ...
-    # 
+    #
     # acceptable bnode value range for RDF/XML needs to be
     # something that can be serialzed as a nodeID for N3
-    # 
+    #
     # BNode identifiers must be valid NCNames" _:[A-Za-z][A-Za-z0-9]*
     # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
-    return "N" # ensure that id starts with a letter
+    return "N"  # ensure that id starts with a letter
 
 
-# Adapted from http://icodesnip.com/snippet/python/simple-universally-unique-id-uuid-or-guid
+# Adapted from http://icodesnip.com/snippet/python/
+# simple-universally-unique-id-uuid-or-guid
 def bnode_uuid():
     """
     Generates a uuid on behalf of Python 2.4
@@ -266,21 +267,19 @@ def _serial_number_generator():
 
 class BNode(Identifier):
     """
-    Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes    
+    Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes
 
     """
     __slots__ = ()
 
-
-    def __new__(cls, value=None, 
+    def __new__(cls, value=None,
                 _sn_gen=_serial_number_generator(), _prefix=_unique_id()):
         """
         # only store implementations should pass in a value
         """
-        if value==None:
-            # so that BNode values do not
-            # collide with ones created with a different instance of this module
-            # at some other time.
+        if value is None:
+            # so that BNode values do not collide with ones created with
+            # a different instance of this module at some other time.
             node_id = _sn_gen()
             value = "%s%s" % (_prefix, node_id)
         else:
@@ -288,7 +287,7 @@ class BNode(Identifier):
             # for RDF/XML needs to be something that can be serialzed
             # as a nodeID for N3 ??  Unless we require these
             # constraints be enforced elsewhere?
-            pass # assert is_ncname(unicode(value)), "BNode identifiers
+            pass  # assert is_ncname(unicode(value)), "BNode identifiers
                  # must be valid NCNames" _:[A-Za-z][A-Za-z0-9]*
                  # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
         return Identifier.__new__(cls, value)
@@ -322,7 +321,7 @@ class BNode(Identifier):
         True
         """
         if isinstance(other, BNode):
-            return unicode(self)==unicode(other)
+            return unicode(self) == unicode(other)
         else:
             return False
 
@@ -347,7 +346,6 @@ class BNode(Identifier):
         Supported for backwards compatibility; new code should
         probably just use __hash__
         """
-        warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2)
         d = md5(self.encode())
         d.update(b("B"))
         return d.hexdigest()
@@ -424,8 +422,8 @@ class Literal(Identifier):
     >>> "2005" < lit2006
     True
     >>> x = Literal("2", datatype=XSD.integer)
-    >>> x
-    rdflib.term.Literal(%(u)s'2', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+    >>> x.n3()
+    %(u)s'"2"^^<http://www.w3.org/2001/XMLSchema#integer>'
     >>> Literal(x) == x
     True
     >>> x = Literal("cake", lang="en")
@@ -448,8 +446,9 @@ class Literal(Identifier):
         normalize=normalize if normalize is not None else NORMALIZE_LITERALS
 
         if lang is not None and datatype is not None:
-            raise TypeError("A Literal can only have one of lang or datatype, "
-               "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal")
+            raise TypeError(
+                "A Literal can only have one of lang or datatype, "
+                "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal")
 
         if datatype: datatype = URIRef(datatype)
 
@@ -508,11 +507,12 @@ class Literal(Identifier):
         return Literal(self.value,datatype=self.datatype, lang=self.language)
 
     @property
-    def language(self): return self._language
-    
-    @property 
-    def datatype(self): return self._datatype
+    def language(self):
+        return self._language
 
+    @property
+    def datatype(self):
+        return self._datatype
 
     def __reduce__(self):
         return (Literal, (unicode(self), self.language, self.datatype),)
@@ -665,12 +665,13 @@ class Literal(Identifier):
     def __le__(self, other):
         """
         >>> from rdflib.namespace import XSD
-        >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime)
+        >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime
+        ...     ) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime)
         True
         """
         if other is None:
             return False
-        if self==other:
+        if self == other:
             return True
         else:
             return self < other
@@ -713,7 +714,7 @@ class Literal(Identifier):
     def __ge__(self, other):
         if other is None:
             return False
-        if self==other:
+        if self == other:
             return True
         else:
             return self > other
@@ -765,23 +766,25 @@ class Literal(Identifier):
         >>> Literal('1', datatype=XSD.double) in a
         False
 
-        
-        "Called for the key object for dictionary operations, 
-        and by the built-in function hash(). Should return 
-        a 32-bit integer usable as a hash value for 
-        dictionary operations. The only required property 
-        is that objects which compare equal have the same 
-        hash value; it is advised to somehow mix together 
-        (e.g., using exclusive or) the hash values for the 
-        components of the object that also play a part in 
+
+        "Called for the key object for dictionary operations,
+        and by the built-in function hash(). Should return
+        a 32-bit integer usable as a hash value for
+        dictionary operations. The only required property
+        is that objects which compare equal have the same
+        hash value; it is advised to somehow mix together
+        (e.g., using exclusive or) the hash values for the
+        components of the object that also play a part in
         comparison of objects." -- 3.4.1 Basic customization (Python)
 
         "Two literals are equal if and only if all of the following hold:
-        * The strings of the two lexical forms compare equal, character by character.
+        * The strings of the two lexical forms compare equal, character by
+        character.
         * Either both or neither have language tags.
         * The language tags, if any, compare equal.
         * Either both or neither have datatype URIs.
-        * The two datatype URIs, if any, compare equal, character by character."
+        * The two datatype URIs, if any, compare equal, character by
+        character."
         -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)
 
         """
@@ -791,7 +794,6 @@ class Literal(Identifier):
     @py3compat.format_doctest_out
     def __eq__(self, other):
         """
-
         Literals are only equal to other literals. 
 
         "Two literals are equal if and only if all of the following hold:
@@ -1046,7 +1048,8 @@ class Literal(Identifier):
             >>> Literal(0.123456789)._literal_n3(use_plain=True)
             %(u)s'1.234568e-01'
 
-            >>> Literal('0.123456789', datatype=XSD.decimal)._literal_n3(use_plain=True)
+            >>> Literal('0.123456789',
+            ...     datatype=XSD.decimal)._literal_n3(use_plain=True)
             %(u)s'0.123456789'
 
         Using callback for datatype QNames::
@@ -1062,8 +1065,8 @@ class Literal(Identifier):
                 # this is a bit of a mess - 
                 # in py >=2.6 the string.format function makes this easier
                 # we try to produce "pretty" output
-                if self.datatype == _XSD_DOUBLE: 
-                    return sub("\\.?0*e","e", u'%e' % float(self))
+                if self.datatype == _XSD_DOUBLE:
+                    return sub("\\.?0*e", "e", u'%e' % float(self))
                 elif self.datatype == _XSD_DECIMAL:
                     s='%s'%self
                     if '.' not in s: s+='.0'
@@ -1095,9 +1098,9 @@ class Literal(Identifier):
     def _quote_encode(self):
         # This simpler encoding doesn't work; a newline gets encoded as "\\n",
         # which is ok in sourcecode, but we want "\n".
-        #encoded = self.encode('unicode-escape').replace(
+        # encoded = self.encode('unicode-escape').replace(
         #        '\\', '\\\\').replace('"','\\"')
-        #encoded = self.replace.replace('\\', '\\\\').replace('"','\\"')
+        # encoded = self.replace.replace('\\', '\\\\').replace('"','\\"')
 
         # NOTE: Could in theory chose quotes based on quotes appearing in the
         # string, i.e. '"' and "'", but N3/turtle doesn't allow "'"(?).
@@ -1107,11 +1110,17 @@ class Literal(Identifier):
             encoded = self.replace('\\', '\\\\')
             if '"""' in self:
                 # is this ok?
-                encoded = encoded.replace('"""','\\"\\"\\"')
-            return '"""%s"""' % encoded.replace('\r','\\r')
+                encoded = encoded.replace('"""', '\\"\\"\\"')
+            if encoded[-1] == '"' and encoded[-2] != '\\':
+                encoded = encoded[:-1] + '\\' + '"'
+
+            return '"""%s"""' % encoded.replace('\r', '\\r')
         else:
-            return '"%s"' % self.replace('\n','\\n').replace('\\', '\\\\'
-                            ).replace('"', '\\"').replace('\r','\\r')
+            return '"%s"' % self.replace(
+                '\n', '\\n').replace(
+                    '\\', '\\\\').replace(
+                        '"', '\\"').replace(
+                            '\r', '\\r')
 
     if not py3compat.PY3:
         def __str__(self):
@@ -1133,6 +1142,7 @@ class Literal(Identifier):
         """
         Returns an appropriate python datatype derived from this RDF Literal
         """
+
         if self.value is not None: return self.value 
         return self
 
@@ -1143,13 +1153,13 @@ class Literal(Identifier):
         Supported for backwards compatibility; new code should
         probably just use __hash__
         """
-        warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2)
         d = md5(self.encode())
         d.update(b("L"))
         return d.hexdigest()
 
 
 
+
 # Cannot import Namespace/XSD because of circular dependencies
 
 _XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
@@ -1203,7 +1213,7 @@ def _castPythonToLiteral(obj):
     Casts a python datatype to a tuple of the lexical value and a
     datatype URI (or None)
     """
-    for pType,(castFunc,dType) in _PythonToXSD:
+    for pType, (castFunc, dType) in _PythonToXSD:
         if isinstance(obj, pType):
             if castFunc:
                 return castFunc(obj), dType
@@ -1211,7 +1221,7 @@ def _castPythonToLiteral(obj):
                 return obj, dType
             else:
                 return obj, None
-    return obj, None # TODO: is this right for the fall through case?
+    return obj, None  # TODO: is this right for the fall through case?
 
 from decimal import Decimal
 
@@ -1240,57 +1250,61 @@ _PythonToXSD = [
 ]
 
 XSDToPython = {
-    URIRef(_XSD_PFX+'time')               : parse_time,
-    URIRef(_XSD_PFX+'date')               : parse_date,
-    URIRef(_XSD_PFX+'dateTime')           : parse_datetime,
-    URIRef(_XSD_PFX+'string')             : None,
-    URIRef(_XSD_PFX+'normalizedString')   : None,
-    URIRef(_XSD_PFX+'token')              : None,
-    URIRef(_XSD_PFX+'language')           : None,
-    URIRef(_XSD_PFX+'boolean')            : lambda i:i.lower() in ['1','true'],
-    URIRef(_XSD_PFX+'decimal')            : Decimal,
-    URIRef(_XSD_PFX+'integer')            : long,
-    URIRef(_XSD_PFX+'nonPositiveInteger') : int,
-    URIRef(_XSD_PFX+'long')               : long,
-    URIRef(_XSD_PFX+'nonNegativeInteger') : int,
-    URIRef(_XSD_PFX+'negativeInteger')    : int,
-    URIRef(_XSD_PFX+'int')                : long,
-    URIRef(_XSD_PFX+'unsignedLong')       : long,
-    URIRef(_XSD_PFX+'positiveInteger')    : int,
-    URIRef(_XSD_PFX+'short')              : int,
-    URIRef(_XSD_PFX+'unsignedInt')        : long,
-    URIRef(_XSD_PFX+'byte')               : int,
-    URIRef(_XSD_PFX+'unsignedShort')      : int,
-    URIRef(_XSD_PFX+'unsignedByte')       : int,
-    URIRef(_XSD_PFX+'float')              : float,
-    URIRef(_XSD_PFX+'double')             : float,
-    URIRef(_XSD_PFX+'base64Binary')       : lambda s: base64.b64decode(py3compat.b(s)),
-    URIRef(_XSD_PFX+'anyURI')             : None,
+    URIRef(_XSD_PFX + 'time'): parse_time,
+    URIRef(_XSD_PFX + 'date'): parse_date,
+    URIRef(_XSD_PFX + 'dateTime'): parse_datetime,
+    URIRef(_XSD_PFX + 'string'): None,
+    URIRef(_XSD_PFX + 'normalizedString'): None,
+    URIRef(_XSD_PFX + 'token'): None,
+    URIRef(_XSD_PFX + 'language'): None,
+    URIRef(_XSD_PFX + 'boolean'): lambda i: i.lower() in ['1', 'true'],
+    URIRef(_XSD_PFX + 'decimal'): Decimal,
+    URIRef(_XSD_PFX + 'integer'): long,
+    URIRef(_XSD_PFX + 'nonPositiveInteger'): int,
+    URIRef(_XSD_PFX + 'long'): long,
+    URIRef(_XSD_PFX + 'nonNegativeInteger'): int,
+    URIRef(_XSD_PFX + 'negativeInteger'): int,
+    URIRef(_XSD_PFX + 'int'): long,
+    URIRef(_XSD_PFX + 'unsignedLong'): long,
+    URIRef(_XSD_PFX + 'positiveInteger'): int,
+    URIRef(_XSD_PFX + 'short'): int,
+    URIRef(_XSD_PFX + 'unsignedInt'): long,
+    URIRef(_XSD_PFX + 'byte'): int,
+    URIRef(_XSD_PFX + 'unsignedShort'): int,
+    URIRef(_XSD_PFX + 'unsignedByte'): int,
+    URIRef(_XSD_PFX + 'float'): float,
+    URIRef(_XSD_PFX + 'double'): float,
+    URIRef(
+        _XSD_PFX + 'base64Binary'): lambda s: base64.b64decode(py3compat.b(s)),
+    URIRef(_XSD_PFX + 'anyURI'): None,
 }
 
 _toPythonMapping = {}
 _toPythonMapping.update(XSDToPython)
 
+
 def bind(datatype, conversion_function):
     """
     bind a datatype to a function for converting it into a Python
     instance.
     """
     if datatype in _toPythonMapping:
-        _LOGGER.warning("datatype '%s' was already bound. Rebinding." % 
+        _LOGGER.warning("datatype '%s' was already bound. Rebinding." %
                         datatype)
     _toPythonMapping[datatype] = conversion_function
 
 
-
 class Variable(Identifier):
     """
     """
     __slots__ = ()
+
     def __new__(cls, value):
-        if len(value)==0: raise Exception("Attempted to create variable with empty string as name!")
-        if value[0]=='?':
-            value=value[1:]
+        if len(value) == 0:
+            raise Exception(
+                "Attempted to create variable with empty string as name!")
+        if value[0] == '?':
+            value = value[1:]
         return unicode.__new__(cls, value)
 
     def __repr__(self):
@@ -1312,7 +1326,6 @@ class Variable(Identifier):
         Supported for backwards compatibility; new code should
         probably just use __hash__
         """
-        warnings.warn("method md5_term_hash is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2)
         d = md5(self.encode())
         d.update(b("V"))
         return d.hexdigest()
@@ -1321,7 +1334,10 @@ class Variable(Identifier):
 class Statement(Node, tuple):
 
     def __new__(cls, (subject, predicate, object), context):
-        warnings.warn("Class Statement is deprecated, and will be removed in the future. If you use this please let rdflib-dev know!", category=DeprecationWarning, stacklevel=2)
+        warnings.warn(
+            "Class Statement is deprecated, and will be removed in " +
+            "the future. If you use this please let rdflib-dev know!",
+            category=DeprecationWarning, stacklevel=2)
         return tuple.__new__(cls, ((subject, predicate, object), context))
 
     def __reduce__(self):
@@ -1334,4 +1350,3 @@ class Statement(Node, tuple):
 if __name__ == '__main__':
     import doctest
     doctest.testmod()
-
author	gromgull <gromgull@gmail.com>	2013-03-04 16:42:32 +0100
committer	gromgull <gromgull@gmail.com>	2013-03-04 16:42:32 +0100
commit	0edfc2900eb618478da4b49cc435a89bca6afc0b (patch)
tree	ec8936ed263adb89669deeaed29b85d9ea4efae3 /rdflib/term.py
parent	798d88d98f42fd6cf756ef2c297326efb0d70686 (diff)
parent	67559e74917f7d0265fbfbbec5289736973c6a0f (diff)
download	rdflib-0edfc2900eb618478da4b49cc435a89bca6afc0b.tar.gz