Various fixes for Python 3 compatibility.

author: Thomas Kluyver <takowl@gmail.com> 2011-11-12 17:06:04 +0000
committer: Thomas Kluyver <takowl@gmail.com> 2011-11-12 17:06:04 +0000
commit: a1f30527d584215279cbfda0c30fa32ba7d81a70 (patch)
tree: bb2219e9d8552c901df13e3e815372e925d66e73
parent: e2fb491a3da80f9e01f3303b3df24881ab41eefa (diff)
download: rdflib-a1f30527d584215279cbfda0c30fa32ba7d81a70.tar.gz
12 files changed, 67 insertions, 51 deletions
diff --git a/rdflib/compare.py b/rdflib/compare.py
index e1b3d579..0703a575 100644
--- a/rdflib/compare.py
+++ b/rdflib/compare.py
@@ -214,7 +214,7 @@ def _md5_hash(t):
     h = hashlib.md5()
     for i in t:
         if isinstance(i, tuple):
-            h.update(_md5_hash(i))
+            h.update(_md5_hash(i).encode('ascii'))
         else:
             h.update(unicode(i).encode("utf8"))
     return h.hexdigest()
diff --git a/rdflib/graph.py b/rdflib/graph.py
index fed398b9..3e780748 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -527,7 +527,7 @@ class Graph(Node):
         else:
             if any is False:
                 try:
-                    next = values.next()
+                    values.next()
                     msg = ("While trying to find a value for (%s, %s, %s) the"
                            " following multiple values where found:\n" %
                            (subject, predicate, object))
@@ -811,7 +811,7 @@ class Graph(Node):
         ... '''
         >>> import tempfile
         >>> file_name = tempfile.mktemp()
-        >>> f = file(file_name, "w")
+        >>> f = open(file_name, "w")
         >>> f.write(my_data)
         >>> f.close()
 
@@ -826,7 +826,7 @@ class Graph(Node):
         2
 
         >>> g = Graph()
-        >>> result = g.parse(file=file(file_name, "r"), format="application/rdf+xml")
+        >>> result = g.parse(file=open(file_name, "r"), format="application/rdf+xml")
         >>> len(g)
         2
 
diff --git a/rdflib/parser.py b/rdflib/parser.py
index f8bd9509..8ecebb6c 100644
--- a/rdflib/parser.py
+++ b/rdflib/parser.py
@@ -23,10 +23,6 @@ except:
 from xml.sax import xmlreader
 from xml.sax.saxutils import prepare_input_source
 import types
-try:
-    _StringTypes = (types.StringType, types.UnicodeType)
-except AttributeError:
-    _StringTypes = (types.StringType,)
 
 from rdflib import __version__
 from rdflib.term import URIRef
@@ -139,7 +135,7 @@ def create_input_source(source=None, publicID=None,
         if isinstance(source, InputSource):
             input_source = source
         else:
-            if isinstance(source, _StringTypes):
+            if isinstance(source, basestring):
                 location = source
             elif hasattr(source, "read") and not isinstance(source, Namespace):
                 f = source
@@ -155,7 +151,7 @@ def create_input_source(source=None, publicID=None,
         absolute_location = URIRef(location, base=base).defrag()
         if absolute_location.startswith("file:///"):
             filename = url2pathname(absolute_location.replace("file:///", "/"))
-            file = __builtin__.file(filename, "rb")
+            file = open(filename, "rb")
         else:
             input_source = URLInputSource(absolute_location, format)
         publicID = publicID or absolute_location
diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py
index 1a135d51..6bad45f5 100644
--- a/rdflib/plugins/parsers/notation3.py
+++ b/rdflib/plugins/parsers/notation3.py
@@ -42,6 +42,8 @@ from decimal import Decimal
 
 from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id
 from rdflib.graph import QuotedGraph, ConjunctiveGraph
+from rdflib import py3compat
+b = py3compat.b
 
 from rdflib.parser import Parser
 
@@ -345,7 +347,10 @@ def canonical(str_in):
     s = ''
     i = 0
     while i < len(s8):
-        ch = s8[i]; n = ord(ch)
+        if py3compat.PY3:
+            n = s8[i]; ch = chr(n)
+        else:
+            ch = s8[i]; n = ord(ch)
         if (n > 126) or (n < 33) :   # %-encode controls, SP, DEL, and utf-8
             s += "%%%02X" % ord(ch)
         elif ch == '%' and i+2 < len(s8):
@@ -2187,7 +2192,7 @@ def backslashUify(ustr):
         to the given unicode"""
 #    progress("String is "+`ustr`)
 #    s1=ustr.encode('utf-8')
-    str  = ""
+    s  = ""
     for ch in ustr:  # .encode('utf-8'):
         if ord(ch) > 65535:
             ch = "\\U%08X" % ord(ch)       
@@ -2195,8 +2200,8 @@ def backslashUify(ustr):
             ch = "\\u%04X" % ord(ch)
         else:
             ch = "%c" % ord(ch)
-        str = str + ch
-    return str
+        s = s + ch
+    return b(s)
 
 def hexify(ustr):
     """Use URL encoding to return an ASCII string
@@ -2208,14 +2213,14 @@ def hexify(ustr):
     """   #"
 #    progress("String is "+`ustr`)
 #    s1=ustr.encode('utf-8')
-    str  = ""
+    s  = ""
     for ch in ustr:  # .encode('utf-8'):
         if ord(ch) > 126 or ord(ch) < 33 :
             ch = "%%%02X" % ord(ch)
         else:
             ch = "%c" % ord(ch)
-        str = str + ch
-    return str
+        s = s + ch
+    return b(s)
     
 def dummy():
         res = ""
diff --git a/rdflib/plugins/parsers/nquads.py b/rdflib/plugins/parsers/nquads.py
index 1a40b05d..c64cad5f 100644
--- a/rdflib/plugins/parsers/nquads.py
+++ b/rdflib/plugins/parsers/nquads.py
@@ -5,7 +5,7 @@ graphs that can be used and queried. The store that backs the graph
 
 >>> from rdflib import ConjunctiveGraph, URIRef, Namespace
 >>> g = ConjunctiveGraph()
->>> with open("test/example.nquads", "r") as examples:
+>>> with open("test/example.nquads", "rb") as examples:
 ...    sink = g.parse(examples, format="nquads")
 >>> assert len(g.store) == 449
 >>> # There should be 16 separate contexts
@@ -18,6 +18,8 @@ graphs that can be used and queried. The store that backs the graph
 >>> assert(g.value(s, FOAF.name) == "Arco Publications")
 """
 
+from rdflib.py3compat import b
+
 # Build up from the NTriples parser:
 from rdflib.plugins.parsers.ntriples import NTriplesParser
 from rdflib.plugins.parsers.ntriples import ParseError
@@ -79,7 +81,7 @@ class NQuadsParser(NTriplesParser):
   
     def parseline(self):
         self.eat(r_wspace)
-        if (not self.line) or self.line.startswith('#'):
+        if (not self.line) or self.line.startswith(b('#')):
             return # The line is empty or a comment
 
         subject = self.subject()
diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py
index 8434a091..8f031160 100644
--- a/rdflib/plugins/parsers/ntriples.py
+++ b/rdflib/plugins/parsers/ntriples.py
@@ -10,16 +10,18 @@ from rdflib.term import URIRef as URI
 from rdflib.term import BNode as bNode
 from rdflib.term import Literal
 
-uriref = r'<([^:]+:[^\s"<>]+)>'
-literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"'
-litinfo = r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^' + uriref + r')?'
-
-r_line = re.compile(r'([^\r\n]*)(?:\r\n|\r|\n)')
-r_wspace = re.compile(r'[ \t]*')
-r_wspaces = re.compile(r'[ \t]+')
-r_tail = re.compile(r'[ \t]*\.[ \t]*')
+from rdflib.py3compat import b
+
+uriref = b(r'<([^:]+:[^\s"<>]+)>')
+literal = b(r'"([^"\\]*(?:\\.[^"\\]*)*)"')
+litinfo = b(r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^') + uriref + b(r')?')
+
+r_line = re.compile(b(r'([^\r\n]*)(?:\r\n|\r|\n)'))
+r_wspace = re.compile(b(r'[ \t]*'))
+r_wspaces = re.compile(b(r'[ \t]+'))
+r_tail = re.compile(b(r'[ \t]*\.[ \t]*'))
 r_uriref = re.compile(uriref)
-r_nodeid = re.compile(r'_:([A-Za-z][A-Za-z0-9]*)')
+r_nodeid = re.compile(b(r'_:([A-Za-z][A-Za-z0-9]*)'))
 r_literal = re.compile(literal + litinfo)
 
 bufsiz = 2048
@@ -37,10 +39,10 @@ class Sink(object):
         self.length += 1
         print (s, p, o)
 
-quot = {'t': '\t', 'n': '\n', 'r': '\r', '"': '"', '\\': '\\'}
-r_safe = re.compile(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)')
-r_quot = re.compile(r'\\(t|n|r|"|\\)')
-r_uniquot = re.compile(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})')
+quot = {b('t'): u'\t', b('n'): u'\n', b('r'): u'\r', b('"'): u'"', b('\\'): u'\\'}
+r_safe = re.compile(b(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)'))
+r_quot = re.compile(b(r'\\(t|n|r|"|\\)'))
+r_uniquot = re.compile(b(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})'))
 
 def unquote(s):
     """Unquote an N-Triples string."""
@@ -52,7 +54,7 @@ def unquote(s):
             m = r_safe.match(s)
             if m:
                 s = s[m.end():]
-                result.append(m.group(1))
+                result.append(m.group(1).decode('ascii'))
                 continue
 
             m = r_quot.match(s)
@@ -72,9 +74,9 @@ def unquote(s):
             elif s.startswith('\\'):
                 raise ParseError("Illegal escape at: %s..." % s[:10])
             else: raise ParseError("Illegal literal character: %r" % s[0])
-        return unicode(''.join(result))
+        return u''.join(result)
 
-r_hibyte = re.compile(r'([\x80-\xFF])')
+r_hibyte = re.compile(ur'([\x80-\xFF])')
 
 def uriquote(uri):
     if not validate:
@@ -146,7 +148,7 @@ class NTriplesParser(object):
 
     def parseline(self):
         self.eat(r_wspace)
-        if (not self.line) or self.line.startswith('#'):
+        if (not self.line) or self.line.startswith(b('#')):
             return # The line is empty or a comment
 
         subject = self.subject()
@@ -169,6 +171,7 @@ class NTriplesParser(object):
         m = pattern.match(self.line)
         if not m: # @@ Why can't we get the original pattern?
             print(dir(pattern))
+            print repr(self.line), type(self.line)
             raise ParseError("Failed to eat %s" % pattern)
         self.line = self.line[m.end():]
         return m
@@ -193,7 +196,7 @@ class NTriplesParser(object):
         return objt
 
     def uriref(self):
-        if self.peek('<'):
+        if self.peek(b('<')):
             uri = self.eat(r_uriref).group(1)
             uri = unquote(uri)
             uri = uriquote(uri)
@@ -201,15 +204,15 @@ class NTriplesParser(object):
         return False
 
     def nodeid(self):
-        if self.peek('_'):
-            return bNode(self.eat(r_nodeid).group(1))
+        if self.peek(b('_')):
+            return bNode(self.eat(r_nodeid).group(1).decode())
         return False
 
     def literal(self):
-        if self.peek('"'):
+        if self.peek(b('"')):
             lit, lang, dtype = self.eat(r_literal).groups()
-            lang = lang or None
-            dtype = dtype or None
+            lang = lang.decode() if lang else None
+            dtype = dtype.decode() if dtype else None
             if lang and dtype:
                 raise ParseError("Can't have both a language and a datatype")
             lit = unquote(lit)
diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py
index e2284773..bbffb940 100644
--- a/rdflib/plugins/serializers/nt.py
+++ b/rdflib/plugins/serializers/nt.py
@@ -4,6 +4,7 @@ See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
 format.
 """
 from rdflib.serializer import Serializer
+from rdflib.py3compat import b
 import warnings
 
 
@@ -20,7 +21,7 @@ class NTSerializer(Serializer):
         encoding = self.encoding
         for triple in self.store:
             stream.write(_nt_row(triple).encode(encoding, "replace"))
-        stream.write("\n")
+        stream.write(b("\n"))
 
 
 def _nt_row(triple):
@@ -61,12 +62,14 @@ def _xmlcharref_encode(unicode_data, encoding="ascii"):
     # order to catch unencodable characters:                          
     for char in unicode_data:
         try:
-            chars.append(char.encode(encoding, 'strict'))
+            char.encode(encoding, 'strict')
         except UnicodeError:
             if ord(char) <= 0xFFFF:
-                chars.append('\u%04X' % ord(char))
+                chars.append('\\u%04X' % ord(char))
             else:
-                chars.append('\U%08X' % ord(char))
+                chars.append('\\U%08X' % ord(char))
+        else:
+            chars.append(char)
 
     return ''.join(chars)
 
diff --git a/rdflib/py3compat.py b/rdflib/py3compat.py
index d251a223..7b8ca666 100644
--- a/rdflib/py3compat.py
+++ b/rdflib/py3compat.py
@@ -34,6 +34,8 @@ def _modify_str_or_docstring(str_change_func):
 if PY3:
     # Python 3:
     # ---------
+    def b(s):
+        return s.encode('ascii')
     
     # Abstract u'abc' syntax:
     @_modify_str_or_docstring
@@ -46,6 +48,8 @@ if PY3:
 else:
     # Python 2
     # --------
+    def b(s):
+        return s
     
     # Abstract u'abc' syntax:
     @_modify_str_or_docstring
diff --git a/run_tests_py3.sh b/run_tests_py3.sh
index efe99fe4..15195395 100755
--- a/run_tests_py3.sh
+++ b/run_tests_py3.sh
@@ -16,4 +16,4 @@ cd build/py3_testing
 2to3 -wn --no-diffs test
 2to3 -wn --no-diffs run_tests.py
 
-python3 run_tests.py
+python3 run_tests.py 2> testlog
diff --git a/test/test_nquads.py b/test/test_nquads.py
index 7252bb90..c452f281 100644
--- a/test/test_nquads.py
+++ b/test/test_nquads.py
@@ -7,7 +7,7 @@ class NQuadsParserTest(unittest.TestCase):
 
     def _load_example(self):
         g = ConjunctiveGraph()
-        with open("test/example.nquads", "r") as examples:
+        with open("test/example.nquads", "rb") as examples:
             g.parse(examples, format="nquads")
         return g
       
diff --git a/test/test_ntparse.py b/test/test_ntparse.py
index 5a510114..b1cd03b8 100644
--- a/test/test_ntparse.py
+++ b/test/test_ntparse.py
@@ -108,7 +108,7 @@ class NTTestCase(unittest.TestCase):
         # self.assertRaises(ntriples.ParseError, p.literal)
 
 def check_nt_parse(fpath, fmt):
-    fp = open(fpath, 'r')
+    fp = open(fpath, 'rb')
     p = ntriples.NTriplesParser(sink=ntriples.Sink()) 
     sink = p.parse(fp) # file; use parsestring for a string
     fp.close() 
diff --git a/test/test_serializexml.py b/test/test_serializexml.py
index cbe1afcf..c5c58d41 100644
--- a/test/test_serializexml.py
+++ b/test/test_serializexml.py
@@ -4,7 +4,10 @@ from rdflib.namespace import RDFS
 from rdflib.plugins.serializers.rdfxml import XMLSerializer
 
 from rdflib.graph import ConjunctiveGraph
-from StringIO import StringIO
+try:
+    from io import BytesIO
+except ImportError:
+    from StringIO import StringIO as BytesIO
 
 
 class SerializerTestBase(object):
@@ -53,7 +56,7 @@ def _mangled_copy(g):
 
 def serialize(sourceGraph, makeSerializer, getValue=True, extra_args={}):
     serializer = makeSerializer(sourceGraph)
-    stream = StringIO()
+    stream = BytesIO()
     serializer.serialize(stream, **extra_args)
     return getValue and stream.getvalue() or stream
author	Thomas Kluyver <takowl@gmail.com>	2011-11-12 17:06:04 +0000
committer	Thomas Kluyver <takowl@gmail.com>	2011-11-12 17:06:04 +0000
commit	a1f30527d584215279cbfda0c30fa32ba7d81a70 (patch)
tree	bb2219e9d8552c901df13e3e815372e925d66e73
parent	e2fb491a3da80f9e01f3303b3df24881ab41eefa (diff)
download	rdflib-a1f30527d584215279cbfda0c30fa32ba7d81a70.tar.gz