summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Thomas Kluyver <takowl@gmail.com> 2011-11-12 17:06:04 +0000
committer: Thomas Kluyver <takowl@gmail.com> 2011-11-12 17:06:04 +0000
commit: a1f30527d584215279cbfda0c30fa32ba7d81a70 (patch)
tree: bb2219e9d8552c901df13e3e815372e925d66e73
parent: e2fb491a3da80f9e01f3303b3df24881ab41eefa (diff)
download: rdflib-a1f30527d584215279cbfda0c30fa32ba7d81a70.tar.gz
Various fixes for Python 3 compatibility.
-rw-r--r-- rdflib/compare.py 2
-rw-r--r-- rdflib/graph.py 6
-rw-r--r-- rdflib/parser.py 8
-rw-r--r-- rdflib/plugins/parsers/notation3.py 19
-rw-r--r-- rdflib/plugins/parsers/nquads.py 6
-rw-r--r-- rdflib/plugins/parsers/ntriples.py 49
-rw-r--r-- rdflib/plugins/serializers/nt.py 11
-rw-r--r-- rdflib/py3compat.py 4
-rwxr-xr-x run_tests_py3.sh 2
-rw-r--r-- test/test_nquads.py 2
-rw-r--r-- test/test_ntparse.py 2
-rw-r--r-- test/test_serializexml.py 7
12 files changed, 67 insertions, 51 deletions
diff --git a/rdflib/compare.py b/rdflib/compare.py
index e1b3d579..0703a575 100644
--- a/rdflib/compare.py
+++ b/rdflib/compare.py
@@ -214,7 +214,7 @@ def _md5_hash(t):
h = hashlib.md5()
for i in t:
if isinstance(i, tuple):
- h.update(_md5_hash(i))
+ h.update(_md5_hash(i).encode('ascii'))
else:
h.update(unicode(i).encode("utf8"))
return h.hexdigest()
diff --git a/rdflib/graph.py b/rdflib/graph.py
index fed398b9..3e780748 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -527,7 +527,7 @@ class Graph(Node):
else:
if any is False:
try:
- next = values.next()
+ values.next()
msg = ("While trying to find a value for (%s, %s, %s) the"
" following multiple values where found:\n" %
(subject, predicate, object))
@@ -811,7 +811,7 @@ class Graph(Node):
... '''
>>> import tempfile
>>> file_name = tempfile.mktemp()
- >>> f = file(file_name, "w")
+ >>> f = open(file_name, "w")
>>> f.write(my_data)
>>> f.close()
@@ -826,7 +826,7 @@ class Graph(Node):
2
>>> g = Graph()
- >>> result = g.parse(file=file(file_name, "r"), format="application/rdf+xml")
+ >>> result = g.parse(file=open(file_name, "r"), format="application/rdf+xml")
>>> len(g)
2
diff --git a/rdflib/parser.py b/rdflib/parser.py
index f8bd9509..8ecebb6c 100644
--- a/rdflib/parser.py
+++ b/rdflib/parser.py
@@ -23,10 +23,6 @@ except:
from xml.sax import xmlreader
from xml.sax.saxutils import prepare_input_source
import types
-try:
- _StringTypes = (types.StringType, types.UnicodeType)
-except AttributeError:
- _StringTypes = (types.StringType,)
from rdflib import __version__
from rdflib.term import URIRef
@@ -139,7 +135,7 @@ def create_input_source(source=None, publicID=None,
if isinstance(source, InputSource):
input_source = source
else:
- if isinstance(source, _StringTypes):
+ if isinstance(source, basestring):
location = source
elif hasattr(source, "read") and not isinstance(source, Namespace):
f = source
@@ -155,7 +151,7 @@ def create_input_source(source=None, publicID=None,
absolute_location = URIRef(location, base=base).defrag()
if absolute_location.startswith("file:///"):
filename = url2pathname(absolute_location.replace("file:///", "/"))
- file = __builtin__.file(filename, "rb")
+ file = open(filename, "rb")
else:
input_source = URLInputSource(absolute_location, format)
publicID = publicID or absolute_location
diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py
index 1a135d51..6bad45f5 100644
--- a/rdflib/plugins/parsers/notation3.py
+++ b/rdflib/plugins/parsers/notation3.py
@@ -42,6 +42,8 @@ from decimal import Decimal
from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id
from rdflib.graph import QuotedGraph, ConjunctiveGraph
+from rdflib import py3compat
+b = py3compat.b
from rdflib.parser import Parser
@@ -345,7 +347,10 @@ def canonical(str_in):
s = ''
i = 0
while i < len(s8):
- ch = s8[i]; n = ord(ch)
+ if py3compat.PY3:
+ n = s8[i]; ch = chr(n)
+ else:
+ ch = s8[i]; n = ord(ch)
if (n > 126) or (n < 33) : # %-encode controls, SP, DEL, and utf-8
s += "%%%02X" % ord(ch)
elif ch == '%' and i+2 < len(s8):
@@ -2187,7 +2192,7 @@ def backslashUify(ustr):
to the given unicode"""
# progress("String is "+`ustr`)
# s1=ustr.encode('utf-8')
- str = ""
+ s = ""
for ch in ustr: # .encode('utf-8'):
if ord(ch) > 65535:
ch = "\\U%08X" % ord(ch)
@@ -2195,8 +2200,8 @@ def backslashUify(ustr):
ch = "\\u%04X" % ord(ch)
else:
ch = "%c" % ord(ch)
- str = str + ch
- return str
+ s = s + ch
+ return b(s)
def hexify(ustr):
"""Use URL encoding to return an ASCII string
@@ -2208,14 +2213,14 @@ def hexify(ustr):
""" #"
# progress("String is "+`ustr`)
# s1=ustr.encode('utf-8')
- str = ""
+ s = ""
for ch in ustr: # .encode('utf-8'):
if ord(ch) > 126 or ord(ch) < 33 :
ch = "%%%02X" % ord(ch)
else:
ch = "%c" % ord(ch)
- str = str + ch
- return str
+ s = s + ch
+ return b(s)
def dummy():
res = ""
diff --git a/rdflib/plugins/parsers/nquads.py b/rdflib/plugins/parsers/nquads.py
index 1a40b05d..c64cad5f 100644
--- a/rdflib/plugins/parsers/nquads.py
+++ b/rdflib/plugins/parsers/nquads.py
@@ -5,7 +5,7 @@ graphs that can be used and queried. The store that backs the graph
>>> from rdflib import ConjunctiveGraph, URIRef, Namespace
>>> g = ConjunctiveGraph()
->>> with open("test/example.nquads", "r") as examples:
+>>> with open("test/example.nquads", "rb") as examples:
... sink = g.parse(examples, format="nquads")
>>> assert len(g.store) == 449
>>> # There should be 16 separate contexts
@@ -18,6 +18,8 @@ graphs that can be used and queried. The store that backs the graph
>>> assert(g.value(s, FOAF.name) == "Arco Publications")
"""
+from rdflib.py3compat import b
+
# Build up from the NTriples parser:
from rdflib.plugins.parsers.ntriples import NTriplesParser
from rdflib.plugins.parsers.ntriples import ParseError
@@ -79,7 +81,7 @@ class NQuadsParser(NTriplesParser):
def parseline(self):
self.eat(r_wspace)
- if (not self.line) or self.line.startswith('#'):
+ if (not self.line) or self.line.startswith(b('#')):
return # The line is empty or a comment
subject = self.subject()
diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py
index 8434a091..8f031160 100644
--- a/rdflib/plugins/parsers/ntriples.py
+++ b/rdflib/plugins/parsers/ntriples.py
@@ -10,16 +10,18 @@ from rdflib.term import URIRef as URI
from rdflib.term import BNode as bNode
from rdflib.term import Literal
-uriref = r'<([^:]+:[^\s"<>]+)>'
-literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"'
-litinfo = r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^' + uriref + r')?'
-
-r_line = re.compile(r'([^\r\n]*)(?:\r\n|\r|\n)')
-r_wspace = re.compile(r'[ \t]*')
-r_wspaces = re.compile(r'[ \t]+')
-r_tail = re.compile(r'[ \t]*\.[ \t]*')
+from rdflib.py3compat import b
+
+uriref = b(r'<([^:]+:[^\s"<>]+)>')
+literal = b(r'"([^"\\]*(?:\\.[^"\\]*)*)"')
+litinfo = b(r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^') + uriref + b(r')?')
+
+r_line = re.compile(b(r'([^\r\n]*)(?:\r\n|\r|\n)'))
+r_wspace = re.compile(b(r'[ \t]*'))
+r_wspaces = re.compile(b(r'[ \t]+'))
+r_tail = re.compile(b(r'[ \t]*\.[ \t]*'))
r_uriref = re.compile(uriref)
-r_nodeid = re.compile(r'_:([A-Za-z][A-Za-z0-9]*)')
+r_nodeid = re.compile(b(r'_:([A-Za-z][A-Za-z0-9]*)'))
r_literal = re.compile(literal + litinfo)
bufsiz = 2048
@@ -37,10 +39,10 @@ class Sink(object):
self.length += 1
print (s, p, o)
-quot = {'t': '\t', 'n': '\n', 'r': '\r', '"': '"', '\\': '\\'}
-r_safe = re.compile(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)')
-r_quot = re.compile(r'\\(t|n|r|"|\\)')
-r_uniquot = re.compile(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})')
+quot = {b('t'): u'\t', b('n'): u'\n', b('r'): u'\r', b('"'): u'"', b('\\'): u'\\'}
+r_safe = re.compile(b(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)'))
+r_quot = re.compile(b(r'\\(t|n|r|"|\\)'))
+r_uniquot = re.compile(b(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})'))
def unquote(s):
"""Unquote an N-Triples string."""
@@ -52,7 +54,7 @@ def unquote(s):
m = r_safe.match(s)
if m:
s = s[m.end():]
- result.append(m.group(1))
+ result.append(m.group(1).decode('ascii'))
continue
m = r_quot.match(s)
@@ -72,9 +74,9 @@ def unquote(s):
elif s.startswith('\\'):
raise ParseError("Illegal escape at: %s..." % s[:10])
else: raise ParseError("Illegal literal character: %r" % s[0])
- return unicode(''.join(result))
+ return u''.join(result)
-r_hibyte = re.compile(r'([\x80-\xFF])')
+r_hibyte = re.compile(ur'([\x80-\xFF])')
def uriquote(uri):
if not validate:
@@ -146,7 +148,7 @@ class NTriplesParser(object):
def parseline(self):
self.eat(r_wspace)
- if (not self.line) or self.line.startswith('#'):
+ if (not self.line) or self.line.startswith(b('#')):
return # The line is empty or a comment
subject = self.subject()
@@ -169,6 +171,7 @@ class NTriplesParser(object):
m = pattern.match(self.line)
if not m: # @@ Why can't we get the original pattern?
print(dir(pattern))
+ print repr(self.line), type(self.line)
raise ParseError("Failed to eat %s" % pattern)
self.line = self.line[m.end():]
return m
@@ -193,7 +196,7 @@ class NTriplesParser(object):
return objt
def uriref(self):
- if self.peek('<'):
+ if self.peek(b('<')):
uri = self.eat(r_uriref).group(1)
uri = unquote(uri)
uri = uriquote(uri)
@@ -201,15 +204,15 @@ class NTriplesParser(object):
return False
def nodeid(self):
- if self.peek('_'):
- return bNode(self.eat(r_nodeid).group(1))
+ if self.peek(b('_')):
+ return bNode(self.eat(r_nodeid).group(1).decode())
return False
def literal(self):
- if self.peek('"'):
+ if self.peek(b('"')):
lit, lang, dtype = self.eat(r_literal).groups()
- lang = lang or None
- dtype = dtype or None
+ lang = lang.decode() if lang else None
+ dtype = dtype.decode() if dtype else None
if lang and dtype:
raise ParseError("Can't have both a language and a datatype")
lit = unquote(lit)
diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py
index e2284773..bbffb940 100644
--- a/rdflib/plugins/serializers/nt.py
+++ b/rdflib/plugins/serializers/nt.py
@@ -4,6 +4,7 @@ See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
format.
"""
from rdflib.serializer import Serializer
+from rdflib.py3compat import b
import warnings
@@ -20,7 +21,7 @@ class NTSerializer(Serializer):
encoding = self.encoding
for triple in self.store:
stream.write(_nt_row(triple).encode(encoding, "replace"))
- stream.write("\n")
+ stream.write(b("\n"))
def _nt_row(triple):
@@ -61,12 +62,14 @@ def _xmlcharref_encode(unicode_data, encoding="ascii"):
# order to catch unencodable characters:
for char in unicode_data:
try:
- chars.append(char.encode(encoding, 'strict'))
+ char.encode(encoding, 'strict')
except UnicodeError:
if ord(char) <= 0xFFFF:
- chars.append('\u%04X' % ord(char))
+ chars.append('\\u%04X' % ord(char))
else:
- chars.append('\U%08X' % ord(char))
+ chars.append('\\U%08X' % ord(char))
+ else:
+ chars.append(char)
return ''.join(chars)
diff --git a/rdflib/py3compat.py b/rdflib/py3compat.py
index d251a223..7b8ca666 100644
--- a/rdflib/py3compat.py
+++ b/rdflib/py3compat.py
@@ -34,6 +34,8 @@ def _modify_str_or_docstring(str_change_func):
if PY3:
# Python 3:
# ---------
+ def b(s):
+ return s.encode('ascii')
# Abstract u'abc' syntax:
@_modify_str_or_docstring
@@ -46,6 +48,8 @@ if PY3:
else:
# Python 2
# --------
+ def b(s):
+ return s
# Abstract u'abc' syntax:
@_modify_str_or_docstring
diff --git a/run_tests_py3.sh b/run_tests_py3.sh
index efe99fe4..15195395 100755
--- a/run_tests_py3.sh
+++ b/run_tests_py3.sh
@@ -16,4 +16,4 @@ cd build/py3_testing
2to3 -wn --no-diffs test
2to3 -wn --no-diffs run_tests.py
-python3 run_tests.py
+python3 run_tests.py 2> testlog
diff --git a/test/test_nquads.py b/test/test_nquads.py
index 7252bb90..c452f281 100644
--- a/test/test_nquads.py
+++ b/test/test_nquads.py
@@ -7,7 +7,7 @@ class NQuadsParserTest(unittest.TestCase):
def _load_example(self):
g = ConjunctiveGraph()
- with open("test/example.nquads", "r") as examples:
+ with open("test/example.nquads", "rb") as examples:
g.parse(examples, format="nquads")
return g
diff --git a/test/test_ntparse.py b/test/test_ntparse.py
index 5a510114..b1cd03b8 100644
--- a/test/test_ntparse.py
+++ b/test/test_ntparse.py
@@ -108,7 +108,7 @@ class NTTestCase(unittest.TestCase):
# self.assertRaises(ntriples.ParseError, p.literal)
def check_nt_parse(fpath, fmt):
- fp = open(fpath, 'r')
+ fp = open(fpath, 'rb')
p = ntriples.NTriplesParser(sink=ntriples.Sink())
sink = p.parse(fp) # file; use parsestring for a string
fp.close()
diff --git a/test/test_serializexml.py b/test/test_serializexml.py
index cbe1afcf..c5c58d41 100644
--- a/test/test_serializexml.py
+++ b/test/test_serializexml.py
@@ -4,7 +4,10 @@ from rdflib.namespace import RDFS
from rdflib.plugins.serializers.rdfxml import XMLSerializer
from rdflib.graph import ConjunctiveGraph
-from StringIO import StringIO
+try:
+ from io import BytesIO
+except ImportError:
+ from StringIO import StringIO as BytesIO
class SerializerTestBase(object):
@@ -53,7 +56,7 @@ def _mangled_copy(g):
def serialize(sourceGraph, makeSerializer, getValue=True, extra_args={}):
serializer = makeSerializer(sourceGraph)
- stream = StringIO()
+ stream = BytesIO()
serializer.serialize(stream, **extra_args)
return getValue and stream.getvalue() or stream