diff options
author | Thomas Kluyver <takowl@gmail.com> | 2011-11-12 17:06:04 +0000 |
---|---|---|
committer | Thomas Kluyver <takowl@gmail.com> | 2011-11-12 17:06:04 +0000 |
commit | a1f30527d584215279cbfda0c30fa32ba7d81a70 (patch) | |
tree | bb2219e9d8552c901df13e3e815372e925d66e73 | |
parent | e2fb491a3da80f9e01f3303b3df24881ab41eefa (diff) | |
download | rdflib-a1f30527d584215279cbfda0c30fa32ba7d81a70.tar.gz |
Various fixes for Python 3 compatibility.
-rw-r--r-- | rdflib/compare.py | 2 | ||||
-rw-r--r-- | rdflib/graph.py | 6 | ||||
-rw-r--r-- | rdflib/parser.py | 8 | ||||
-rw-r--r-- | rdflib/plugins/parsers/notation3.py | 19 | ||||
-rw-r--r-- | rdflib/plugins/parsers/nquads.py | 6 | ||||
-rw-r--r-- | rdflib/plugins/parsers/ntriples.py | 49 | ||||
-rw-r--r-- | rdflib/plugins/serializers/nt.py | 11 | ||||
-rw-r--r-- | rdflib/py3compat.py | 4 | ||||
-rwxr-xr-x | run_tests_py3.sh | 2 | ||||
-rw-r--r-- | test/test_nquads.py | 2 | ||||
-rw-r--r-- | test/test_ntparse.py | 2 | ||||
-rw-r--r-- | test/test_serializexml.py | 7 |
12 files changed, 67 insertions, 51 deletions
diff --git a/rdflib/compare.py b/rdflib/compare.py index e1b3d579..0703a575 100644 --- a/rdflib/compare.py +++ b/rdflib/compare.py @@ -214,7 +214,7 @@ def _md5_hash(t): h = hashlib.md5() for i in t: if isinstance(i, tuple): - h.update(_md5_hash(i)) + h.update(_md5_hash(i).encode('ascii')) else: h.update(unicode(i).encode("utf8")) return h.hexdigest() diff --git a/rdflib/graph.py b/rdflib/graph.py index fed398b9..3e780748 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -527,7 +527,7 @@ class Graph(Node): else: if any is False: try: - next = values.next() + values.next() msg = ("While trying to find a value for (%s, %s, %s) the" " following multiple values where found:\n" % (subject, predicate, object)) @@ -811,7 +811,7 @@ class Graph(Node): ... ''' >>> import tempfile >>> file_name = tempfile.mktemp() - >>> f = file(file_name, "w") + >>> f = open(file_name, "w") >>> f.write(my_data) >>> f.close() @@ -826,7 +826,7 @@ class Graph(Node): 2 >>> g = Graph() - >>> result = g.parse(file=file(file_name, "r"), format="application/rdf+xml") + >>> result = g.parse(file=open(file_name, "r"), format="application/rdf+xml") >>> len(g) 2 diff --git a/rdflib/parser.py b/rdflib/parser.py index f8bd9509..8ecebb6c 100644 --- a/rdflib/parser.py +++ b/rdflib/parser.py @@ -23,10 +23,6 @@ except: from xml.sax import xmlreader from xml.sax.saxutils import prepare_input_source import types -try: - _StringTypes = (types.StringType, types.UnicodeType) -except AttributeError: - _StringTypes = (types.StringType,) from rdflib import __version__ from rdflib.term import URIRef @@ -139,7 +135,7 @@ def create_input_source(source=None, publicID=None, if isinstance(source, InputSource): input_source = source else: - if isinstance(source, _StringTypes): + if isinstance(source, basestring): location = source elif hasattr(source, "read") and not isinstance(source, Namespace): f = source @@ -155,7 +151,7 @@ def create_input_source(source=None, publicID=None, absolute_location = URIRef(location, base=base).defrag() if absolute_location.startswith("file:///"): filename = url2pathname(absolute_location.replace("file:///", "/")) - file = __builtin__.file(filename, "rb") + file = open(filename, "rb") else: input_source = URLInputSource(absolute_location, format) publicID = publicID or absolute_location diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index 1a135d51..6bad45f5 100644 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -42,6 +42,8 @@ from decimal import Decimal from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id from rdflib.graph import QuotedGraph, ConjunctiveGraph +from rdflib import py3compat +b = py3compat.b from rdflib.parser import Parser @@ -345,7 +347,10 @@ def canonical(str_in): s = '' i = 0 while i < len(s8): - ch = s8[i]; n = ord(ch) + if py3compat.PY3: + n = s8[i]; ch = chr(n) + else: + ch = s8[i]; n = ord(ch) if (n > 126) or (n < 33) : # %-encode controls, SP, DEL, and utf-8 s += "%%%02X" % ord(ch) elif ch == '%' and i+2 < len(s8): @@ -2187,7 +2192,7 @@ def backslashUify(ustr): to the given unicode""" # progress("String is "+`ustr`) # s1=ustr.encode('utf-8') - str = "" + s = "" for ch in ustr: # .encode('utf-8'): if ord(ch) > 65535: ch = "\\U%08X" % ord(ch) @@ -2195,8 +2200,8 @@ def backslashUify(ustr): ch = "\\u%04X" % ord(ch) else: ch = "%c" % ord(ch) - str = str + ch - return str + s = s + ch + return b(s) def hexify(ustr): """Use URL encoding to return an ASCII string @@ -2208,14 +2213,14 @@ def hexify(ustr): """ #" # progress("String is "+`ustr`) # s1=ustr.encode('utf-8') - str = "" + s = "" for ch in ustr: # .encode('utf-8'): if ord(ch) > 126 or ord(ch) < 33 : ch = "%%%02X" % ord(ch) else: ch = "%c" % ord(ch) - str = str + ch - return str + s = s + ch + return b(s) def dummy(): res = "" diff --git a/rdflib/plugins/parsers/nquads.py b/rdflib/plugins/parsers/nquads.py index 1a40b05d..c64cad5f 100644 --- a/rdflib/plugins/parsers/nquads.py +++ b/rdflib/plugins/parsers/nquads.py @@ -5,7 +5,7 @@ graphs that can be used and queried. The store that backs the graph >>> from rdflib import ConjunctiveGraph, URIRef, Namespace >>> g = ConjunctiveGraph() ->>> with open("test/example.nquads", "r") as examples: +>>> with open("test/example.nquads", "rb") as examples: ... sink = g.parse(examples, format="nquads") >>> assert len(g.store) == 449 >>> # There should be 16 separate contexts @@ -18,6 +18,8 @@ graphs that can be used and queried. The store that backs the graph >>> assert(g.value(s, FOAF.name) == "Arco Publications") """ +from rdflib.py3compat import b + # Build up from the NTriples parser: from rdflib.plugins.parsers.ntriples import NTriplesParser from rdflib.plugins.parsers.ntriples import ParseError @@ -79,7 +81,7 @@ class NQuadsParser(NTriplesParser): def parseline(self): self.eat(r_wspace) - if (not self.line) or self.line.startswith('#'): + if (not self.line) or self.line.startswith(b('#')): return # The line is empty or a comment subject = self.subject() diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index 8434a091..8f031160 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -10,16 +10,18 @@ from rdflib.term import URIRef as URI from rdflib.term import BNode as bNode from rdflib.term import Literal -uriref = r'<([^:]+:[^\s"<>]+)>' -literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"' -litinfo = r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^' + uriref + r')?' - -r_line = re.compile(r'([^\r\n]*)(?:\r\n|\r|\n)') -r_wspace = re.compile(r'[ \t]*') -r_wspaces = re.compile(r'[ \t]+') -r_tail = re.compile(r'[ \t]*\.[ \t]*') +from rdflib.py3compat import b + +uriref = b(r'<([^:]+:[^\s"<>]+)>') +literal = b(r'"([^"\\]*(?:\\.[^"\\]*)*)"') +litinfo = b(r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^') + uriref + b(r')?') + +r_line = re.compile(b(r'([^\r\n]*)(?:\r\n|\r|\n)')) +r_wspace = re.compile(b(r'[ \t]*')) +r_wspaces = re.compile(b(r'[ \t]+')) +r_tail = re.compile(b(r'[ \t]*\.[ \t]*')) r_uriref = re.compile(uriref) -r_nodeid = re.compile(r'_:([A-Za-z][A-Za-z0-9]*)') +r_nodeid = re.compile(b(r'_:([A-Za-z][A-Za-z0-9]*)')) r_literal = re.compile(literal + litinfo) bufsiz = 2048 @@ -37,10 +39,10 @@ class Sink(object): self.length += 1 print (s, p, o) -quot = {'t': '\t', 'n': '\n', 'r': '\r', '"': '"', '\\': '\\'} -r_safe = re.compile(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)') -r_quot = re.compile(r'\\(t|n|r|"|\\)') -r_uniquot = re.compile(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})') +quot = {b('t'): u'\t', b('n'): u'\n', b('r'): u'\r', b('"'): u'"', b('\\'): u'\\'} +r_safe = re.compile(b(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)')) +r_quot = re.compile(b(r'\\(t|n|r|"|\\)')) +r_uniquot = re.compile(b(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})')) def unquote(s): """Unquote an N-Triples string.""" @@ -52,7 +54,7 @@ def unquote(s): m = r_safe.match(s) if m: s = s[m.end():] - result.append(m.group(1)) + result.append(m.group(1).decode('ascii')) continue m = r_quot.match(s) @@ -72,9 +74,9 @@ def unquote(s): elif s.startswith('\\'): raise ParseError("Illegal escape at: %s..." % s[:10]) else: raise ParseError("Illegal literal character: %r" % s[0]) - return unicode(''.join(result)) + return u''.join(result) -r_hibyte = re.compile(r'([\x80-\xFF])') +r_hibyte = re.compile(ur'([\x80-\xFF])') def uriquote(uri): if not validate: @@ -146,7 +148,7 @@ class NTriplesParser(object): def parseline(self): self.eat(r_wspace) - if (not self.line) or self.line.startswith('#'): + if (not self.line) or self.line.startswith(b('#')): return # The line is empty or a comment subject = self.subject() @@ -169,6 +171,7 @@ class NTriplesParser(object): m = pattern.match(self.line) if not m: # @@ Why can't we get the original pattern? print(dir(pattern)) + print repr(self.line), type(self.line) raise ParseError("Failed to eat %s" % pattern) self.line = self.line[m.end():] return m @@ -193,7 +196,7 @@ class NTriplesParser(object): return objt def uriref(self): - if self.peek('<'): + if self.peek(b('<')): uri = self.eat(r_uriref).group(1) uri = unquote(uri) uri = uriquote(uri) @@ -201,15 +204,15 @@ class NTriplesParser(object): return False def nodeid(self): - if self.peek('_'): - return bNode(self.eat(r_nodeid).group(1)) + if self.peek(b('_')): + return bNode(self.eat(r_nodeid).group(1).decode()) return False def literal(self): - if self.peek('"'): + if self.peek(b('"')): lit, lang, dtype = self.eat(r_literal).groups() - lang = lang or None - dtype = dtype or None + lang = lang.decode() if lang else None + dtype = dtype.decode() if dtype else None if lang and dtype: raise ParseError("Can't have both a language and a datatype") lit = unquote(lit) diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py index e2284773..bbffb940 100644 --- a/rdflib/plugins/serializers/nt.py +++ b/rdflib/plugins/serializers/nt.py @@ -4,6 +4,7 @@ See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the format. """ from rdflib.serializer import Serializer +from rdflib.py3compat import b import warnings @@ -20,7 +21,7 @@ class NTSerializer(Serializer): encoding = self.encoding for triple in self.store: stream.write(_nt_row(triple).encode(encoding, "replace")) - stream.write("\n") + stream.write(b("\n")) def _nt_row(triple): @@ -61,12 +62,14 @@ def _xmlcharref_encode(unicode_data, encoding="ascii"): # order to catch unencodable characters: for char in unicode_data: try: - chars.append(char.encode(encoding, 'strict')) + char.encode(encoding, 'strict') except UnicodeError: if ord(char) <= 0xFFFF: - chars.append('\u%04X' % ord(char)) + chars.append('\\u%04X' % ord(char)) else: - chars.append('\U%08X' % ord(char)) + chars.append('\\U%08X' % ord(char)) + else: + chars.append(char) return ''.join(chars) diff --git a/rdflib/py3compat.py b/rdflib/py3compat.py index d251a223..7b8ca666 100644 --- a/rdflib/py3compat.py +++ b/rdflib/py3compat.py @@ -34,6 +34,8 @@ def _modify_str_or_docstring(str_change_func): if PY3: # Python 3: # --------- + def b(s): + return s.encode('ascii') # Abstract u'abc' syntax: @_modify_str_or_docstring @@ -46,6 +48,8 @@ if PY3: else: # Python 2 # -------- + def b(s): + return s # Abstract u'abc' syntax: @_modify_str_or_docstring diff --git a/run_tests_py3.sh b/run_tests_py3.sh index efe99fe4..15195395 100755 --- a/run_tests_py3.sh +++ b/run_tests_py3.sh @@ -16,4 +16,4 @@ cd build/py3_testing 2to3 -wn --no-diffs test 2to3 -wn --no-diffs run_tests.py -python3 run_tests.py +python3 run_tests.py 2> testlog diff --git a/test/test_nquads.py b/test/test_nquads.py index 7252bb90..c452f281 100644 --- a/test/test_nquads.py +++ b/test/test_nquads.py @@ -7,7 +7,7 @@ class NQuadsParserTest(unittest.TestCase): def _load_example(self): g = ConjunctiveGraph() - with open("test/example.nquads", "r") as examples: + with open("test/example.nquads", "rb") as examples: g.parse(examples, format="nquads") return g diff --git a/test/test_ntparse.py b/test/test_ntparse.py index 5a510114..b1cd03b8 100644 --- a/test/test_ntparse.py +++ b/test/test_ntparse.py @@ -108,7 +108,7 @@ class NTTestCase(unittest.TestCase): # self.assertRaises(ntriples.ParseError, p.literal) def check_nt_parse(fpath, fmt): - fp = open(fpath, 'r') + fp = open(fpath, 'rb') p = ntriples.NTriplesParser(sink=ntriples.Sink()) sink = p.parse(fp) # file; use parsestring for a string fp.close() diff --git a/test/test_serializexml.py b/test/test_serializexml.py index cbe1afcf..c5c58d41 100644 --- a/test/test_serializexml.py +++ b/test/test_serializexml.py @@ -4,7 +4,10 @@ from rdflib.namespace import RDFS from rdflib.plugins.serializers.rdfxml import XMLSerializer from rdflib.graph import ConjunctiveGraph -from StringIO import StringIO +try: + from io import BytesIO +except ImportError: + from StringIO import StringIO as BytesIO class SerializerTestBase(object): @@ -53,7 +56,7 @@ def _mangled_copy(g): def serialize(sourceGraph, makeSerializer, getValue=True, extra_args={}): serializer = makeSerializer(sourceGraph) - stream = StringIO() + stream = BytesIO() serializer.serialize(stream, **extra_args) return getValue and stream.getvalue() or stream |