summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jörn Hees <dev@joernhees.de> 2014-04-03 14:34:36 +0200
committer: Jörn Hees <dev@joernhees.de> 2014-04-03 14:34:36 +0200
commit: 30f84baad79d9308e26f295e09a6496637c46fc4 (patch)
tree: b75ba665346e849826094a970a3ec2ad92063633
parent: f981fdb2f55bae8bdcc15173e92a620aba6de2a2 (diff)
parent: fade112ce6a71e75b02d8a699177137f38c529c3 (diff)
download: rdflib-30f84baad79d9308e26f295e09a6496637c46fc4.tar.gz
Merge pull request #377 from dbs/py3fix
Avoid bytes vs. str error in Python3 #closes 375
-rw-r--r-- rdflib/plugins/parsers/pyMicrodata/__init__.py | 2
-rw-r--r-- rdflib/plugins/parsers/pyRdfa/__init__.py | 2
-rw-r--r-- rdflib/tools/rdfpipe.py | 4
-rw-r--r-- test/mdata/codelab.html | 24
-rw-r--r-- test/test_issue375.py | 183
5 files changed, 213 insertions, 2 deletions
diff --git a/rdflib/plugins/parsers/pyMicrodata/__init__.py b/rdflib/plugins/parsers/pyMicrodata/__init__.py
index 8f9f1a7a..5e9b24e0 100644
--- a/rdflib/plugins/parsers/pyMicrodata/__init__.py
+++ b/rdflib/plugins/parsers/pyMicrodata/__init__.py
@@ -206,7 +206,7 @@ class pyMicrodata :
return url_request.data
else :
self.base = name
- return file(name)
+ return open(name, 'rb')
else :
return name
diff --git a/rdflib/plugins/parsers/pyRdfa/__init__.py b/rdflib/plugins/parsers/pyRdfa/__init__.py
index 89a49958..57101570 100644
--- a/rdflib/plugins/parsers/pyRdfa/__init__.py
+++ b/rdflib/plugins/parsers/pyRdfa/__init__.py
@@ -445,7 +445,7 @@ class pyRdfa :
self.charset = 'utf-8'
break
self.options.set_host_language(self.media_type)
- return file(name)
+ return open(name, 'rb')
else :
return name
except HTTPError :
diff --git a/rdflib/tools/rdfpipe.py b/rdflib/tools/rdfpipe.py
index df15d13e..9fb4b3b9 100644
--- a/rdflib/tools/rdfpipe.py
+++ b/rdflib/tools/rdfpipe.py
@@ -18,6 +18,7 @@ from rdflib.parser import Parser
from rdflib.serializer import Serializer
from rdflib.util import guess_format
+from rdflib.py3compat import PY3
DEFAULT_INPUT_FORMAT = 'xml'
@@ -170,6 +171,9 @@ def main():
ns_bindings[pfx] = uri
outfile = sys.stdout
+ if PY3:
+ outfile = sys.stdout.buffer
+
if opts.no_out:
outfile = None
diff --git a/test/mdata/codelab.html b/test/mdata/codelab.html
new file mode 100644
index 00000000..a2218aac
--- /dev/null
+++ b/test/mdata/codelab.html
@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8">
+ <title>Structured data with schema.org codelab</title>
+</head>
+<body itemscope itemtype="http://schema.org/TechArticle">
+<h1 itemprop="name">Structured data with schema.org codelab</h1>
+ <img style="float:right" src="squares.png" itemprop="image" />
+ <meta itemprop="educationalUse" content="codelab">
+ <p class="byline">
+ By <span itemprop="author"><a href="http://example.com/AuthorName">Author Name</a></span>,
+ <time itemprop="datePublished" content="20140129">January 29, 2014</time>
+ </p>
+<div itemprop="description">
+ <h2>About this codelab</h2>
+</div>
+<div itemprop="articleBody">
+ <h2>Exercise 1: From basic HTML to RDFa: first steps</h2>
+ <h2>Exercise 2: Embedded types</h2>
+ <h2>Exercise 3: From strings to things</h2>
+</div>
+</body>
+</html>
diff --git a/test/test_issue375.py b/test/test_issue375.py
new file mode 100644
index 00000000..cb3075fe
--- /dev/null
+++ b/test/test_issue375.py
@@ -0,0 +1,183 @@
+import subprocess
+import re
+
+rdfa_expected = u'''@prefix dc: <http://purl.org/dc/terms/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix frbr: <http://vocab.org/frbr/core#> .
+@prefix gr: <http://purl.org/goodrelations/v1#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+<http://customer.wileyeurope.com/CGI-BIN/lansaweb?procfun+shopcart+shcfn01+funcparms+parmisbn(a0130):9780596516499+parmqty(p0050):1+parmurl(l0560):http://oreilly.com/store/> a gr:Offering ;
+ gr:includesObject [ a gr:TypeAndQuantityNode ;
+ gr:ammountOfThisGood "1.0"^^xsd:float ;
+ gr:hasPriceSpecification [ a gr:UnitPriceSpecification ;
+ gr:hasCurrency "GBP"@en ;
+ gr:hasCurrencyValue "34.5"^^xsd:float ] ;
+ gr:typeOfGood <urn:x-domain:oreilly.com:product:9780596516499.BOOK> ] .
+
+<http://my.safaribooksonline.com/9780596803346> a gr:Offering ;
+ gr:includesObject [ a gr:TypeAndQuantityNode ;
+ gr:ammountOfThisGood "1.0"^^xsd:float ;
+ gr:typeOfGood <urn:x-domain:oreilly.com:product:9780596803346.SAF> ] .
+
+<https://epoch.oreilly.com/shop/cart.orm?p=BUNDLE&prod=9780596516499.BOOK&prod=9780596803391.EBOOK&bundle=1&retUrl=http%3A%252F%252Foreilly.com%252Fstore%252F> a gr:Offering ;
+ gr:includesObject [ a gr:TypeAndQuantityNode ;
+ gr:ammountOfThisGood "1.0"^^xsd:float ;
+ gr:includesObject [ a gr:TypeAndQuantityNode ;
+ gr:ammountOfThisGood "1.0"^^xsd:float ;
+ gr:hasPriceSpecification [ a gr:UnitPriceSpecification ;
+ gr:hasCurrency "None"@en ;
+ gr:hasCurrencyValue "49.49"^^xsd:float ] ;
+ gr:typeOfGood <urn:x-domain:oreilly.com:product:9780596803391.EBOOK> ] ;
+ gr:typeOfGood <urn:x-domain:oreilly.com:product:9780596516499.BOOK> ] .
+
+<https://epoch.oreilly.com/shop/cart.orm?prod=9780596516499.BOOK> a gr:Offering ;
+ gr:includesObject [ a gr:TypeAndQuantityNode ;
+ gr:ammountOfThisGood "1.0"^^xsd:float ;
+ gr:hasPriceSpecification [ a gr:UnitPriceSpecification ;
+ gr:hasCurrency "USD"@en ;
+ gr:hasCurrencyValue "44.99"^^xsd:float ] ;
+ gr:typeOfGood <urn:x-domain:oreilly.com:product:9780596516499.BOOK> ] .
+
+<https://epoch.oreilly.com/shop/cart.orm?prod=9780596803391.EBOOK> a gr:Offering ;
+ gr:includesObject [ a gr:TypeAndQuantityNode ;
+ gr:ammountOfThisGood "1.0"^^xsd:float ;
+ gr:hasPriceSpecification [ a gr:UnitPriceSpecification ;
+ gr:hasCurrency "USD"@en ;
+ gr:hasCurrencyValue "35.99"^^xsd:float ] ;
+ gr:typeOfGood <urn:x-domain:oreilly.com:product:9780596803391.EBOOK> ] .
+
+<urn:x-domain:oreilly.com:product:9780596516499.IP> a frbr:Expression ;
+ dc:creator <urn:x-domain:oreilly.com:agent:pdb:3343>,
+ <urn:x-domain:oreilly.com:agent:pdb:3501>,
+ <urn:x-domain:oreilly.com:agent:pdb:3502> ;
+ dc:issued "2009-06-12"^^xsd:dateTime ;
+ dc:publisher "O'Reilly Media"@en ;
+ dc:title "Natural Language Processing with Python"@en ;
+ frbr:embodiment <urn:x-domain:oreilly.com:product:9780596516499.BOOK>,
+ <urn:x-domain:oreilly.com:product:9780596803346.SAF>,
+ <urn:x-domain:oreilly.com:product:9780596803391.EBOOK> .
+
+<urn:x-domain:oreilly.com:agent:pdb:3343> a foaf:Person ;
+ foaf:homepage <http://www.oreillynet.com/pub/au/3614> ;
+ foaf:name "Steven Bird"@en .
+
+<urn:x-domain:oreilly.com:agent:pdb:3501> a foaf:Person ;
+ foaf:homepage <http://www.oreillynet.com/pub/au/3615> ;
+ foaf:name "Ewan Klein"@en .
+
+<urn:x-domain:oreilly.com:agent:pdb:3502> a foaf:Person ;
+ foaf:homepage <http://www.oreillynet.com/pub/au/3616> ;
+ foaf:name "Edward Loper"@en .
+
+<urn:x-domain:oreilly.com:product:9780596803346.SAF> a frbr:Manifestation ;
+ dc:type <http://purl.oreilly.com/product-types/SAF> .
+
+<urn:x-domain:oreilly.com:product:9780596803391.EBOOK> a frbr:Manifestation ;
+ dc:identifier <urn:isbn:9780596803391> ;
+ dc:issued "2009-06-12"^^xsd:dateTime ;
+ dc:type <http://purl.oreilly.com/product-types/EBOOK> .
+
+<urn:x-domain:oreilly.com:product:9780596516499.BOOK> a frbr:Manifestation ;
+ dc:extent """
+ 512
+ """@en ;
+ dc:identifier <urn:isbn:9780596516499> ;
+ dc:issued "2009-06-19"^^xsd:dateTime ;
+ dc:type <http://purl.oreilly.com/product-types/BOOK> .
+'''.strip()
+
+mdata_expected = u'''@prefix cat: <http://www.w3.org/ns/dcat#> .
+@prefix cc: <http://creativecommons.org/ns#> .
+@prefix ctag: <http://commontag.org/ns#> .
+@prefix dc: <http://purl.org/dc/terms/> .
+@prefix dc11: <http://purl.org/dc/elements/1.1/> .
+@prefix dcterms: <http://purl.org/dc/terms/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix gr: <http://purl.org/goodrelations/v1#> .
+@prefix grddl: <http://www.w3.org/2003/g/data-view#> .
+@prefix hcalendar: <http://microformats.org/profile/hcalendar#> .
+@prefix hcard: <http://microformats.org/profile/hcard#> .
+@prefix ical: <http://www.w3.org/2002/12/cal/icaltzd#> .
+@prefix ma: <http://www.w3.org/ns/ma-ont#> .
+@prefix md: <http://www.w3.org/ns/md#> .
+@prefix og: <http://ogp.me/ns#> .
+@prefix org: <http://www.w3.org/ns/org#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix prov: <http://www.w3.org/ns/prov#> .
+@prefix qb: <http://purl.org/linked-data/cube#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfa: <http://www.w3.org/ns/rdfa#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix rev: <http://purl.org/stuff/rev#> .
+@prefix rif: <http://www.w3.org/2007/rif#> .
+@prefix rr: <http://www.w3.org/ns/r2rml#> .
+@prefix schema: <http://schema.org/> .
+@prefix sd: <http://www.w3.org/ns/sparql-service-description#> .
+@prefix sioc: <http://rdfs.org/sioc/ns#> .
+@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+@prefix skosxl: <http://www.w3.org/2008/05/skos-xl#> .
+@prefix v: <http://rdf.data-vocabulary.org/#> .
+@prefix vcard: <http://www.w3.org/2006/vcard/ns#> .
+@prefix void: <http://rdfs.org/ns/void#> .
+@prefix wdr: <http://www.w3.org/2007/05/powder#> .
+@prefix wdrs: <http://www.w3.org/2007/05/powder-s#> .
+@prefix xhv: <http://www.w3.org/1999/xhtml/vocab#> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+<test/mdata/codelab.html> md:item ( [ a schema:TechArticle ;
+ schema:articleBody """
+ Exercise 1: From basic HTML to RDFa: first steps
+ Exercise 2: Embedded types
+ Exercise 3: From strings to things
+""" ;
+ schema:author "Author Name" ;
+ schema:datePublished "January 29, 2014" ;
+ schema:description """
+ About this codelab
+""" ;
+ schema:educationalUse "codelab" ;
+ schema:image <test/mdata/squares.png> ;
+ schema:name "Structured data with schema.org codelab" ] ) ;
+ rdfa:usesVocabulary schema: .
+'''.strip()
+
+def test_rdfpipe_bytes_vs_str():
+ """
+ Issue 375: rdfpipe command generates bytes vs. str TypeError
+
+ While Python2 exposes sys.stdout as a bytes buffer, Python 3
+ explicitly exposes sys.stdout.buffer for this purpose. Test
+ rdfpipe to ensure that we get the expected results.
+ """
+ args = ['python', 'rdflib/tools/rdfpipe.py', '-i', 'rdfa1.1', 'test/rdfa/oreilly.html']
+ proc = subprocess.Popen(args, stdout=subprocess.PIPE, universal_newlines=True)
+ res = ''
+ while proc.poll() is None:
+ res += proc.stdout.read()
+
+ assert res.strip() == rdfa_expected
+
+def test_rdfpipe_mdata_open():
+ """
+ Issue 375: rdfa1.1 and mdata processors used file() builtin
+
+ The file() builtin has been deprecated for a long time. Use
+ the open() builtin instead.
+ """
+ args = ['python', 'rdflib/tools/rdfpipe.py', '-i', 'mdata', 'test/mdata/codelab.html']
+ proc = subprocess.Popen(args, stdout=subprocess.PIPE, universal_newlines=True)
+ res = ''
+ while proc.poll() is None:
+ res += proc.stdout.read()
+
+ a = re.compile(r'^(.*?<)[^>]+(test/mdata/codelab.*?>)', flags=re.DOTALL)
+ b = re.compile(r'^(.*?<)[^>]+(test/mdata/squares.*?>)', flags=re.DOTALL)
+ res = a.sub(r'\1\2', res.strip())
+ res = b.sub(r'\1\2', res)
+
+ assert res == mdata_expected