summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2012-06-13 14:14:12 +0000
committermilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2012-06-13 14:14:12 +0000
commit6ecaeec7db2158e5692373a1be38d6820367e221 (patch)
tree89d9ab25474dfbf9d1ba033e6b02e5543eb64f20
parentd4b8bb1e834f812600c35ac82950a420f936ca00 (diff)
downloaddocutils-6ecaeec7db2158e5692373a1be38d6820367e221.tar.gz
Fixup: safer implementation of binary data output under Python 3.
Prevent test error under Python 3. Add tests for FileOutput. Document. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7440 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r--HISTORY.txt5
-rw-r--r--docutils/io.py76
-rw-r--r--test/DocutilsTestSupport.py2
-rw-r--r--test/test_error_reporting.py4
-rwxr-xr-xtest/test_io.py106
5 files changed, 152 insertions, 41 deletions
diff --git a/HISTORY.txt b/HISTORY.txt
index d7cb6f989..ebee0ce61 100644
--- a/HISTORY.txt
+++ b/HISTORY.txt
@@ -28,6 +28,11 @@ Changes Since 0.9
- Use converted sources from the ``build/`` directory for tests under
Python 3.
+* docutils/io.py
+
+ - Fix writing binary data to sys.stdout under Python 3 (allows
+ ``rst2odt.py`` to be used with output redirection).
+
* docutils/parsers/rst/directives/misc.py
- Fix [ 3525847 ]. Catch and report UnicodeEncodeError with
diff --git a/docutils/io.py b/docutils/io.py
index f44ed6e93..b992e6d71 100644
--- a/docutils/io.py
+++ b/docutils/io.py
@@ -21,6 +21,21 @@ from docutils.error_reporting import locale_encoding, ErrorString, ErrorOutput
class InputError(IOError): pass
class OutputError(IOError): pass
+def check_encoding(stream, encoding):
+ """Test, whether the encoding of `stream` matches `encoding`.
+
+ Returns
+
+ :None: if `encoding` or `stream.encoding` is not a valid encoding
+ argument (e.g. ``None``) or `stream.encoding` is missing.
+ :True: if `stream.encoding` resolves to the same codec as `encoding`,
+ :False: if the encodings differ.
+ """
+ try:
+ return codecs.lookup(stream.encoding) == codecs.lookup(encoding)
+ except (LookupError, AttributeError, TypeError):
+ return None
+
class Input(TransformSpec):
@@ -231,10 +246,7 @@ class FileInput(Input):
else:
self.source = sys.stdin
elif (sys.version_info >= (3,0) and
- self.encoding and hasattr(self.source, 'encoding') and
- self.encoding != self.source.encoding and
- codecs.lookup(self.encoding) !=
- codecs.lookup(self.source.encoding)):
+ check_encoding(self.source, self.encoding) is False):
# TODO: re-open, warn or raise error?
raise UnicodeError('Encoding clash: encoding given is "%s" '
'but source is opened with encoding "%s".' %
@@ -327,10 +339,7 @@ class FileOutput(Output):
if destination_path:
self.opened = False
else:
- if sys.version_info >= (3,0) and 'b' in self.mode:
- self.destination = sys.stdout.buffer
- else:
- self.destination = sys.stdout
+ self.destination = sys.stdout
elif (# destination is file-type object -> check mode:
mode and hasattr(self.destination, 'mode')
and mode != self.destination.mode):
@@ -342,16 +351,21 @@ class FileOutput(Output):
self.destination_path = self.destination.name
except AttributeError:
pass
- if (encoding and hasattr(self.destination, 'encoding')
- and codecs.lookup(self.encoding) !=
- codecs.lookup(self.destination.encoding)):
- if self.destination is sys.stdout and sys.version_info >= (3,0):
- self.destination = sys.stdout.buffer
- else:
- raise UnicodeError('Encoding of %s (%s) '
- 'differs from specified encoding (%s)' %
- (self.destination_path or 'destination',
- self.destination.encoding, encoding))
+ # Special cases under Python 3: different encoding or binary output
+ if sys.version_info >= (3,0):
+ if ('b' in self.mode
+ and self.destination in (sys.stdout, sys.stderr)
+ ):
+ self.destination = self.destination.buffer
+ if check_encoding(self.destination, self.encoding) is False:
+ if self.destination in (sys.stdout, sys.stderr):
+ self.destination = self.destination.buffer
+ else: # TODO: try the `write to .buffer` scheme instead?
+ raise ValueError('Encoding of %s (%s) differs \n'
+ ' from specified encoding (%s)' %
+ (self.destination_path or 'destination',
+ destination.encoding, encoding))
+
def open(self):
# Specify encoding in Python 3.
@@ -375,25 +389,23 @@ class FileOutput(Output):
def write(self, data):
"""Encode `data`, write it to a single file, and return it.
- With Python 3 or binary output mode, `data` is returned unchanged.
+ With Python 3 or binary output mode, `data` is returned unchanged,
+ except when specified encoding and output encoding differ.
"""
- if sys.version_info < (3,0) and 'b' not in self.mode:
- data = self.encode(data)
if not self.opened:
self.open()
try: # In Python < 2.5, try...except has to be nested in try...finally.
try:
- if (sys.version_info >= (3,0)
- and self.destination is sys.stdout.buffer
- and 'b' not in self.mode):
- # encode now, as sys.stdout.encoding != self.encoding
- bdata = self.encode(data)
- if os.linesep != '\n':
- bdata = bdata.replace('\n', os.linesep)
- self.destination.buffer.write(bdata)
- else:
- self.destination.write(data)
- except (UnicodeError, LookupError), err: # can only happen in py3k
+ if 'b' not in self.mode and (sys.version_info < (3,0) or
+ check_encoding(self.destination, self.encoding) is False):
+ data = self.encode(data)
+ if sys.version_info >= (3,0) and os.linesep != '\n':
+ # writing as binary data -> fix endings
+ data = data.replace('\n', os.linesep)
+
+ self.destination.write(data)
+
+ except (UnicodeError, LookupError), err:
raise UnicodeError(
'Unable to encode output data. output-encoding is: '
'%s.\n(%s)' % (self.encoding, ErrorString(err)))
diff --git a/test/DocutilsTestSupport.py b/test/DocutilsTestSupport.py
index 82c3391b4..dae25ccc3 100644
--- a/test/DocutilsTestSupport.py
+++ b/test/DocutilsTestSupport.py
@@ -881,7 +881,7 @@ def _format_str(*args):
return_tuple = []
for i in args:
r = repr(i)
- if ( (isinstance(i, str) or isinstance(i, unicode))
+ if ( (isinstance(i, bytes) or isinstance(i, unicode))
and '\n' in i):
stripped = ''
if isinstance(i, unicode) and r.startswith('u'):
diff --git a/test/test_error_reporting.py b/test/test_error_reporting.py
index fd13356c0..989d04518 100644
--- a/test/test_error_reporting.py
+++ b/test/test_error_reporting.py
@@ -158,14 +158,14 @@ class ErrorStringTests(unittest.TestCase):
# -----------------
# Stub: Buffer with 'strict' auto-conversion of input to byte string:
-class BBuf(BytesIO, object):
+class BBuf(BytesIO, object): # super class object required by Python <= 2.5
def write(self, data):
if isinstance(data, unicode):
data.encode('ascii', 'strict')
super(BBuf, self).write(data)
# Stub: Buffer expecting unicode string:
-class UBuf(StringIO, object):
+class UBuf(StringIO, object): # super class object required by Python <= 2.5
def write(self, data):
# emulate Python 3 handling of stdout, stderr
if isinstance(data, bytes):
diff --git a/test/test_io.py b/test/test_io.py
index fa77c749e..1d213ce7a 100755
--- a/test/test_io.py
+++ b/test/test_io.py
@@ -13,12 +13,45 @@ import DocutilsTestSupport # must be imported before docutils
from docutils import io
from docutils._compat import b, bytes
from docutils.error_reporting import locale_encoding
+from test_error_reporting import BBuf, UBuf
-class InputTests(unittest.TestCase):
+# python 2.3
+if not hasattr(unittest.TestCase, "assertTrue"):
+ assertTrue = unittest.TestCase.failUnless
+
+class mock_stdout(UBuf):
+ encoding = 'utf8'
+
+ def __init__(self):
+ self.buffer = BBuf()
+ UBuf.__init__(self)
+
+class HelperTests(unittest.TestCase):
+
+ def test_check_encoding_true(self):
+ """Return `True` if lookup returns the same codec"""
+ self.assertEqual(io.check_encoding(mock_stdout, 'utf8'), True)
+ self.assertEqual(io.check_encoding(mock_stdout, 'utf-8'), True)
+ self.assertEqual(io.check_encoding(mock_stdout, 'UTF-8'), True)
- # python 2.3
- if not hasattr(unittest.TestCase, "assertTrue"):
- assertTrue = unittest.TestCase.failUnless
+ def test_check_encoding_false(self):
+ """Return `False` if lookup returns different codecs"""
+ self.assertEqual(io.check_encoding(mock_stdout, 'ascii'), False)
+ self.assertEqual(io.check_encoding(mock_stdout, 'latin-1'), False)
+
+ def test_check_encoding_none(self):
+ """Cases where the comparison fails."""
+ # stream.encoding is None:
+ self.assertEqual(io.check_encoding(io.FileInput(), 'ascii'), None)
+ # stream.encoding does not exist:
+ self.assertEqual(io.check_encoding(BBuf, 'ascii'), None)
+ # encoding is None:
+ self.assertEqual(io.check_encoding(mock_stdout, None), None)
+ # encoding is invalid
+ self.assertEqual(io.check_encoding(mock_stdout, 'UTF-9'), None)
+
+
+class InputTests(unittest.TestCase):
def test_bom(self):
input = io.StringInput(source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'),
@@ -75,13 +108,13 @@ print "hello world"
# if no encoding is given, try decoding with utf8:
input = io.FileInput(source_path='functional/input/cyrillic.txt')
data = input.read()
- if sys.version_info < (3,0):
+ if sys.version_info < (3,0):
# in Py3k, the locale encoding is used without --input-encoding
# skipping the heuristic
self.assertEqual(input.successful_encoding, 'utf-8')
def test_heuristics_no_utf8(self):
- # if no encoding is given and decoding with utf8 fails,
+ # if no encoding is given and decoding with utf8 fails,
# use either the locale encoding (if specified) or latin1:
input = io.FileInput(source_path='data/latin1.txt')
data = input.read()
@@ -91,5 +124,66 @@ print "hello world"
self.assertEqual(data, u'Gr\xfc\xdfe\n')
+class OutputTests(unittest.TestCase):
+
+ bdata = b('\xfc')
+ udata = u'\xfc'
+
+ def setUp(self):
+ self.bdrain = BBuf()
+ """Buffer accepting binary strings (bytes)"""
+ self.udrain = UBuf()
+ """Buffer accepting unicode strings"""
+ self.mock_stdout = mock_stdout()
+ """Stub of sys.stdout under Python 3"""
+
+ def test_write_unicode(self):
+ fo = io.FileOutput(destination=self.udrain, encoding='unicode',
+ autoclose=False)
+ fo.write(self.udata)
+ self.assertEqual(self.udrain.getvalue(), self.udata)
+
+ def test_write_utf8(self):
+ if sys.version_info >= (3,0):
+ fo = io.FileOutput(destination=self.udrain, encoding='utf8',
+ autoclose=False)
+ fo.write(self.udata)
+ self.assertEqual(self.udrain.getvalue(), self.udata)
+ else:
+ fo = io.FileOutput(destination=self.bdrain, encoding='utf8',
+ autoclose=False)
+ fo.write(self.udata)
+ self.assertEqual(self.bdrain.getvalue(), self.udata.encode('utf8'))
+
+ # With destination in binary mode, data must be binary string
+ # and is written as-is:
+ def test_write_bytes(self):
+ fo = io.FileOutput(destination=self.bdrain, encoding='utf8',
+ mode='wb', autoclose=False)
+ fo.write(self.bdata)
+ self.assertEqual(self.bdrain.getvalue(), self.bdata)
+
+ # Test for Python 3 features:
+ if sys.version_info >= (3,0):
+ def test_write_bytes_to_stdout(self):
+ # binary data is written to destination.buffer, if the
+ # destination is sys.stdout or sys.stderr
+ backup = sys.stdout
+ sys.stdout = self.mock_stdout
+ fo = io.FileOutput(destination=sys.stdout, mode='wb',
+ autoclose=False)
+ fo.write(self.bdata)
+ self.assertEqual(self.mock_stdout.buffer.getvalue(),
+ self.bdata)
+ sys.stdout = backup
+
+ def test_encoding_clash(self):
+ # Raise error, if given and destination encodings differ
+ # TODO: try the `write to .buffer` scheme instead?
+ self.assertRaises(ValueError,
+ io.FileOutput, destination=self.mock_stdout,
+ encoding='latin1')
+
+
if __name__ == '__main__':
unittest.main()