diff options
author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2012-06-13 14:14:12 +0000 |
---|---|---|
committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2012-06-13 14:14:12 +0000 |
commit | 6ecaeec7db2158e5692373a1be38d6820367e221 (patch) | |
tree | 89d9ab25474dfbf9d1ba033e6b02e5543eb64f20 | |
parent | d4b8bb1e834f812600c35ac82950a420f936ca00 (diff) | |
download | docutils-6ecaeec7db2158e5692373a1be38d6820367e221.tar.gz |
Fixup: safer implementation of binary data output under Python 3.
Prevent test error under Python 3. Add tests for FileOutput. Document.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7440 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- | HISTORY.txt | 5 | ||||
-rw-r--r-- | docutils/io.py | 76 | ||||
-rw-r--r-- | test/DocutilsTestSupport.py | 2 | ||||
-rw-r--r-- | test/test_error_reporting.py | 4 | ||||
-rwxr-xr-x | test/test_io.py | 106 |
5 files changed, 152 insertions, 41 deletions
diff --git a/HISTORY.txt b/HISTORY.txt index d7cb6f989..ebee0ce61 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -28,6 +28,11 @@ Changes Since 0.9 - Use converted sources from the ``build/`` directory for tests under Python 3. +* docutils/io.py + + - Fix writing binary data to sys.stdout under Python 3 (allows + ``rst2odt.py`` to be used with output redirection). + * docutils/parsers/rst/directives/misc.py - Fix [ 3525847 ]. Catch and report UnicodeEncodeError with diff --git a/docutils/io.py b/docutils/io.py index f44ed6e93..b992e6d71 100644 --- a/docutils/io.py +++ b/docutils/io.py @@ -21,6 +21,21 @@ from docutils.error_reporting import locale_encoding, ErrorString, ErrorOutput class InputError(IOError): pass class OutputError(IOError): pass +def check_encoding(stream, encoding): + """Test, whether the encoding of `stream` matches `encoding`. + + Returns + + :None: if `encoding` or `stream.encoding` are not a valid encoding + argument (e.g. ``None``) or `stream.encoding is missing. + :True: if the encoding argument resolves to the same value as `encoding`, + :False: if the encodings differ. + """ + try: + return codecs.lookup(stream.encoding) == codecs.lookup(encoding) + except (LookupError, AttributeError, TypeError): + return None + class Input(TransformSpec): @@ -231,10 +246,7 @@ class FileInput(Input): else: self.source = sys.stdin elif (sys.version_info >= (3,0) and - self.encoding and hasattr(self.source, 'encoding') and - self.encoding != self.source.encoding and - codecs.lookup(self.encoding) != - codecs.lookup(self.source.encoding)): + check_encoding(self.source, self.encoding) is False): # TODO: re-open, warn or raise error? raise UnicodeError('Encoding clash: encoding given is "%s" ' 'but source is opened with encoding "%s".' 
% @@ -327,10 +339,7 @@ class FileOutput(Output): if destination_path: self.opened = False else: - if sys.version_info >= (3,0) and 'b' in self.mode: - self.destination = sys.stdout.buffer - else: - self.destination = sys.stdout + self.destination = sys.stdout elif (# destination is file-type object -> check mode: mode and hasattr(self.destination, 'mode') and mode != self.destination.mode): @@ -342,16 +351,21 @@ class FileOutput(Output): self.destination_path = self.destination.name except AttributeError: pass - if (encoding and hasattr(self.destination, 'encoding') - and codecs.lookup(self.encoding) != - codecs.lookup(self.destination.encoding)): - if self.destination is sys.stdout and sys.version_info >= (3,0): - self.destination = sys.stdout.buffer - else: - raise UnicodeError('Encoding of %s (%s) ' - 'differs from specified encoding (%s)' % - (self.destination_path or 'destination', - self.destination.encoding, encoding)) + # Special cases under Python 3: different encoding or binary output + if sys.version_info >= (3,0): + if ('b' in self.mode + and self.destination in (sys.stdout, sys.stderr) + ): + self.destination = self.destination.buffer + if check_encoding(self.destination, self.encoding) is False: + if self.destination in (sys.stdout, sys.stderr): + self.destination = self.destination.buffer + else: # TODO: try the `write to .buffer` scheme instead? + raise ValueError('Encoding of %s (%s) differs \n' + ' from specified encoding (%s)' % + (self.destination_path or 'destination', + destination.encoding, encoding)) + def open(self): # Specify encoding in Python 3. @@ -375,25 +389,23 @@ class FileOutput(Output): def write(self, data): """Encode `data`, write it to a single file, and return it. - With Python 3 or binary output mode, `data` is returned unchanged. + With Python 3 or binary output mode, `data` is returned unchanged, + except when specified encoding and output encoding differ. 
""" - if sys.version_info < (3,0) and 'b' not in self.mode: - data = self.encode(data) if not self.opened: self.open() try: # In Python < 2.5, try...except has to be nested in try...finally. try: - if (sys.version_info >= (3,0) - and self.destination is sys.stdout.buffer - and 'b' not in self.mode): - # encode now, as sys.stdout.encoding != self.encoding - bdata = self.encode(data) - if os.linesep != '\n': - bdata = bdata.replace('\n', os.linesep) - self.destination.buffer.write(bdata) - else: - self.destination.write(data) - except (UnicodeError, LookupError), err: # can only happen in py3k + if 'b' not in self.mode and (sys.version_info < (3,0) or + check_encoding(self.destination, self.encoding) is False): + data = self.encode(data) + if sys.version_info >= (3,0) and os.linesep != '\n': + # writing as binary data -> fix endings + data = data.replace('\n', os.linesep) + + self.destination.write(data) + + except (UnicodeError, LookupError), err: raise UnicodeError( 'Unable to encode output data. 
output-encoding is: ' '%s.\n(%s)' % (self.encoding, ErrorString(err))) diff --git a/test/DocutilsTestSupport.py b/test/DocutilsTestSupport.py index 82c3391b4..dae25ccc3 100644 --- a/test/DocutilsTestSupport.py +++ b/test/DocutilsTestSupport.py @@ -881,7 +881,7 @@ def _format_str(*args): return_tuple = [] for i in args: r = repr(i) - if ( (isinstance(i, str) or isinstance(i, unicode)) + if ( (isinstance(i, bytes) or isinstance(i, unicode)) and '\n' in i): stripped = '' if isinstance(i, unicode) and r.startswith('u'): diff --git a/test/test_error_reporting.py b/test/test_error_reporting.py index fd13356c0..989d04518 100644 --- a/test/test_error_reporting.py +++ b/test/test_error_reporting.py @@ -158,14 +158,14 @@ class ErrorStringTests(unittest.TestCase): # ----------------- # Stub: Buffer with 'strict' auto-conversion of input to byte string: -class BBuf(BytesIO, object): +class BBuf(BytesIO, object): # super class object required by Python <= 2.5 def write(self, data): if isinstance(data, unicode): data.encode('ascii', 'strict') super(BBuf, self).write(data) # Stub: Buffer expecting unicode string: -class UBuf(StringIO, object): +class UBuf(StringIO, object): # super class object required by Python <= 2.5 def write(self, data): # emulate Python 3 handling of stdout, stderr if isinstance(data, bytes): diff --git a/test/test_io.py b/test/test_io.py index fa77c749e..1d213ce7a 100755 --- a/test/test_io.py +++ b/test/test_io.py @@ -13,12 +13,45 @@ import DocutilsTestSupport # must be imported before docutils from docutils import io from docutils._compat import b, bytes from docutils.error_reporting import locale_encoding +from test_error_reporting import BBuf, UBuf -class InputTests(unittest.TestCase): +# python 2.3 +if not hasattr(unittest.TestCase, "assertTrue"): + assertTrue = unittest.TestCase.failUnless + +class mock_stdout(UBuf): + encoding = 'utf8' + + def __init__(self): + self.buffer = BBuf() + UBuf.__init__(self) + +class HelperTests(unittest.TestCase): + + 
def test_check_encoding_true(self): + """Return `True` if lookup returns the same codec""" + self.assertEqual(io.check_encoding(mock_stdout, 'utf8'), True) + self.assertEqual(io.check_encoding(mock_stdout, 'utf-8'), True) + self.assertEqual(io.check_encoding(mock_stdout, 'UTF-8'), True) - # python 2.3 - if not hasattr(unittest.TestCase, "assertTrue"): - assertTrue = unittest.TestCase.failUnless + def test_check_encoding_false(self): + """Return `False` if lookup returns different codecs""" + self.assertEqual(io.check_encoding(mock_stdout, 'ascii'), False) + self.assertEqual(io.check_encoding(mock_stdout, 'latin-1'), False) + + def test_check_encoding_none(self): + """Cases where the comparison fails.""" + # stream.encoding is None: + self.assertEqual(io.check_encoding(io.FileInput(), 'ascii'), None) + # stream.encoding does not exist: + self.assertEqual(io.check_encoding(BBuf, 'ascii'), None) + # encoding is None: + self.assertEqual(io.check_encoding(mock_stdout, None), None) + # encoding is invalid + self.assertEqual(io.check_encoding(mock_stdout, 'UTF-9'), None) + + +class InputTests(unittest.TestCase): def test_bom(self): input = io.StringInput(source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'), @@ -75,13 +108,13 @@ print "hello world" # if no encoding is given, try decoding with utf8: input = io.FileInput(source_path='functional/input/cyrillic.txt') data = input.read() - if sys.version_info < (3,0): + if sys.version_info < (3,0): # in Py3k, the locale encoding is used without --input-encoding # skipping the heuristic self.assertEqual(input.successful_encoding, 'utf-8') def test_heuristics_no_utf8(self): - # if no encoding is given and decoding with utf8 fails, + # if no encoding is given and decoding with utf8 fails, # use either the locale encoding (if specified) or latin1: input = io.FileInput(source_path='data/latin1.txt') data = input.read() @@ -91,5 +124,66 @@ print "hello world" self.assertEqual(data, u'Gr\xfc\xdfe\n') +class OutputTests(unittest.TestCase): + 
+ bdata = b('\xfc') + udata = u'\xfc' + + def setUp(self): + self.bdrain = BBuf() + """Buffer accepting binary strings (bytes)""" + self.udrain = UBuf() + """Buffer accepting unicode strings""" + self.mock_stdout = mock_stdout() + """Stub of sys.stdout under Python 3""" + + def test_write_unicode(self): + fo = io.FileOutput(destination=self.udrain, encoding='unicode', + autoclose=False) + fo.write(self.udata) + self.assertEqual(self.udrain.getvalue(), self.udata) + + def test_write_utf8(self): + if sys.version_info >= (3,0): + fo = io.FileOutput(destination=self.udrain, encoding='utf8', + autoclose=False) + fo.write(self.udata) + self.assertEqual(self.udrain.getvalue(), self.udata) + else: + fo = io.FileOutput(destination=self.bdrain, encoding='utf8', + autoclose=False) + fo.write(self.udata) + self.assertEqual(self.bdrain.getvalue(), self.udata.encode('utf8')) + + # With destination in binary mode, data must be binary string + # and is written as-is: + def test_write_bytes(self): + fo = io.FileOutput(destination=self.bdrain, encoding='utf8', + mode='wb', autoclose=False) + fo.write(self.bdata) + self.assertEqual(self.bdrain.getvalue(), self.bdata) + + # Test for Python 3 features: + if sys.version_info >= (3,0): + def test_write_bytes_to_stdout(self): + # binary data is written to destination.buffer, if the + # destination is sys.stdout or sys.stdin + backup = sys.stdout + sys.stdout = self.mock_stdout + fo = io.FileOutput(destination=sys.stdout, mode='wb', + autoclose=False) + fo.write(self.bdata) + self.assertEqual(self.mock_stdout.buffer.getvalue(), + self.bdata) + sys.stdout = backup + + def test_encoding_clash(self): + # Raise error, if given and destination encodings differ + # TODO: try the `write to .buffer` scheme instead? + self.assertRaises(ValueError, + io.FileOutput, destination=self.mock_stdout, + encoding='latin1') + + if __name__ == '__main__': unittest.main() |