Fixup: safer implementation of binary data output under Python 3.

Prevent test error under Python 3. Add tests for FileOutput. Document. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7440 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
author: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2012-06-13 14:14:12 +0000
committer: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2012-06-13 14:14:12 +0000
commit: 6ecaeec7db2158e5692373a1be38d6820367e221 (patch)
tree: 89d9ab25474dfbf9d1ba033e6b02e5543eb64f20
parent: d4b8bb1e834f812600c35ac82950a420f936ca00 (diff)
download: docutils-6ecaeec7db2158e5692373a1be38d6820367e221.tar.gz
5 files changed, 152 insertions, 41 deletions
diff --git a/HISTORY.txt b/HISTORY.txt
index d7cb6f989..ebee0ce61 100644
--- a/HISTORY.txt
+++ b/HISTORY.txt
@@ -28,6 +28,11 @@ Changes Since 0.9
   - Use converted sources from the ``build/`` directory for tests under
     Python 3.
 
+* docutils/io.py
+
+  - Fix writing binary data to sys.stdout under Python 3 (allows
+    ``rst2odt.py`` to be used with output redirection).
+
 * docutils/parsers/rst/directives/misc.py
 
   - Fix [ 3525847 ]. Catch and report UnicodeEncodeError with 
diff --git a/docutils/io.py b/docutils/io.py
index f44ed6e93..b992e6d71 100644
--- a/docutils/io.py
+++ b/docutils/io.py
@@ -21,6 +21,21 @@ from docutils.error_reporting import locale_encoding, ErrorString, ErrorOutput
 class InputError(IOError): pass
 class OutputError(IOError): pass
 
+def check_encoding(stream, encoding):
+    """Test, whether the encoding of `stream` matches `encoding`.
+
+    Returns
+
+    :None:  if `encoding` or `stream.encoding` are not a valid encoding
+            argument (e.g. ``None``) or `stream.encoding` is missing.
+    :True:  if `stream.encoding` resolves to the same codec as `encoding`,
+    :False: if the encodings differ.
+    """
+    try:
+        return codecs.lookup(stream.encoding) == codecs.lookup(encoding)
+    except (LookupError, AttributeError, TypeError):
+        return None
+
 
 class Input(TransformSpec):
 
@@ -231,10 +246,7 @@ class FileInput(Input):
             else:
                 self.source = sys.stdin
         elif (sys.version_info >= (3,0) and
-              self.encoding and hasattr(self.source, 'encoding') and
-              self.encoding != self.source.encoding and
-              codecs.lookup(self.encoding) !=
-              codecs.lookup(self.source.encoding)):
+              check_encoding(self.source, self.encoding) is False):
             # TODO: re-open, warn or raise error?
             raise UnicodeError('Encoding clash: encoding given is "%s" '
                                'but source is opened with encoding "%s".' %
@@ -327,10 +339,7 @@ class FileOutput(Output):
             if destination_path:
                 self.opened = False
             else:
-                if sys.version_info >= (3,0) and 'b' in self.mode:
-                    self.destination = sys.stdout.buffer
-                else:
-                    self.destination = sys.stdout
+                self.destination = sys.stdout
         elif (# destination is file-type object -> check mode:
               mode and hasattr(self.destination, 'mode')
               and mode != self.destination.mode):
@@ -342,16 +351,21 @@ class FileOutput(Output):
                 self.destination_path = self.destination.name
             except AttributeError:
                 pass
-        if (encoding and hasattr(self.destination, 'encoding')
-            and codecs.lookup(self.encoding) !=
-            codecs.lookup(self.destination.encoding)):
-            if self.destination is sys.stdout and sys.version_info >= (3,0):
-                self.destination = sys.stdout.buffer
-            else:
-                raise UnicodeError('Encoding of %s (%s) '
-                                   'differs from specified encoding (%s)' %
-                                   (self.destination_path or 'destination',
-                                    self.destination.encoding, encoding))
+        # Special cases under Python 3: different encoding or binary output
+        if sys.version_info >= (3,0):
+            if ('b' in self.mode
+                and self.destination in (sys.stdout, sys.stderr)
+               ):
+                self.destination = self.destination.buffer
+            if check_encoding(self.destination, self.encoding) is False:
+                if self.destination in (sys.stdout, sys.stderr):
+                    self.destination = self.destination.buffer
+                else:  # TODO: try the `write to .buffer` scheme instead?
+                    raise ValueError('Encoding of %s (%s) differs \n'
+                                     '  from specified encoding (%s)' %
+                                     (self.destination_path or 'destination',
+                                      destination.encoding, encoding))
+
 
     def open(self):
         # Specify encoding in Python 3.
@@ -375,25 +389,23 @@ class FileOutput(Output):
     def write(self, data):
         """Encode `data`, write it to a single file, and return it.
 
-        With Python 3 or binary output mode, `data` is returned unchanged.
+        With Python 3 or binary output mode, `data` is returned unchanged,
+        except when specified encoding and output encoding differ.
         """
-        if sys.version_info < (3,0) and 'b' not in self.mode:
-            data = self.encode(data)
         if not self.opened:
             self.open()
         try: # In Python < 2.5, try...except has to be nested in try...finally.
             try:
-                if (sys.version_info >= (3,0)
-                    and self.destination is sys.stdout.buffer
-                    and 'b' not in self.mode):
-                    # encode now, as sys.stdout.encoding != self.encoding
-                    bdata = self.encode(data)
-                    if os.linesep != '\n':
-                        bdata = bdata.replace('\n', os.linesep)
-                    self.destination.buffer.write(bdata)
-                else:
-                    self.destination.write(data)
-            except (UnicodeError, LookupError), err: # can only happen in py3k
+                if 'b' not in self.mode and (sys.version_info < (3,0) or
+                   check_encoding(self.destination, self.encoding) is False):
+                    data = self.encode(data)
+                    if sys.version_info >= (3,0) and os.linesep != '\n':
+                        # writing as binary data -> fix endings
+                        data = data.replace('\n', os.linesep)
+
+                self.destination.write(data)
+
+            except (UnicodeError, LookupError), err:
                 raise UnicodeError(
                     'Unable to encode output data. output-encoding is: '
                     '%s.\n(%s)' % (self.encoding, ErrorString(err)))
diff --git a/test/DocutilsTestSupport.py b/test/DocutilsTestSupport.py
index 82c3391b4..dae25ccc3 100644
--- a/test/DocutilsTestSupport.py
+++ b/test/DocutilsTestSupport.py
@@ -881,7 +881,7 @@ def _format_str(*args):
     return_tuple = []
     for i in args:
         r = repr(i)
-        if ( (isinstance(i, str) or isinstance(i, unicode))
+        if ( (isinstance(i, bytes) or isinstance(i, unicode))
              and '\n' in i):
             stripped = ''
             if isinstance(i, unicode) and r.startswith('u'):
diff --git a/test/test_error_reporting.py b/test/test_error_reporting.py
index fd13356c0..989d04518 100644
--- a/test/test_error_reporting.py
+++ b/test/test_error_reporting.py
@@ -158,14 +158,14 @@ class ErrorStringTests(unittest.TestCase):
 # -----------------
 
 # Stub: Buffer with 'strict' auto-conversion of input to byte string:
-class BBuf(BytesIO, object):
+class BBuf(BytesIO, object): # super class object required by Python <= 2.5
     def write(self, data):
         if isinstance(data, unicode):
             data.encode('ascii', 'strict')
         super(BBuf, self).write(data)
 
 # Stub: Buffer expecting unicode string:
-class UBuf(StringIO, object):
+class UBuf(StringIO, object): # super class object required by Python <= 2.5
     def write(self, data):
         # emulate Python 3 handling of stdout, stderr
         if isinstance(data, bytes):
diff --git a/test/test_io.py b/test/test_io.py
index fa77c749e..1d213ce7a 100755
--- a/test/test_io.py
+++ b/test/test_io.py
@@ -13,12 +13,45 @@ import DocutilsTestSupport              # must be imported before docutils
 from docutils import io
 from docutils._compat import b, bytes
 from docutils.error_reporting import locale_encoding
+from test_error_reporting import BBuf, UBuf
 
-class InputTests(unittest.TestCase):
+# python 2.3
+if not hasattr(unittest.TestCase, "assertTrue"):
+    assertTrue = unittest.TestCase.failUnless
+
+class mock_stdout(UBuf):
+    encoding = 'utf8'
+
+    def __init__(self):
+        self.buffer = BBuf()
+        UBuf.__init__(self)
+
+class HelperTests(unittest.TestCase):
+
+    def test_check_encoding_true(self):
+        """Return `True` if lookup returns the same codec"""
+        self.assertEqual(io.check_encoding(mock_stdout, 'utf8'), True)
+        self.assertEqual(io.check_encoding(mock_stdout, 'utf-8'), True)
+        self.assertEqual(io.check_encoding(mock_stdout, 'UTF-8'), True)
 
-    # python 2.3
-    if not hasattr(unittest.TestCase, "assertTrue"):
-        assertTrue = unittest.TestCase.failUnless
+    def test_check_encoding_false(self):
+        """Return `False` if lookup returns different codecs"""
+        self.assertEqual(io.check_encoding(mock_stdout, 'ascii'), False)
+        self.assertEqual(io.check_encoding(mock_stdout, 'latin-1'), False)
+
+    def test_check_encoding_none(self):
+        """Cases where the comparison fails."""
+        # stream.encoding is None:
+        self.assertEqual(io.check_encoding(io.FileInput(), 'ascii'), None)
+        # stream.encoding does not exist:
+        self.assertEqual(io.check_encoding(BBuf, 'ascii'), None)
+        # encoding is None:
+        self.assertEqual(io.check_encoding(mock_stdout, None), None)
+        # encoding is invalid
+        self.assertEqual(io.check_encoding(mock_stdout, 'UTF-9'), None)
+
+
+class InputTests(unittest.TestCase):
 
     def test_bom(self):
         input = io.StringInput(source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'),
@@ -75,13 +108,13 @@ print "hello world"
         # if no encoding is given, try decoding with utf8:
         input = io.FileInput(source_path='functional/input/cyrillic.txt')
         data = input.read()
-        if sys.version_info < (3,0): 
+        if sys.version_info < (3,0):
             # in Py3k, the locale encoding is used without --input-encoding
             # skipping the heuristic
             self.assertEqual(input.successful_encoding, 'utf-8')
 
     def test_heuristics_no_utf8(self):
-        # if no encoding is given and decoding with utf8 fails, 
+        # if no encoding is given and decoding with utf8 fails,
         # use either the locale encoding (if specified) or latin1:
         input = io.FileInput(source_path='data/latin1.txt')
         data = input.read()
@@ -91,5 +124,66 @@ print "hello world"
             self.assertEqual(data, u'Gr\xfc\xdfe\n')
 
 
+class OutputTests(unittest.TestCase):
+
+    bdata = b('\xfc')
+    udata = u'\xfc'
+
+    def setUp(self):
+        self.bdrain = BBuf()
+        """Buffer accepting binary strings (bytes)"""
+        self.udrain = UBuf()
+        """Buffer accepting unicode strings"""
+        self.mock_stdout = mock_stdout()
+        """Stub of sys.stdout under Python 3"""
+
+    def test_write_unicode(self):
+        fo = io.FileOutput(destination=self.udrain, encoding='unicode',
+                           autoclose=False)
+        fo.write(self.udata)
+        self.assertEqual(self.udrain.getvalue(), self.udata)
+
+    def test_write_utf8(self):
+        if sys.version_info >= (3,0):
+            fo = io.FileOutput(destination=self.udrain, encoding='utf8',
+                               autoclose=False)
+            fo.write(self.udata)
+            self.assertEqual(self.udrain.getvalue(), self.udata)
+        else:
+            fo = io.FileOutput(destination=self.bdrain, encoding='utf8',
+                               autoclose=False)
+            fo.write(self.udata)
+            self.assertEqual(self.bdrain.getvalue(), self.udata.encode('utf8'))
+
+    # With destination in binary mode, data must be binary string
+    # and is written as-is:
+    def test_write_bytes(self):
+        fo = io.FileOutput(destination=self.bdrain, encoding='utf8',
+                           mode='wb', autoclose=False)
+        fo.write(self.bdata)
+        self.assertEqual(self.bdrain.getvalue(), self.bdata)
+
+    # Test for Python 3 features:
+    if sys.version_info >= (3,0):
+        def test_write_bytes_to_stdout(self):
+            # binary data is written to destination.buffer, if the
+            # destination is sys.stdout or sys.stderr
+            backup = sys.stdout
+            sys.stdout = self.mock_stdout
+            fo = io.FileOutput(destination=sys.stdout, mode='wb',
+                               autoclose=False)
+            fo.write(self.bdata)
+            self.assertEqual(self.mock_stdout.buffer.getvalue(),
+                             self.bdata)
+            sys.stdout = backup
+
+        def test_encoding_clash(self):
+            # Raise error, if given and destination encodings differ
+            # TODO: try the `write to .buffer` scheme instead?
+            self.assertRaises(ValueError,
+                              io.FileOutput, destination=self.mock_stdout,
+                               encoding='latin1')
+
+
 if __name__ == '__main__':
     unittest.main()
author	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2012-06-13 14:14:12 +0000
committer	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2012-06-13 14:14:12 +0000
commit	6ecaeec7db2158e5692373a1be38d6820367e221 (patch)
tree	89d9ab25474dfbf9d1ba033e6b02e5543eb64f20
parent	d4b8bb1e834f812600c35ac82950a420f936ca00 (diff)
download	docutils-6ecaeec7db2158e5692373a1be38d6820367e221.tar.gz