Merge pull request #724 from jszakmeister/fix-some-unicode-errors

Fixes some Unicode errors/handling in several areas of Nose. A special thanks to Thomas Kluyver (@takluyver) for all the reviews while getting this together.
author: John Szakmeister <john@szakmeister.net> 2013-10-21 02:22:06 -0700
committer: John Szakmeister <john@szakmeister.net> 2013-10-21 02:22:06 -0700
commit: 5d74d920f520089bd70035c8bbe1a2d51beac72e (patch)
tree: 1f0a5b474073cf809b78c605873ae616953d2abb
parent: 1325f7027669e1590eca7b1284c0299269a4d10b (diff)
parent: 7c447cf3b305313c5131a1d56dfee4d30a9e0d87 (diff)
download: nose-5d74d920f520089bd70035c8bbe1a2d51beac72e.tar.gz
8 files changed, 112 insertions, 97 deletions
diff --git a/functional_tests/support/issue720/test.py b/functional_tests/support/issue720/test.py
new file mode 100644
index 0000000..0a194fd
--- /dev/null
+++ b/functional_tests/support/issue720/test.py
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+import unittest
+class Test(unittest.TestCase):
+    def test(self):
+        print u"Unicöde"
+        assert 1 == 2
diff --git a/functional_tests/test_failuredetail_plugin.py b/functional_tests/test_failuredetail_plugin.py
index 284cf49..8484461 100644
--- a/functional_tests/test_failuredetail_plugin.py
+++ b/functional_tests/test_failuredetail_plugin.py
@@ -46,5 +46,19 @@ class TestFailureDetailWithCapture(PluginTester, unittest.TestCase):
 
         assert expect in self.output
 
+class TestFailureDetailWithUnicodeAndCapture(PluginTester, unittest.TestCase):
+    activate = "-d"
+    args = ['-v']
+    plugins = [FailureDetail(), Capture()]
+    suitepath = os.path.join(support, 'issue720')
+
+    def runTest(self):
+        print '*' * 70
+        print str(self.output)
+        print '*' * 70
+
+        assert 'UnicodeDecodeError' not in self.output
+        assert 'UnicodeEncodeError' not in self.output
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/nose/failure.py b/nose/failure.py
index d24401c..c5fabfd 100644
--- a/nose/failure.py
+++ b/nose/failure.py
@@ -1,6 +1,7 @@
 import logging
 import unittest
 from traceback import format_tb
+from nose.pyversion import is_base_exception
 
 log = logging.getLogger(__name__)
 
@@ -34,7 +35,7 @@ class Failure(unittest.TestCase):
     
     def runTest(self):
         if self.tb is not None:
-            if isinstance(self.exc_val, BaseException):
+            if is_base_exception(self.exc_val):
                 raise self.exc_val, None, self.tb
             raise self.exc_class, self.exc_val, self.tb
         else:
diff --git a/nose/plugins/capture.py b/nose/plugins/capture.py
index 224f0a5..fa4e5dc 100644
--- a/nose/plugins/capture.py
+++ b/nose/plugins/capture.py
@@ -13,6 +13,7 @@ import logging
 import os
 import sys
 from nose.plugins.base import Plugin
+from nose.pyversion import exc_to_unicode, force_unicode
 from nose.util import ln
 from StringIO import StringIO
 
@@ -86,30 +87,8 @@ class Capture(Plugin):
         return self.formatError(test, err)
 
     def addCaptureToErr(self, ev, output):
-        if isinstance(ev, BaseException):
-            if hasattr(ev, '__unicode__'):
-                # 2.6+
-                try:
-                    ev = unicode(ev)
-                except UnicodeDecodeError:
-                    # We need a unicode string... take our best shot at getting,
-                    # since we don't know what the original encoding is in.
-                    ev = str(ev).decode('utf8', 'replace')
-            else:
-                # 2.5-
-                if not hasattr(ev, 'message'):
-                    # 2.4
-                    msg = len(ev.args) and ev.args[0] or ''
-                else:
-                    msg = ev.message
-                if (isinstance(msg, basestring) and
-                    not isinstance(msg, unicode)):
-                    msg = msg.decode('utf8', 'replace')
-                ev = u'%s: %s' % (ev.__class__.__name__, msg)
-        elif not isinstance(ev, basestring):
-            ev = repr(ev)
-        if not isinstance(output, unicode):
-            output = output.decode('utf8', 'replace')
+        ev = exc_to_unicode(ev)
+        output = force_unicode(output)
         return u'\n'.join([ev, ln(u'>> begin captured stdout <<'),
                            output, ln(u'>> end captured stdout <<')])
 
diff --git a/nose/plugins/failuredetail.py b/nose/plugins/failuredetail.py
index 4c0729c..6462865 100644
--- a/nose/plugins/failuredetail.py
+++ b/nose/plugins/failuredetail.py
@@ -7,6 +7,7 @@ debugging information.
 """
     
 from nose.plugins import Plugin
+from nose.pyversion import exc_to_unicode, force_unicode
 from nose.inspector import inspect_traceback
 
 class FailureDetail(Plugin):
@@ -38,10 +39,11 @@ class FailureDetail(Plugin):
         """Add detail from traceback inspection to error message of a failure.
         """
         ec, ev, tb = err
-        tbinfo, str_ev = None, str(ev)
+        tbinfo, str_ev = None, exc_to_unicode(ev)
+
         if tb:
-            tbinfo = inspect_traceback(tb)
-            str_ev = '\n'.join([str(ev), tbinfo])
+            tbinfo = force_unicode(inspect_traceback(tb))
+            str_ev = '\n'.join([str_ev, tbinfo])
         test.tbinfo = tbinfo
         return (ec, str_ev, tb)
 
diff --git a/nose/plugins/xunit.py b/nose/plugins/xunit.py
index 88255b9..7e5d793 100644
--- a/nose/plugins/xunit.py
+++ b/nose/plugins/xunit.py
@@ -49,7 +49,7 @@ from xml.sax import saxutils
 
 from nose.plugins.base import Plugin
 from nose.exc import SkipTest
-from nose.pyversion import UNICODE_STRINGS
+from nose.pyversion import force_unicode, format_exception
 
 # Invalid XML characters, control characters 0-31 sans \t, \n and \r
 CONTROL_CHARACTERS = re.compile(r"[\000-\010\013\014\016-\037]")
@@ -112,26 +112,16 @@ def exc_message(exc_info):
                 # Fallback to args as neither str nor
                 # unicode(Exception(u'\xe6')) work in Python < 2.6
                 result = exc.args[0]
+    result = force_unicode(result, 'UTF-8')
     return xml_safe(result)
 
-def format_exception(exc_info):
-    ec, ev, tb = exc_info
-
-    # formatError() may have turned our exception object into a string, and
-    # Python 3's traceback.format_exception() doesn't take kindly to that (it
-    # expects an actual exception object).  So we work around it, by doing the
-    # work ourselves if ev is a string.
-    if isinstance(ev, basestring):
-        tb_data = ''.join(traceback.format_tb(tb))
-        return tb_data + ev
-    else:
-        return ''.join(traceback.format_exception(*exc_info))
-
 class Tee(object):
-    def __init__(self, *args):
+    def __init__(self, encoding, *args):
+        self._encoding = encoding
         self._streams = args
 
     def write(self, data):
+        data = force_unicode(data, self._encoding)
         for s in self._streams:
             s.write(data)
 
@@ -173,8 +163,6 @@ class Xunit(Plugin):
     def _quoteattr(self, attr):
         """Escape an XML attribute. Value can be unicode."""
         attr = xml_safe(attr)
-        if isinstance(attr, unicode) and not UNICODE_STRINGS:
-            attr = attr.encode(self.encoding)
         return saxutils.quoteattr(attr)
 
     def options(self, parser, env):
@@ -217,7 +205,7 @@ class Xunit(Plugin):
             u'<testsuite name="nosetests" tests="%(total)d" '
             u'errors="%(errors)d" failures="%(failures)d" '
             u'skip="%(skipped)d">' % self.stats)
-        self.error_report_file.write(u''.join([self._forceUnicode(e)
+        self.error_report_file.write(u''.join([force_unicode(e, self.encoding)
                                                for e in self.errorlist]))
         self.error_report_file.write(u'</testsuite>')
         self.error_report_file.close()
@@ -229,8 +217,8 @@ class Xunit(Plugin):
         self._capture_stack.append((sys.stdout, sys.stderr))
         self._currentStdout = StringIO()
         self._currentStderr = StringIO()
-        sys.stdout = Tee(self._currentStdout, sys.stdout)
-        sys.stderr = Tee(self._currentStderr, sys.stderr)
+        sys.stdout = Tee(self.encoding, self._currentStdout, sys.stdout)
+        sys.stderr = Tee(self.encoding, self._currentStderr, sys.stderr)
 
     def startContext(self, context):
         self._startCapture()
@@ -281,12 +269,13 @@ class Xunit(Plugin):
             type = 'error'
             self.stats['errors'] += 1
 
-        tb = format_exception(err)
+        tb = format_exception(err, self.encoding)
         id = test.id()
+
         self.errorlist.append(
-            '<testcase classname=%(cls)s name=%(name)s time="%(taken).3f">'
-            '<%(type)s type=%(errtype)s message=%(message)s><![CDATA[%(tb)s]]>'
-            '</%(type)s>%(systemout)s%(systemerr)s</testcase>' %
+            u'<testcase classname=%(cls)s name=%(name)s time="%(taken).3f">'
+            u'<%(type)s type=%(errtype)s message=%(message)s><![CDATA[%(tb)s]]>'
+            u'</%(type)s>%(systemout)s%(systemerr)s</testcase>' %
             {'cls': self._quoteattr(id_split(id)[0]),
              'name': self._quoteattr(id_split(id)[-1]),
              'taken': taken,
@@ -302,13 +291,14 @@ class Xunit(Plugin):
         """Add failure output to Xunit report.
         """
         taken = self._timeTaken()
-        tb = format_exception(err)
+        tb = format_exception(err, self.encoding)
         self.stats['failures'] += 1
         id = test.id()
+
         self.errorlist.append(
-            '<testcase classname=%(cls)s name=%(name)s time="%(taken).3f">'
-            '<failure type=%(errtype)s message=%(message)s><![CDATA[%(tb)s]]>'
-            '</failure>%(systemout)s%(systemerr)s</testcase>' %
+            u'<testcase classname=%(cls)s name=%(name)s time="%(taken).3f">'
+            u'<failure type=%(errtype)s message=%(message)s><![CDATA[%(tb)s]]>'
+            u'</failure>%(systemout)s%(systemerr)s</testcase>' %
             {'cls': self._quoteattr(id_split(id)[0]),
              'name': self._quoteattr(id_split(id)[-1]),
              'taken': taken,
@@ -334,9 +324,3 @@ class Xunit(Plugin):
              'systemout': self._getCapturedStdout(),
              'systemerr': self._getCapturedStderr(),
              })
-
-    def _forceUnicode(self, s):
-        if not UNICODE_STRINGS:
-            if isinstance(s, str):
-                s = s.decode(self.encoding, 'replace')
-        return s
diff --git a/nose/pyversion.py b/nose/pyversion.py
index a6ec3f7..07c105f 100644
--- a/nose/pyversion.py
+++ b/nose/pyversion.py
@@ -3,19 +3,33 @@ This module contains fixups for using nose under different versions of Python.
 """
 import sys
 import os
+import traceback
 import types
 import inspect
 import nose.util
 
 __all__ = ['make_instancemethod', 'cmp_to_key', 'sort_list', 'ClassType',
            'TypeType', 'UNICODE_STRINGS', 'unbound_method', 'ismethod',
-           'bytes_']
+           'bytes_', 'is_base_exception', 'force_unicode', 'exc_to_unicode',
+           'format_exception']
 
 # In Python 3.x, all strings are unicode (the call to 'unicode()' in the 2.x
 # source will be replaced with 'str()' when running 2to3, so this test will
 # then become true)
 UNICODE_STRINGS = (type(unicode()) == type(str()))
 
+if sys.version_info[:2] < (3, 0):
+    def force_unicode(s, encoding='UTF-8'):
+        try:
+            s = unicode(s)
+        except UnicodeDecodeError:
+            s = str(s).decode(encoding, 'replace')
+
+        return s
+else:
+    def force_unicode(s, encoding='UTF-8'):
+        return str(s)
+
 # new.instancemethod() is obsolete for new-style classes (Python 3.x)
 # We need to use descriptor methods instead.
 try:
@@ -147,3 +161,52 @@ else:
             return func.func_code.co_flags & CO_GENERATOR != 0
         except AttributeError:
             return False
+
+# Make a function to help check if an exception is derived from BaseException.
+# In Python 2.4, we just use Exception instead.
+if sys.version_info[:2] < (2, 5):
+    def is_base_exception(exc):
+        return isinstance(exc, Exception)
+else:
+    def is_base_exception(exc):
+        return isinstance(exc, BaseException)
+
+if sys.version_info[:2] < (3, 0):
+    def exc_to_unicode(ev, encoding='utf-8'):
+        if is_base_exception(ev):
+            if not hasattr(ev, '__unicode__'):
+                # 2.5-
+                if not hasattr(ev, 'message'):
+                    # 2.4
+                    msg = len(ev.args) and ev.args[0] or ''
+                else:
+                    msg = ev.message
+                msg = force_unicode(msg, encoding=encoding)
+                clsname = force_unicode(ev.__class__.__name__,
+                        encoding=encoding)
+                ev = u'%s: %s' % (clsname, msg)
+        elif not isinstance(ev, unicode):
+            ev = repr(ev)
+
+        return force_unicode(ev, encoding=encoding)
+else:
+    def exc_to_unicode(ev, encoding='utf-8'):
+        return str(ev)
+
+def format_exception(exc_info, encoding='UTF-8'):
+    ec, ev, tb = exc_info
+
+    # Our exception object may have been turned into a string, and Python 3's
+    # traceback.format_exception() doesn't take kindly to that (it expects an
+    # actual exception object).  So we work around it, by doing the work
+    # ourselves if ev is not an exception object.
+    if not is_base_exception(ev):
+        tb_data = force_unicode(
+                ''.join(traceback.format_tb(tb)),
+                encoding)
+        ev = exc_to_unicode(ev)
+        return tb_data + ev
+    else:
+        return force_unicode(
+                ''.join(traceback.format_exception(*exc_info)),
+                encoding)
diff --git a/unit_tests/test_xunit.py b/unit_tests/test_xunit.py
index c141739..d98ccba 100644
--- a/unit_tests/test_xunit.py
+++ b/unit_tests/test_xunit.py
@@ -23,40 +23,6 @@ mktest.__test__ = False
 
 time_taken = re.compile(r'\d\.\d\d')
 
-class TestEscaping(unittest.TestCase):
-
-    def setUp(self):
-        self.x = Xunit()
-
-    def test_all(self):
-        eq_(self.x._quoteattr(
-            '''<baz src="http://foo?f=1&b=2" quote="inix hubris 'maximus'?" />'''),
-            ('"&lt;baz src=&quot;http://foo?f=1&amp;b=2&quot; '
-                'quote=&quot;inix hubris \'maximus\'?&quot; /&gt;"'))
-
-    def test_unicode_is_utf8_by_default(self):
-        if not UNICODE_STRINGS:
-            eq_(self.x._quoteattr(u'Ivan Krsti\u0107'),
-                '"Ivan Krsti\xc4\x87"')
-
-    def test_unicode_custom_utf16_madness(self):
-        self.x.encoding = 'utf-16'
-        utf16 = self.x._quoteattr(u'Ivan Krsti\u0107')[1:-1]
-
-        if UNICODE_STRINGS:
-            # If all internal strings are unicode, then _quoteattr shouldn't
-            # have changed anything.
-            eq_(utf16, u'Ivan Krsti\u0107')
-        else:
-            # to avoid big/little endian bytes, assert that we can put it back:
-            eq_(utf16.decode('utf16'), u'Ivan Krsti\u0107')
-
-    def test_control_characters(self):
-        # quoting of \n, \r varies in diff. python versions
-        n = saxutils.quoteattr('\n')[1:-1]
-        r = saxutils.quoteattr('\r')[1:-1]
-        eq_(self.x._quoteattr('foo\n\b\f\r'), '"foo%s??%s"' % (n, r))
-        eq_(escape_cdata('foo\n\b\f\r'), 'foo\n??\r')
 
 class TestSplitId(unittest.TestCase):
author	John Szakmeister <john@szakmeister.net>	2013-10-21 02:22:06 -0700
committer	John Szakmeister <john@szakmeister.net>	2013-10-21 02:22:06 -0700
commit	5d74d920f520089bd70035c8bbe1a2d51beac72e (patch)
tree	1f0a5b474073cf809b78c605873ae616953d2abb
parent	1325f7027669e1590eca7b1284c0299269a4d10b (diff)
parent	7c447cf3b305313c5131a1d56dfee4d30a9e0d87 (diff)
download	nose-5d74d920f520089bd70035c8bbe1a2d51beac72e.tar.gz