diff options
author | John Szakmeister <john@szakmeister.net> | 2013-10-21 02:22:06 -0700 |
---|---|---|
committer | John Szakmeister <john@szakmeister.net> | 2013-10-21 02:22:06 -0700 |
commit | 5d74d920f520089bd70035c8bbe1a2d51beac72e (patch) | |
tree | 1f0a5b474073cf809b78c605873ae616953d2abb | |
parent | 1325f7027669e1590eca7b1284c0299269a4d10b (diff) | |
parent | 7c447cf3b305313c5131a1d56dfee4d30a9e0d87 (diff) | |
download | nose-5d74d920f520089bd70035c8bbe1a2d51beac72e.tar.gz |
Merge pull request #724 from jszakmeister/fix-some-unicode-errors
Fixes some Unicode errors and Unicode handling in several areas of Nose.
Special thanks to Thomas Kluyver (@takluyver) for all the reviews while this was being put together.
-rw-r--r-- | functional_tests/support/issue720/test.py | 6 | ||||
-rw-r--r-- | functional_tests/test_failuredetail_plugin.py | 14 | ||||
-rw-r--r-- | nose/failure.py | 3 | ||||
-rw-r--r-- | nose/plugins/capture.py | 27 | ||||
-rw-r--r-- | nose/plugins/failuredetail.py | 8 | ||||
-rw-r--r-- | nose/plugins/xunit.py | 52 | ||||
-rw-r--r-- | nose/pyversion.py | 65 | ||||
-rw-r--r-- | unit_tests/test_xunit.py | 34 |
8 files changed, 112 insertions, 97 deletions
diff --git a/functional_tests/support/issue720/test.py b/functional_tests/support/issue720/test.py new file mode 100644 index 0000000..0a194fd --- /dev/null +++ b/functional_tests/support/issue720/test.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +import unittest +class Test(unittest.TestCase): + def test(self): + print u"Unicöde" + assert 1 == 2 diff --git a/functional_tests/test_failuredetail_plugin.py b/functional_tests/test_failuredetail_plugin.py index 284cf49..8484461 100644 --- a/functional_tests/test_failuredetail_plugin.py +++ b/functional_tests/test_failuredetail_plugin.py @@ -46,5 +46,19 @@ class TestFailureDetailWithCapture(PluginTester, unittest.TestCase): assert expect in self.output +class TestFailureDetailWithUnicodeAndCapture(PluginTester, unittest.TestCase): + activate = "-d" + args = ['-v'] + plugins = [FailureDetail(), Capture()] + suitepath = os.path.join(support, 'issue720') + + def runTest(self): + print '*' * 70 + print str(self.output) + print '*' * 70 + + assert 'UnicodeDecodeError' not in self.output + assert 'UnicodeEncodeError' not in self.output + if __name__ == '__main__': unittest.main() diff --git a/nose/failure.py b/nose/failure.py index d24401c..c5fabfd 100644 --- a/nose/failure.py +++ b/nose/failure.py @@ -1,6 +1,7 @@ import logging import unittest from traceback import format_tb +from nose.pyversion import is_base_exception log = logging.getLogger(__name__) @@ -34,7 +35,7 @@ class Failure(unittest.TestCase): def runTest(self): if self.tb is not None: - if isinstance(self.exc_val, BaseException): + if is_base_exception(self.exc_val): raise self.exc_val, None, self.tb raise self.exc_class, self.exc_val, self.tb else: diff --git a/nose/plugins/capture.py b/nose/plugins/capture.py index 224f0a5..fa4e5dc 100644 --- a/nose/plugins/capture.py +++ b/nose/plugins/capture.py @@ -13,6 +13,7 @@ import logging import os import sys from nose.plugins.base import Plugin +from nose.pyversion import exc_to_unicode, force_unicode from nose.util 
import ln from StringIO import StringIO @@ -86,30 +87,8 @@ class Capture(Plugin): return self.formatError(test, err) def addCaptureToErr(self, ev, output): - if isinstance(ev, BaseException): - if hasattr(ev, '__unicode__'): - # 2.6+ - try: - ev = unicode(ev) - except UnicodeDecodeError: - # We need a unicode string... take our best shot at getting, - # since we don't know what the original encoding is in. - ev = str(ev).decode('utf8', 'replace') - else: - # 2.5- - if not hasattr(ev, 'message'): - # 2.4 - msg = len(ev.args) and ev.args[0] or '' - else: - msg = ev.message - if (isinstance(msg, basestring) and - not isinstance(msg, unicode)): - msg = msg.decode('utf8', 'replace') - ev = u'%s: %s' % (ev.__class__.__name__, msg) - elif not isinstance(ev, basestring): - ev = repr(ev) - if not isinstance(output, unicode): - output = output.decode('utf8', 'replace') + ev = exc_to_unicode(ev) + output = force_unicode(output) return u'\n'.join([ev, ln(u'>> begin captured stdout <<'), output, ln(u'>> end captured stdout <<')]) diff --git a/nose/plugins/failuredetail.py b/nose/plugins/failuredetail.py index 4c0729c..6462865 100644 --- a/nose/plugins/failuredetail.py +++ b/nose/plugins/failuredetail.py @@ -7,6 +7,7 @@ debugging information. """ from nose.plugins import Plugin +from nose.pyversion import exc_to_unicode, force_unicode from nose.inspector import inspect_traceback class FailureDetail(Plugin): @@ -38,10 +39,11 @@ class FailureDetail(Plugin): """Add detail from traceback inspection to error message of a failure. 
""" ec, ev, tb = err - tbinfo, str_ev = None, str(ev) + tbinfo, str_ev = None, exc_to_unicode(ev) + if tb: - tbinfo = inspect_traceback(tb) - str_ev = '\n'.join([str(ev), tbinfo]) + tbinfo = force_unicode(inspect_traceback(tb)) + str_ev = '\n'.join([str_ev, tbinfo]) test.tbinfo = tbinfo return (ec, str_ev, tb) diff --git a/nose/plugins/xunit.py b/nose/plugins/xunit.py index 88255b9..7e5d793 100644 --- a/nose/plugins/xunit.py +++ b/nose/plugins/xunit.py @@ -49,7 +49,7 @@ from xml.sax import saxutils from nose.plugins.base import Plugin from nose.exc import SkipTest -from nose.pyversion import UNICODE_STRINGS +from nose.pyversion import force_unicode, format_exception # Invalid XML characters, control characters 0-31 sans \t, \n and \r CONTROL_CHARACTERS = re.compile(r"[\000-\010\013\014\016-\037]") @@ -112,26 +112,16 @@ def exc_message(exc_info): # Fallback to args as neither str nor # unicode(Exception(u'\xe6')) work in Python < 2.6 result = exc.args[0] + result = force_unicode(result, 'UTF-8') return xml_safe(result) -def format_exception(exc_info): - ec, ev, tb = exc_info - - # formatError() may have turned our exception object into a string, and - # Python 3's traceback.format_exception() doesn't take kindly to that (it - # expects an actual exception object). So we work around it, by doing the - # work ourselves if ev is a string. - if isinstance(ev, basestring): - tb_data = ''.join(traceback.format_tb(tb)) - return tb_data + ev - else: - return ''.join(traceback.format_exception(*exc_info)) - class Tee(object): - def __init__(self, *args): + def __init__(self, encoding, *args): + self._encoding = encoding self._streams = args def write(self, data): + data = force_unicode(data, self._encoding) for s in self._streams: s.write(data) @@ -173,8 +163,6 @@ class Xunit(Plugin): def _quoteattr(self, attr): """Escape an XML attribute. 
Value can be unicode.""" attr = xml_safe(attr) - if isinstance(attr, unicode) and not UNICODE_STRINGS: - attr = attr.encode(self.encoding) return saxutils.quoteattr(attr) def options(self, parser, env): @@ -217,7 +205,7 @@ class Xunit(Plugin): u'<testsuite name="nosetests" tests="%(total)d" ' u'errors="%(errors)d" failures="%(failures)d" ' u'skip="%(skipped)d">' % self.stats) - self.error_report_file.write(u''.join([self._forceUnicode(e) + self.error_report_file.write(u''.join([force_unicode(e, self.encoding) for e in self.errorlist])) self.error_report_file.write(u'</testsuite>') self.error_report_file.close() @@ -229,8 +217,8 @@ class Xunit(Plugin): self._capture_stack.append((sys.stdout, sys.stderr)) self._currentStdout = StringIO() self._currentStderr = StringIO() - sys.stdout = Tee(self._currentStdout, sys.stdout) - sys.stderr = Tee(self._currentStderr, sys.stderr) + sys.stdout = Tee(self.encoding, self._currentStdout, sys.stdout) + sys.stderr = Tee(self.encoding, self._currentStderr, sys.stderr) def startContext(self, context): self._startCapture() @@ -281,12 +269,13 @@ class Xunit(Plugin): type = 'error' self.stats['errors'] += 1 - tb = format_exception(err) + tb = format_exception(err, self.encoding) id = test.id() + self.errorlist.append( - '<testcase classname=%(cls)s name=%(name)s time="%(taken).3f">' - '<%(type)s type=%(errtype)s message=%(message)s><![CDATA[%(tb)s]]>' - '</%(type)s>%(systemout)s%(systemerr)s</testcase>' % + u'<testcase classname=%(cls)s name=%(name)s time="%(taken).3f">' + u'<%(type)s type=%(errtype)s message=%(message)s><![CDATA[%(tb)s]]>' + u'</%(type)s>%(systemout)s%(systemerr)s</testcase>' % {'cls': self._quoteattr(id_split(id)[0]), 'name': self._quoteattr(id_split(id)[-1]), 'taken': taken, @@ -302,13 +291,14 @@ class Xunit(Plugin): """Add failure output to Xunit report. 
""" taken = self._timeTaken() - tb = format_exception(err) + tb = format_exception(err, self.encoding) self.stats['failures'] += 1 id = test.id() + self.errorlist.append( - '<testcase classname=%(cls)s name=%(name)s time="%(taken).3f">' - '<failure type=%(errtype)s message=%(message)s><![CDATA[%(tb)s]]>' - '</failure>%(systemout)s%(systemerr)s</testcase>' % + u'<testcase classname=%(cls)s name=%(name)s time="%(taken).3f">' + u'<failure type=%(errtype)s message=%(message)s><![CDATA[%(tb)s]]>' + u'</failure>%(systemout)s%(systemerr)s</testcase>' % {'cls': self._quoteattr(id_split(id)[0]), 'name': self._quoteattr(id_split(id)[-1]), 'taken': taken, @@ -334,9 +324,3 @@ class Xunit(Plugin): 'systemout': self._getCapturedStdout(), 'systemerr': self._getCapturedStderr(), }) - - def _forceUnicode(self, s): - if not UNICODE_STRINGS: - if isinstance(s, str): - s = s.decode(self.encoding, 'replace') - return s diff --git a/nose/pyversion.py b/nose/pyversion.py index a6ec3f7..07c105f 100644 --- a/nose/pyversion.py +++ b/nose/pyversion.py @@ -3,19 +3,33 @@ This module contains fixups for using nose under different versions of Python. 
""" import sys import os +import traceback import types import inspect import nose.util __all__ = ['make_instancemethod', 'cmp_to_key', 'sort_list', 'ClassType', 'TypeType', 'UNICODE_STRINGS', 'unbound_method', 'ismethod', - 'bytes_'] + 'bytes_', 'is_base_exception', 'force_unicode', 'exc_to_unicode', + 'format_exception'] # In Python 3.x, all strings are unicode (the call to 'unicode()' in the 2.x # source will be replaced with 'str()' when running 2to3, so this test will # then become true) UNICODE_STRINGS = (type(unicode()) == type(str())) +if sys.version_info[:2] < (3, 0): + def force_unicode(s, encoding='UTF-8'): + try: + s = unicode(s) + except UnicodeDecodeError: + s = str(s).decode(encoding, 'replace') + + return s +else: + def force_unicode(s, encoding='UTF-8'): + return str(s) + # new.instancemethod() is obsolete for new-style classes (Python 3.x) # We need to use descriptor methods instead. try: @@ -147,3 +161,52 @@ else: return func.func_code.co_flags & CO_GENERATOR != 0 except AttributeError: return False + +# Make a function to help check if an exception is derived from BaseException. +# In Python 2.4, we just use Exception instead. 
+if sys.version_info[:2] < (2, 5): + def is_base_exception(exc): + return isinstance(exc, Exception) +else: + def is_base_exception(exc): + return isinstance(exc, BaseException) + +if sys.version_info[:2] < (3, 0): + def exc_to_unicode(ev, encoding='utf-8'): + if is_base_exception(ev): + if not hasattr(ev, '__unicode__'): + # 2.5- + if not hasattr(ev, 'message'): + # 2.4 + msg = len(ev.args) and ev.args[0] or '' + else: + msg = ev.message + msg = force_unicode(msg, encoding=encoding) + clsname = force_unicode(ev.__class__.__name__, + encoding=encoding) + ev = u'%s: %s' % (clsname, msg) + elif not isinstance(ev, unicode): + ev = repr(ev) + + return force_unicode(ev, encoding=encoding) +else: + def exc_to_unicode(ev, encoding='utf-8'): + return str(ev) + +def format_exception(exc_info, encoding='UTF-8'): + ec, ev, tb = exc_info + + # Our exception object may have been turned into a string, and Python 3's + # traceback.format_exception() doesn't take kindly to that (it expects an + # actual exception object). So we work around it, by doing the work + # ourselves if ev is not an exception object. + if not is_base_exception(ev): + tb_data = force_unicode( + ''.join(traceback.format_tb(tb)), + encoding) + ev = exc_to_unicode(ev) + return tb_data + ev + else: + return force_unicode( + ''.join(traceback.format_exception(*exc_info)), + encoding) diff --git a/unit_tests/test_xunit.py b/unit_tests/test_xunit.py index c141739..d98ccba 100644 --- a/unit_tests/test_xunit.py +++ b/unit_tests/test_xunit.py @@ -23,40 +23,6 @@ mktest.__test__ = False time_taken = re.compile(r'\d\.\d\d') -class TestEscaping(unittest.TestCase): - - def setUp(self): - self.x = Xunit() - - def test_all(self): - eq_(self.x._quoteattr( - '''<baz src="http://foo?f=1&b=2" quote="inix hubris 'maximus'?" />'''), - ('"<baz src="http://foo?f=1&b=2" ' - 'quote="inix hubris \'maximus\'?" 
/>"')) - - def test_unicode_is_utf8_by_default(self): - if not UNICODE_STRINGS: - eq_(self.x._quoteattr(u'Ivan Krsti\u0107'), - '"Ivan Krsti\xc4\x87"') - - def test_unicode_custom_utf16_madness(self): - self.x.encoding = 'utf-16' - utf16 = self.x._quoteattr(u'Ivan Krsti\u0107')[1:-1] - - if UNICODE_STRINGS: - # If all internal strings are unicode, then _quoteattr shouldn't - # have changed anything. - eq_(utf16, u'Ivan Krsti\u0107') - else: - # to avoid big/little endian bytes, assert that we can put it back: - eq_(utf16.decode('utf16'), u'Ivan Krsti\u0107') - - def test_control_characters(self): - # quoting of \n, \r varies in diff. python versions - n = saxutils.quoteattr('\n')[1:-1] - r = saxutils.quoteattr('\r')[1:-1] - eq_(self.x._quoteattr('foo\n\b\f\r'), '"foo%s??%s"' % (n, r)) - eq_(escape_cdata('foo\n\b\f\r'), 'foo\n??\r') class TestSplitId(unittest.TestCase): |