diff options
| author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2011-05-13 21:27:50 +0000 |
|---|---|---|
| committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2011-05-13 21:27:50 +0000 |
| commit | 01b90289083bc402fc71c6e95dce24eb8dc9cfe2 (patch) | |
| tree | 90002a82adb0b93d780b3bb51af1275a7da81a19 /docutils | |
| parent | 06df3821ad4af55b3d3e8a3edc6d26fde64216bb (diff) | |
| download | docutils-01b90289083bc402fc71c6e95dce24eb8dc9cfe2.tar.gz | |
Robust error reporting: ErrorString
A wrapper providing robust (bytes and unicode) string conversion.
Prevents encoding errors with locales that have non-ASCII chars in
their IOError messages, cf. https://bugs.gentoo.org/show_bug.cgi?id=349101
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@7034 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils')
| -rw-r--r-- | docutils/docutils/io.py | 101 | ||||
| -rw-r--r-- | docutils/docutils/parsers/rst/directives/misc.py | 8 | ||||
| -rwxr-xr-x | docutils/test/test_io.py | 70 |
3 files changed, 144 insertions, 35 deletions
diff --git a/docutils/docutils/io.py b/docutils/docutils/io.py index b155c6fdb..723bfbfe7 100644 --- a/docutils/docutils/io.py +++ b/docutils/docutils/io.py @@ -200,6 +200,87 @@ class Output(TransformSpec): else: return data.encode(self.encoding, self.error_handler) +# Robust error reporting +# ----------------------- +# +# Implicit conversions of strings and exceptions like +# +# >>> u'%s world' % 'H\xe4llo' +# +# fail in some Python versions: +# +# * In Python <= 2.6, ``unicode(<exception instance>)`` uses +# `__str__` and fails with non-ASCII chars in `unicode` arguments. +# (work around http://bugs.python.org/issue2517): +# +# * In Python 2, unicode(<exception instance>) fails, with non-ASCII +# chars in `str` arguments. (Use case: in some locales, the errstr +# argument of IOError contains non-ASCII chars.) +# +# * In Python 2, str(<exception instance>) fails, with non-ASCII chars +# in `unicode` arguments. +# +# However, when reporting an error we do not want to mask it with +# encoding/decoding errors. The `ErrorString` and `ErrorOutput` classes +# handle common exceptions: + +class ErrorString(object): + """ + A wrapper providing robust (bytes and unicode) string conversion. + """ + + def __init__(self, data, encoding=None, encoding_errors='backslashreplace', + decoding_errors='replace'): + self.data = data + self.encoding = (encoding or getattr(data, 'encoding', None) or + locale_encoding or 'ascii') + self.encoding_errors = encoding_errors + self.decoding_errors = decoding_errors + + + def __str__(self): + try: + return str(self.data) + except UnicodeEncodeError, err: + if isinstance(self.data, Exception): + args = [str(ErrorString(arg, self.encoding, + self.encoding_errors)) + for arg in self.data.args] + return ', '.join(args) + if isinstance(self.data, unicode): + return self.data.encode(self.encoding, self.encoding_errors) + raise + + def __unicode__(self): + """ + Return unicode representation of `self.data`. 
+ + Try ``unicode(self.data)``, catch `UnicodeError` and + + * if `self.data` is an Exception instance, work around + http://bugs.python.org/issue2517 with an emulation of + Exception.__unicode__, + + * else decode with `self.encoding` and `self.decoding_errors`. + """ + try: + return unicode(self.data) + except UnicodeError, err: # can be ..EncodeError or ..DecodeError + if isinstance(self.data, IOError): + return u"[Errno %d] %s: '%s'" % (self.data.errno, + unicode(self.data.strerror, self.encoding, + self.decoding_errors), + unicode(self.data.filename, self.encoding, + self.decoding_errors)) + if isinstance(self.data, Exception): + args = [unicode(ErrorString(arg, self.encoding, + decoding_errors=self.decoding_errors)) + for arg in self.data.args] + return u', '.join(args) + if isinstance(err, UnicodeDecodeError): + return unicode(self.data, self.encoding, self.decoding_errors) + raise + class ErrorOutput(object): """ @@ -253,26 +334,14 @@ class ErrorOutput(object): if self.stream is False: return if isinstance(data, Exception): - # Convert now to detect errors: - # In Python <= 2.6, unicode(<exception instance>) - # uses __str__ and fails with non-ASCII chars in arguments - try: - data = unicode(data) - except UnicodeError, err: - try: - data = u', '.join(data.args) - except AttributeError: - raise err - except UnicodeDecodeError: - data = str(data) + data = unicode(ErrorString(data, self.encoding, + self.encoding_errors, self.decoding_errors)) try: self.stream.write(data) except UnicodeEncodeError: - self.stream.write(data.encode(self.encoding, - self.encoding_errors)) + self.stream.write(data.encode(self.encoding, self.encoding_errors)) except TypeError: # in Python 3, stderr expects unicode - self.stream.write(data.decode(self.encoding, - self.decoding_errors)) + self.stream.write(unicode(data, self.encoding, self.decoding_errors)) def close(self): """ diff --git a/docutils/docutils/parsers/rst/directives/misc.py 
b/docutils/docutils/parsers/rst/directives/misc.py index b98446eea..5cca85045 100644 --- a/docutils/docutils/parsers/rst/directives/misc.py +++ b/docutils/docutils/parsers/rst/directives/misc.py @@ -11,6 +11,7 @@ import os.path import re import time from docutils import io, nodes, statemachine, utils +from docutils.io import ErrorString from docutils.parsers.rst import Directive, convert_directive_function from docutils.parsers.rst import directives, roles, states from docutils.transforms import misc @@ -66,13 +67,8 @@ class Include(Directive): input_encoding_error_handler), handle_io_errors=None) except IOError, error: - # robust error-instance to unicode conversion - # (work around http://bugs.python.org/issue2517): - errmsg = u"[Errno %d] %s: '%s'" % (error.errno, - error.strerror, - error.filename) raise self.severe(u'Problems with "%s" directive path:\n%s: %s.' % - (self.name, error.__class__.__name__, errmsg)) + (self.name, error.__class__.__name__, ErrorString(error))) startline = self.options.get('start-line', None) endline = self.options.get('end-line', None) try: diff --git a/docutils/test/test_io.py b/docutils/test/test_io.py index 1044d57a9..db2937061 100755 --- a/docutils/test/test_io.py +++ b/docutils/test/test_io.py @@ -17,7 +17,7 @@ except ImportError: # io is new in Python 2.6 import DocutilsTestSupport # must be imported before docutils from docutils import io -from docutils._compat import b +from docutils._compat import b, bytes class InputTests(unittest.TestCase): @@ -69,13 +69,67 @@ print "hello world" self.assertEquals(input.successful_encoding, 'utf-8') +class ErrorStringTests(unittest.TestCase): + # test data: + bs = b('\xfc') # unicode(bs) fails, str(bs) in Python 3 return repr() + us = u'\xfc' # bytes(us) fails; str(us) fails in Python 2 + be = Exception(bs) # unicode(be) fails + ue = Exception(us) # bytes(ue) fails, str(ue) fails in Python 2; + # unicode(ue) fails in Python < 2.6 (issue2517_) + # wrapped test data: + wbs = 
io.ErrorString(bs) + wus = io.ErrorString(us) + wbe = io.ErrorString(be) + wue = io.ErrorString(ue) + + def test_7bit(self): + # wrapping (not required with 7-bit chars) must not change the + # result of conversions: + bs = b('foo') + us = u'foo' + be = Exception(bs) + ue = Exception(us) + self.assertEqual(str(7), str(io.ErrorString(7))) + self.assertEqual(str(bs), str(io.ErrorString(bs))) + self.assertEqual(str(us), str(io.ErrorString(us))) + self.assertEqual(str(be), str(io.ErrorString(be))) + self.assertEqual(str(ue), str(io.ErrorString(ue))) + self.assertEqual(unicode(7), unicode(io.ErrorString(7))) + self.assertEqual(unicode(bs), unicode(io.ErrorString(bs))) + self.assertEqual(unicode(us), unicode(io.ErrorString(us))) + self.assertEqual(unicode(be), unicode(io.ErrorString(be))) + self.assertEqual(unicode(ue), unicode(io.ErrorString(ue))) + + def test_ustr(self): + """Test conversion to a unicode-string.""" + # unicode(self.bs) fails + self.assertEqual(unicode, type(unicode(self.wbs))) + self.assertEqual(unicode(self.us), unicode(self.wus)) + # unicode(self.be) fails + self.assertEqual(unicode, type(unicode(self.wbe))) + # unicode(ue) fails in Python < 2.6 (issue2517_) + self.assertEqual(unicode, type(unicode(self.wue))) + self.assertEqual(self.us, unicode(self.wue)) + + def test_str(self): + """Test conversion to a string (bytes in Python 2, unicode in Python 3).""" + self.assertEqual(str(self.bs), str(self.wbs)) + self.assertEqual(str(self.be), str(self.be)) + # str(us) fails in Python 2 + self.assertEqual(str, type(str(self.wus))) + # str(ue) fails in Python 2 + self.assertEqual(str, type(str(self.wue))) + +# .. 
_issue2517: http://bugs.python.org/issue2517 + + # ErrorOutput tests # ----------------- # Stub: Buffer with 'strict' auto-conversion of input to byte string: class BBuf(BytesIO, object): def write(self, data): - if type(data) == unicode: + if isinstance(data, unicode): data.encode('ascii', 'strict') super(BBuf, self).write(data) @@ -83,7 +137,7 @@ class BBuf(BytesIO, object): class UBuf(StringIO, object): def write(self, data): # emulate Python 3 handling of stdout, stderr - if type(data) == b: + if isinstance(data, bytes): raise TypeError('must be unicode, not bytes') super(UBuf, self).write(data) @@ -127,15 +181,5 @@ class ErrorOutputTests(unittest.TestCase): self.assertEquals(buf.getvalue(), u'b\ufffd u\xfc e\xfc b\xfc') -# class FileInputTests(unittest.TestCase): -# def test_io_error_reporting(self): -# # it seems like IOError and SystemExit are not catched by assertRaises: -# self.assertRaises(IOError, open('foo')) -# self.assertRaises(IOError, -# io.FileInput(source_path=u'u\xfc', handle_io_errors=False)) -# self.assertRaises(IOError, -# io.FileInput(source_path=u'u\xfc', handle_io_errors=True)) - - if __name__ == '__main__': unittest.main() |
