summaryrefslogtreecommitdiff
path: root/docutils
diff options
context:
space:
mode:
authormilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2011-05-13 21:27:50 +0000
committermilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2011-05-13 21:27:50 +0000
commit01b90289083bc402fc71c6e95dce24eb8dc9cfe2 (patch)
tree90002a82adb0b93d780b3bb51af1275a7da81a19 /docutils
parent06df3821ad4af55b3d3e8a3edc6d26fde64216bb (diff)
downloaddocutils-01b90289083bc402fc71c6e95dce24eb8dc9cfe2.tar.gz
Robust error reporting: ErrorString
A wrapper providing robust (bytes and unicode) string conversion. Prevents encoding errors with locales that have non-ASCII chars in their IOError messages, cf. https://bugs.gentoo.org/show_bug.cgi?id=349101 git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@7034 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils')
-rw-r--r--docutils/docutils/io.py101
-rw-r--r--docutils/docutils/parsers/rst/directives/misc.py8
-rwxr-xr-xdocutils/test/test_io.py70
3 files changed, 144 insertions, 35 deletions
diff --git a/docutils/docutils/io.py b/docutils/docutils/io.py
index b155c6fdb..723bfbfe7 100644
--- a/docutils/docutils/io.py
+++ b/docutils/docutils/io.py
@@ -200,6 +200,87 @@ class Output(TransformSpec):
else:
return data.encode(self.encoding, self.error_handler)
+# Robust error reporting
+# -----------------------
+#
+# Implicit conversions of strings and exceptions like
+#
+# >>> u'%s world' % 'H\xe4llo'
+#
+# fail in some Python versions:
+#
+# * In Python <= 2.6, ``unicode(<exception instance>)`` uses
+# `__str__` and fails with non-ASCII chars in `unicode` arguments.
+# (work around http://bugs.python.org/issue2517):
+#
+# * In Python 2, unicode(<exception instance>) fails with non-ASCII
+# chars in `str` arguments. (Use case: in some locales, the errstr
+# argument of IOError contains non-ASCII chars.)
+#
+# * In Python 2, str(<exception instance>) fails with non-ASCII chars
+# in `unicode` arguments.
+#
+# However, when reporting an error we do not want to mask it with
+# encoding/decoding errors. The `ErrorString` and `ErrorOutput` classes
+# handle common exceptions:
+
+class ErrorString(object):
+ """
+ A wrapper providing robust (bytes and unicode) string conversion.
+ """
+
+ def __init__(self, data, encoding=None, encoding_errors='backslashreplace',
+ decoding_errors='replace'):
+ self.data = data
+ self.encoding = (encoding or getattr(data, 'encoding', None) or
+ locale_encoding or 'ascii')
+ self.encoding_errors = encoding_errors
+ self.decoding_errors = decoding_errors
+
+
+ def __str__(self):
+ try:
+ return str(self.data)
+ except UnicodeEncodeError, err:
+ if isinstance(self.data, Exception):
+ args = [str(ErrorString(arg, self.encoding,
+ self.encoding_errors))
+ for arg in self.data.args]
+ return ', '.join(args)
+ if isinstance(self.data, unicode):
+ return self.data.encode(self.encoding, self.encoding_errors)
+ raise
+
+ def __unicode__(self):
+ """
+ Return unicode representation of `self.data`.
+
+ Try ``unicode(self.data)``, catch `UnicodeError` and
+
+ * if `self.data` is an Exception instance, work around
+ http://bugs.python.org/issue2517 with an emulation of
+ Exception.__unicode__,
+
+ * else decode with `self.encoding` and `self.decoding_errors`.
+ """
+ try:
+ return unicode(self.data)
+ except UnicodeError, err: # can be ..EncodeError or ..DecodeError
+ if isinstance(self.data, IOError):
+ return u"[Errno %d] %s: '%s'" % (self.data.errno,
+ unicode(self.data.strerror, self.encoding,
+ self.decoding_errors),
+ unicode(self.data.filename, self.encoding,
+ self.decoding_errors))
+ if isinstance(self.data, Exception):
+ args = [unicode(ErrorString(arg, self.encoding,
+ decoding_errors=self.decoding_errors))
+ for arg in self.data.args]
+ return u', '.join(args)
+ if isinstance(err, UnicodeDecodeError):
+ return unicode(self.data, self.encoding, self.decoding_errors)
+ raise
+
class ErrorOutput(object):
"""
@@ -253,26 +334,14 @@ class ErrorOutput(object):
if self.stream is False:
return
if isinstance(data, Exception):
- # Convert now to detect errors:
- # In Python <= 2.6, unicode(<exception instance>)
- # uses __str__ and fails with non-ASCII chars in arguments
- try:
- data = unicode(data)
- except UnicodeError, err:
- try:
- data = u', '.join(data.args)
- except AttributeError:
- raise err
- except UnicodeDecodeError:
- data = str(data)
+ data = unicode(ErrorString(data, self.encoding,
+ self.encoding_errors, self.decoding_errors))
try:
self.stream.write(data)
except UnicodeEncodeError:
- self.stream.write(data.encode(self.encoding,
- self.encoding_errors))
+ self.stream.write(data.encode(self.encoding, self.encoding_errors))
except TypeError: # in Python 3, stderr expects unicode
- self.stream.write(data.decode(self.encoding,
- self.decoding_errors))
+ self.stream.write(unicode(data, self.encoding, self.decoding_errors))
def close(self):
"""
diff --git a/docutils/docutils/parsers/rst/directives/misc.py b/docutils/docutils/parsers/rst/directives/misc.py
index b98446eea..5cca85045 100644
--- a/docutils/docutils/parsers/rst/directives/misc.py
+++ b/docutils/docutils/parsers/rst/directives/misc.py
@@ -11,6 +11,7 @@ import os.path
import re
import time
from docutils import io, nodes, statemachine, utils
+from docutils.io import ErrorString
from docutils.parsers.rst import Directive, convert_directive_function
from docutils.parsers.rst import directives, roles, states
from docutils.transforms import misc
@@ -66,13 +67,8 @@ class Include(Directive):
input_encoding_error_handler),
handle_io_errors=None)
except IOError, error:
- # robust error-instance to unicode conversion
- # (work around http://bugs.python.org/issue2517):
- errmsg = u"[Errno %d] %s: '%s'" % (error.errno,
- error.strerror,
- error.filename)
raise self.severe(u'Problems with "%s" directive path:\n%s: %s.' %
- (self.name, error.__class__.__name__, errmsg))
+ (self.name, error.__class__.__name__, ErrorString(error)))
startline = self.options.get('start-line', None)
endline = self.options.get('end-line', None)
try:
diff --git a/docutils/test/test_io.py b/docutils/test/test_io.py
index 1044d57a9..db2937061 100755
--- a/docutils/test/test_io.py
+++ b/docutils/test/test_io.py
@@ -17,7 +17,7 @@ except ImportError: # io is new in Python 2.6
import DocutilsTestSupport # must be imported before docutils
from docutils import io
-from docutils._compat import b
+from docutils._compat import b, bytes
class InputTests(unittest.TestCase):
@@ -69,13 +69,67 @@ print "hello world"
self.assertEquals(input.successful_encoding, 'utf-8')
+class ErrorStringTests(unittest.TestCase):
+ # test data:
+ bs = b('\xfc') # unicode(bs) fails, str(bs) in Python 3 return repr()
+ us = u'\xfc' # bytes(us) fails; str(us) fails in Python 2
+ be = Exception(bs) # unicode(be) fails
+ ue = Exception(us) # bytes(ue) fails, str(ue) fails in Python 2;
+ # unicode(ue) fails in Python < 2.6 (issue2517_)
+ # wrapped test data:
+ wbs = io.ErrorString(bs)
+ wus = io.ErrorString(us)
+ wbe = io.ErrorString(be)
+ wue = io.ErrorString(ue)
+
+ def test_7bit(self):
+ # wrapping (not required with 7-bit chars) must not change the
+ # result of conversions:
+ bs = b('foo')
+ us = u'foo'
+ be = Exception(bs)
+ ue = Exception(us)
+ self.assertEqual(str(7), str(io.ErrorString(7)))
+ self.assertEqual(str(bs), str(io.ErrorString(bs)))
+ self.assertEqual(str(us), str(io.ErrorString(us)))
+ self.assertEqual(str(be), str(io.ErrorString(be)))
+ self.assertEqual(str(ue), str(io.ErrorString(ue)))
+ self.assertEqual(unicode(7), unicode(io.ErrorString(7)))
+ self.assertEqual(unicode(bs), unicode(io.ErrorString(bs)))
+ self.assertEqual(unicode(us), unicode(io.ErrorString(us)))
+ self.assertEqual(unicode(be), unicode(io.ErrorString(be)))
+ self.assertEqual(unicode(ue), unicode(io.ErrorString(ue)))
+
+ def test_ustr(self):
+ """Test conversion to a unicode-string."""
+ # unicode(self.bs) fails
+ self.assertEqual(unicode, type(unicode(self.wbs)))
+ self.assertEqual(unicode(self.us), unicode(self.wus))
+ # unicode(self.be) fails
+ self.assertEqual(unicode, type(unicode(self.wbe)))
+ # unicode(ue) fails in Python < 2.6 (issue2517_)
+ self.assertEqual(unicode, type(unicode(self.wue)))
+ self.assertEqual(self.us, unicode(self.wue))
+
+ def test_str(self):
+ """Test conversion to a string (bytes in Python 2, unicode in Python 3)."""
+ self.assertEqual(str(self.bs), str(self.wbs))
+ self.assertEqual(str(self.be), str(self.be))
+ # str(us) fails in Python 2
+ self.assertEqual(str, type(str(self.wus)))
+ # str(ue) fails in Python 2
+ self.assertEqual(str, type(str(self.wue)))
+
+# .. _issue2517: http://bugs.python.org/issue2517
+
+
# ErrorOutput tests
# -----------------
# Stub: Buffer with 'strict' auto-conversion of input to byte string:
class BBuf(BytesIO, object):
def write(self, data):
- if type(data) == unicode:
+ if isinstance(data, unicode):
data.encode('ascii', 'strict')
super(BBuf, self).write(data)
@@ -83,7 +137,7 @@ class BBuf(BytesIO, object):
class UBuf(StringIO, object):
def write(self, data):
# emulate Python 3 handling of stdout, stderr
- if type(data) == b:
+ if isinstance(data, bytes):
raise TypeError('must be unicode, not bytes')
super(UBuf, self).write(data)
@@ -127,15 +181,5 @@ class ErrorOutputTests(unittest.TestCase):
self.assertEquals(buf.getvalue(), u'b\ufffd u\xfc e\xfc b\xfc')
-# class FileInputTests(unittest.TestCase):
-# def test_io_error_reporting(self):
-# # it seems like IOError and SystemExit are not catched by assertRaises:
-# self.assertRaises(IOError, open('foo'))
-# self.assertRaises(IOError,
-# io.FileInput(source_path=u'u\xfc', handle_io_errors=False))
-# self.assertRaises(IOError,
-# io.FileInput(source_path=u'u\xfc', handle_io_errors=True))
-
-
if __name__ == '__main__':
unittest.main()