summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIrit Katriel <1055913+iritkatriel@users.noreply.github.com>2023-03-21 21:36:31 +0000
committerGitHub <noreply@github.com>2023-03-21 21:36:31 +0000
commit76350e85ebf96df588384f3d9bdc20d11045bef4 (patch)
treedfc8197d86f5cbb646d932d05e7612435887c5c7
parente6ecd3e6b437f3056e0a410a57c52e2639b56353 (diff)
downloadcpython-git-76350e85ebf96df588384f3d9bdc20d11045bef4.tar.gz
gh-102406: replace exception chaining by PEP-678 notes in codecs (#102407)
-rw-r--r--Include/cpython/pyerrors.h18
-rw-r--r--Lib/test/test_codecs.py106
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst1
-rw-r--r--Objects/exceptions.c110
-rw-r--r--Python/codecs.c35
5 files changed, 64 insertions, 206 deletions
diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h
index d0300f6ee5..65bdc942f5 100644
--- a/Include/cpython/pyerrors.h
+++ b/Include/cpython/pyerrors.h
@@ -116,24 +116,6 @@ PyAPI_FUNC(int) _PyException_AddNote(
PyObject *exc,
PyObject *note);
-/* Helper that attempts to replace the current exception with one of the
- * same type but with a prefix added to the exception text. The resulting
- * exception description looks like:
- *
- * prefix (exc_type: original_exc_str)
- *
- * Only some exceptions can be safely replaced. If the function determines
- * it isn't safe to perform the replacement, it will leave the original
- * unmodified exception in place.
- *
- * Returns a borrowed reference to the new exception (if any), NULL if the
- * existing exception was left in place.
- */
-PyAPI_FUNC(PyObject *) _PyErr_TrySetFromCause(
- const char *prefix_format, /* ASCII-encoded string */
- ...
- );
-
/* In signalmodule.c */
int PySignal_SetWakeupFd(int fd);
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index e3add0c1ee..376175f90f 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -2819,24 +2819,19 @@ class TransformCodecTest(unittest.TestCase):
self.assertIsNone(failure.exception.__cause__)
@unittest.skipUnless(zlib, "Requires zlib support")
- def test_custom_zlib_error_is_wrapped(self):
+ def test_custom_zlib_error_is_noted(self):
# Check zlib codec gives a good error for malformed input
- msg = "^decoding with 'zlib_codec' codec failed"
- with self.assertRaisesRegex(Exception, msg) as failure:
+ msg = "decoding with 'zlib_codec' codec failed"
+ with self.assertRaises(Exception) as failure:
codecs.decode(b"hello", "zlib_codec")
- self.assertIsInstance(failure.exception.__cause__,
- type(failure.exception))
+ self.assertEqual(msg, failure.exception.__notes__[0])
- def test_custom_hex_error_is_wrapped(self):
+ def test_custom_hex_error_is_noted(self):
# Check hex codec gives a good error for malformed input
- msg = "^decoding with 'hex_codec' codec failed"
- with self.assertRaisesRegex(Exception, msg) as failure:
+ msg = "decoding with 'hex_codec' codec failed"
+ with self.assertRaises(Exception) as failure:
codecs.decode(b"hello", "hex_codec")
- self.assertIsInstance(failure.exception.__cause__,
- type(failure.exception))
-
- # Unfortunately, the bz2 module throws OSError, which the codec
- # machinery currently can't wrap :(
+ self.assertEqual(msg, failure.exception.__notes__[0])
# Ensure codec aliases from http://bugs.python.org/issue7475 work
def test_aliases(self):
@@ -2860,11 +2855,8 @@ class TransformCodecTest(unittest.TestCase):
self.assertRaises(ValueError, codecs.decode, b"", "uu-codec")
-# The codec system tries to wrap exceptions in order to ensure the error
-# mentions the operation being performed and the codec involved. We
-# currently *only* want this to happen for relatively stateless
-# exceptions, where the only significant information they contain is their
-# type and a single str argument.
+# The codec system tries to add notes to exceptions in order to ensure
+# the error mentions the operation being performed and the codec involved.
# Use a local codec registry to avoid appearing to leak objects when
# registering multiple search functions
@@ -2874,10 +2866,10 @@ def _get_test_codec(codec_name):
return _TEST_CODECS.get(codec_name)
-class ExceptionChainingTest(unittest.TestCase):
+class ExceptionNotesTest(unittest.TestCase):
def setUp(self):
- self.codec_name = 'exception_chaining_test'
+ self.codec_name = 'exception_notes_test'
codecs.register(_get_test_codec)
self.addCleanup(codecs.unregister, _get_test_codec)
@@ -2901,91 +2893,77 @@ class ExceptionChainingTest(unittest.TestCase):
_TEST_CODECS[self.codec_name] = codec_info
@contextlib.contextmanager
- def assertWrapped(self, operation, exc_type, msg):
- full_msg = r"{} with {!r} codec failed \({}: {}\)".format(
- operation, self.codec_name, exc_type.__name__, msg)
- with self.assertRaisesRegex(exc_type, full_msg) as caught:
+ def assertNoted(self, operation, exc_type, msg):
+ full_msg = r"{} with {!r} codec failed".format(
+ operation, self.codec_name)
+ with self.assertRaises(exc_type) as caught:
yield caught
- self.assertIsInstance(caught.exception.__cause__, exc_type)
- self.assertIsNotNone(caught.exception.__cause__.__traceback__)
+ self.assertIn(full_msg, caught.exception.__notes__[0])
+ caught.exception.__notes__.clear()
def raise_obj(self, *args, **kwds):
# Helper to dynamically change the object raised by a test codec
raise self.obj_to_raise
- def check_wrapped(self, obj_to_raise, msg, exc_type=RuntimeError):
+ def check_note(self, obj_to_raise, msg, exc_type=RuntimeError):
self.obj_to_raise = obj_to_raise
self.set_codec(self.raise_obj, self.raise_obj)
- with self.assertWrapped("encoding", exc_type, msg):
+ with self.assertNoted("encoding", exc_type, msg):
"str_input".encode(self.codec_name)
- with self.assertWrapped("encoding", exc_type, msg):
+ with self.assertNoted("encoding", exc_type, msg):
codecs.encode("str_input", self.codec_name)
- with self.assertWrapped("decoding", exc_type, msg):
+ with self.assertNoted("decoding", exc_type, msg):
b"bytes input".decode(self.codec_name)
- with self.assertWrapped("decoding", exc_type, msg):
+ with self.assertNoted("decoding", exc_type, msg):
codecs.decode(b"bytes input", self.codec_name)
def test_raise_by_type(self):
- self.check_wrapped(RuntimeError, "")
+ self.check_note(RuntimeError, "")
def test_raise_by_value(self):
- msg = "This should be wrapped"
- self.check_wrapped(RuntimeError(msg), msg)
+ msg = "This should be noted"
+ self.check_note(RuntimeError(msg), msg)
def test_raise_grandchild_subclass_exact_size(self):
- msg = "This should be wrapped"
+ msg = "This should be noted"
class MyRuntimeError(RuntimeError):
__slots__ = ()
- self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
+ self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)
def test_raise_subclass_with_weakref_support(self):
- msg = "This should be wrapped"
+ msg = "This should be noted"
class MyRuntimeError(RuntimeError):
pass
- self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
-
- def check_not_wrapped(self, obj_to_raise, msg):
- def raise_obj(*args, **kwds):
- raise obj_to_raise
- self.set_codec(raise_obj, raise_obj)
- with self.assertRaisesRegex(RuntimeError, msg):
- "str input".encode(self.codec_name)
- with self.assertRaisesRegex(RuntimeError, msg):
- codecs.encode("str input", self.codec_name)
- with self.assertRaisesRegex(RuntimeError, msg):
- b"bytes input".decode(self.codec_name)
- with self.assertRaisesRegex(RuntimeError, msg):
- codecs.decode(b"bytes input", self.codec_name)
+ self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)
- def test_init_override_is_not_wrapped(self):
+ def test_init_override(self):
class CustomInit(RuntimeError):
def __init__(self):
pass
- self.check_not_wrapped(CustomInit, "")
+ self.check_note(CustomInit, "")
- def test_new_override_is_not_wrapped(self):
+ def test_new_override(self):
class CustomNew(RuntimeError):
def __new__(cls):
return super().__new__(cls)
- self.check_not_wrapped(CustomNew, "")
+ self.check_note(CustomNew, "")
- def test_instance_attribute_is_not_wrapped(self):
- msg = "This should NOT be wrapped"
+ def test_instance_attribute(self):
+ msg = "This should be noted"
exc = RuntimeError(msg)
exc.attr = 1
- self.check_not_wrapped(exc, "^{}$".format(msg))
+ self.check_note(exc, "^{}$".format(msg))
- def test_non_str_arg_is_not_wrapped(self):
- self.check_not_wrapped(RuntimeError(1), "1")
+ def test_non_str_arg(self):
+ self.check_note(RuntimeError(1), "1")
- def test_multiple_args_is_not_wrapped(self):
+ def test_multiple_args(self):
msg_re = r"^\('a', 'b', 'c'\)$"
- self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re)
+ self.check_note(RuntimeError('a', 'b', 'c'), msg_re)
# http://bugs.python.org/issue19609
- def test_codec_lookup_failure_not_wrapped(self):
+ def test_codec_lookup_failure(self):
msg = "^unknown encoding: {}$".format(self.codec_name)
- # The initial codec lookup should not be wrapped
with self.assertRaisesRegex(LookupError, msg):
"str input".encode(self.codec_name)
with self.assertRaisesRegex(LookupError, msg):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst b/Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst
new file mode 100644
index 0000000000..e0d061c372
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst
@@ -0,0 +1 @@
+:mod:`codecs` encoding/decoding errors now get the context information (which operation and which codecs) attached as :pep:`678` notes instead of through chaining a new instance of the exception.
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index d69f7400ca..a355244cf9 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -3764,113 +3764,3 @@ _PyException_AddNote(PyObject *exc, PyObject *note)
return res;
}
-/* Helper to do the equivalent of "raise X from Y" in C, but always using
- * the current exception rather than passing one in.
- *
- * We currently limit this to *only* exceptions that use the BaseException
- * tp_init and tp_new methods, since we can be reasonably sure we can wrap
- * those correctly without losing data and without losing backwards
- * compatibility.
- *
- * We also aim to rule out *all* exceptions that might be storing additional
- * state, whether by having a size difference relative to BaseException,
- * additional arguments passed in during construction or by having a
- * non-empty instance dict.
- *
- * We need to be very careful with what we wrap, since changing types to
- * a broader exception type would be backwards incompatible for
- * existing codecs, and with different init or new method implementations
- * may either not support instantiation with PyErr_Format or lose
- * information when instantiated that way.
- *
- * XXX (ncoghlan): This could be made more comprehensive by exploiting the
- * fact that exceptions are expected to support pickling. If more builtin
- * exceptions (e.g. AttributeError) start to be converted to rich
- * exceptions with additional attributes, that's probably a better approach
- * to pursue over adding special cases for particular stateful subclasses.
- *
- * Returns a borrowed reference to the new exception (if any), NULL if the
- * existing exception was left in place.
- */
-PyObject *
-_PyErr_TrySetFromCause(const char *format, ...)
-{
- PyObject* msg_prefix;
- PyObject *instance_args;
- Py_ssize_t num_args, caught_type_size, base_exc_size;
- va_list vargs;
- int same_basic_size;
-
- PyObject *exc = PyErr_GetRaisedException();
- PyTypeObject *caught_type = Py_TYPE(exc);
- /* Ensure type info indicates no extra state is stored at the C level
- * and that the type can be reinstantiated using PyErr_Format
- */
- caught_type_size = caught_type->tp_basicsize;
- base_exc_size = _PyExc_BaseException.tp_basicsize;
- same_basic_size = (
- caught_type_size == base_exc_size ||
- (_PyType_SUPPORTS_WEAKREFS(caught_type) &&
- (caught_type_size == base_exc_size + (Py_ssize_t)sizeof(PyObject *))
- )
- );
- if (caught_type->tp_init != (initproc)BaseException_init ||
- caught_type->tp_new != BaseException_new ||
- !same_basic_size ||
- caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize) {
- /* We can't be sure we can wrap this safely, since it may contain
- * more state than just the exception type. Accordingly, we just
- * leave it alone.
- */
- PyErr_SetRaisedException(exc);
- return NULL;
- }
-
- /* Check the args are empty or contain a single string */
- instance_args = ((PyBaseExceptionObject *)exc)->args;
- num_args = PyTuple_GET_SIZE(instance_args);
- if (num_args > 1 ||
- (num_args == 1 &&
- !PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)))) {
- /* More than 1 arg, or the one arg we do have isn't a string
- */
- PyErr_SetRaisedException(exc);
- return NULL;
- }
-
- /* Ensure the instance dict is also empty */
- if (!_PyObject_IsInstanceDictEmpty(exc)) {
- /* While we could potentially copy a non-empty instance dictionary
- * to the replacement exception, for now we take the more
- * conservative path of leaving exceptions with attributes set
- * alone.
- */
- PyErr_SetRaisedException(exc);
- return NULL;
- }
-
- /* For exceptions that we can wrap safely, we chain the original
- * exception to a new one of the exact same type with an
- * error message that mentions the additional details and the
- * original exception.
- *
- * It would be nice to wrap OSError and various other exception
- * types as well, but that's quite a bit trickier due to the extra
- * state potentially stored on OSError instances.
- */
- va_start(vargs, format);
- msg_prefix = PyUnicode_FromFormatV(format, vargs);
- va_end(vargs);
- if (msg_prefix == NULL) {
- Py_DECREF(exc);
- return NULL;
- }
-
- PyErr_Format((PyObject*)Py_TYPE(exc), "%U (%s: %S)",
- msg_prefix, Py_TYPE(exc)->tp_name, exc);
- Py_DECREF(msg_prefix);
- PyObject *new_exc = PyErr_GetRaisedException();
- PyException_SetCause(new_exc, exc);
- PyErr_SetRaisedException(new_exc);
- return new_exc;
-}
diff --git a/Python/codecs.c b/Python/codecs.c
index b2087b499d..9d800f9856 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -382,20 +382,27 @@ PyObject *PyCodec_StreamWriter(const char *encoding,
return codec_getstreamcodec(encoding, stream, errors, 3);
}
-/* Helper that tries to ensure the reported exception chain indicates the
- * codec that was invoked to trigger the failure without changing the type
- * of the exception raised.
- */
static void
-wrap_codec_error(const char *operation,
- const char *encoding)
+add_note_to_codec_error(const char *operation,
+ const char *encoding)
{
- /* TrySetFromCause will replace the active exception with a suitably
- * updated clone if it can, otherwise it will leave the original
- * exception alone.
- */
- _PyErr_TrySetFromCause("%s with '%s' codec failed",
- operation, encoding);
+ PyObject *exc = PyErr_GetRaisedException();
+ if (exc == NULL) {
+ return;
+ }
+ PyObject *note = PyUnicode_FromFormat("%s with '%s' codec failed",
+ operation, encoding);
+ if (note == NULL) {
+ _PyErr_ChainExceptions1(exc);
+ return;
+ }
+ int res = _PyException_AddNote(exc, note);
+ Py_DECREF(note);
+ if (res < 0) {
+ _PyErr_ChainExceptions1(exc);
+ return;
+ }
+ PyErr_SetRaisedException(exc);
}
/* Encode an object (e.g. a Unicode object) using the given encoding
@@ -418,7 +425,7 @@ _PyCodec_EncodeInternal(PyObject *object,
result = PyObject_Call(encoder, args, NULL);
if (result == NULL) {
- wrap_codec_error("encoding", encoding);
+ add_note_to_codec_error("encoding", encoding);
goto onError;
}
@@ -463,7 +470,7 @@ _PyCodec_DecodeInternal(PyObject *object,
result = PyObject_Call(decoder, args, NULL);
if (result == NULL) {
- wrap_codec_error("decoding", encoding);
+ add_note_to_codec_error("decoding", encoding);
goto onError;
}
if (!PyTuple_Check(result) ||