summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Victor Stinner <vstinner@redhat.com> 2018-08-28 23:26:33 +0200
committer GitHub <noreply@github.com> 2018-08-28 23:26:33 +0200
commit 9e4994d410970fb4e75168401d159ba47a8f7108 (patch)
tree 31eb67e89ade21902bfe925d353f650ae95fb5d8
parent d500e5307aec9c5d535f66d567fadb9c587a9a36 (diff)
download cpython-git-9e4994d410970fb4e75168401d159ba47a8f7108.tar.gz
bpo-34485: Enhance init_sys_streams() (GH-8978)
Python now gets the locale encoding with C code to initialize the encoding of standard streams like sys.stdout. Moreover, the encoding is now initialized to the Python codec name to get a normalized encoding name and to ensure that the codec is loaded. The change avoids importing _bootlocale and _locale modules at startup by default. When the PYTHONIOENCODING environment variable only contains an encoding, the error handler is now set explicitly to "strict". Also rename get_default_standard_stream_error_handler() to get_stdio_errors(). Reduce the buffer used to format the "cpXXX" string (Windows locale encoding).
-rw-r--r--Lib/test/test_embed.py16
-rw-r--r--Lib/test/test_sys.py6
-rw-r--r--Lib/test/test_utf8_mode.py12
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2018-08-28-17-48-40.bpo-34485.aFwck2.rst5
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst3
-rw-r--r--Modules/_localemodule.c2
-rw-r--r--Programs/_testembed.c4
-rw-r--r--Python/pylifecycle.c86
8 files changed, 91 insertions, 43 deletions
diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py
index 25593bdf42..3922447c64 100644
--- a/Lib/test/test_embed.py
+++ b/Lib/test/test_embed.py
@@ -171,17 +171,17 @@ class EmbeddingTests(EmbeddingTestsMixin, unittest.TestCase):
"stdout: {out_encoding}:ignore",
"stderr: {out_encoding}:backslashreplace",
"--- Set encoding only ---",
- "Expected encoding: latin-1",
+ "Expected encoding: iso8859-1",
"Expected errors: default",
- "stdin: latin-1:{errors}",
- "stdout: latin-1:{errors}",
- "stderr: latin-1:backslashreplace",
+ "stdin: iso8859-1:{errors}",
+ "stdout: iso8859-1:{errors}",
+ "stderr: iso8859-1:backslashreplace",
"--- Set encoding and errors ---",
- "Expected encoding: latin-1",
+ "Expected encoding: iso8859-1",
"Expected errors: replace",
- "stdin: latin-1:replace",
- "stdout: latin-1:replace",
- "stderr: latin-1:backslashreplace"])
+ "stdin: iso8859-1:replace",
+ "stdout: iso8859-1:replace",
+ "stderr: iso8859-1:backslashreplace"])
expected_output = expected_output.format(
in_encoding=expected_stream_encoding,
out_encoding=expected_stream_encoding,
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 336ae447a8..005c82d13d 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -668,7 +668,7 @@ class SysModuleTest(unittest.TestCase):
'dump("stdout")',
'dump("stderr")',
))
- args = [sys.executable, "-c", code]
+ args = [sys.executable, "-X", "utf8=0", "-c", code]
if isolated:
args.append("-I")
if encoding is not None:
@@ -712,8 +712,8 @@ class SysModuleTest(unittest.TestCase):
# have no any effect
out = self.c_locale_get_error_handler(encoding=':')
self.assertEqual(out,
- 'stdin: strict\n'
- 'stdout: strict\n'
+ 'stdin: surrogateescape\n'
+ 'stdout: surrogateescape\n'
'stderr: backslashreplace\n')
out = self.c_locale_get_error_handler(encoding='')
self.assertEqual(out,
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index df988c1fc9..7280ce77ef 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -139,16 +139,16 @@ class UTF8ModeTests(unittest.TestCase):
out = self.get_output('-X', 'utf8', '-c', code,
PYTHONIOENCODING="latin1")
self.assertEqual(out.splitlines(),
- ['stdin: latin1/strict',
- 'stdout: latin1/strict',
- 'stderr: latin1/backslashreplace'])
+ ['stdin: iso8859-1/strict',
+ 'stdout: iso8859-1/strict',
+ 'stderr: iso8859-1/backslashreplace'])
out = self.get_output('-X', 'utf8', '-c', code,
PYTHONIOENCODING=":namereplace")
self.assertEqual(out.splitlines(),
- ['stdin: UTF-8/namereplace',
- 'stdout: UTF-8/namereplace',
- 'stderr: UTF-8/backslashreplace'])
+ ['stdin: utf-8/namereplace',
+ 'stdout: utf-8/namereplace',
+ 'stderr: utf-8/backslashreplace'])
def test_io(self):
code = textwrap.dedent('''
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-17-48-40.bpo-34485.aFwck2.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-17-48-40.bpo-34485.aFwck2.rst
new file mode 100644
index 0000000000..f6cd9515f2
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-17-48-40.bpo-34485.aFwck2.rst
@@ -0,0 +1,5 @@
+Python now gets the locale encoding with C code to initialize the encoding
+of standard streams like sys.stdout. Moreover, the encoding is now
+initialized to the Python codec name to get a normalized encoding name and
+to ensure that the codec is loaded. The change avoids importing _bootlocale
+and _locale modules at startup by default.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst
new file mode 100644
index 0000000000..5ca373aeab
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst
@@ -0,0 +1,3 @@
+Fix the error handler of standard streams like sys.stdout:
+PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
+"strict".
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 524886d466..3fdbc5ea81 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -319,7 +319,7 @@ exit:
static PyObject*
PyLocale_getdefaultlocale(PyObject* self, PyObject *Py_UNUSED(ignored))
{
- char encoding[100];
+ char encoding[20];
char locale[100];
PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
diff --git a/Programs/_testembed.c b/Programs/_testembed.c
index f1d30f6c54..d0c00cfc6c 100644
--- a/Programs/_testembed.c
+++ b/Programs/_testembed.c
@@ -113,9 +113,9 @@ static int test_forced_io_encoding(void)
printf("--- Set errors only ---\n");
check_stdio_details(NULL, "ignore");
printf("--- Set encoding only ---\n");
- check_stdio_details("latin-1", NULL);
+ check_stdio_details("iso8859-1", NULL);
printf("--- Set encoding and errors ---\n");
- check_stdio_details("latin-1", "replace");
+ check_stdio_details("iso8859-1", "replace");
/* Check calling after initialization fails */
Py_Initialize();
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index cc64cf956d..29711dfc98 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -244,22 +244,26 @@ error:
return NULL;
}
-static char*
-get_locale_encoding(void)
+static _PyInitError
+get_locale_encoding(char **locale_encoding)
{
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
- char* codeset = nl_langinfo(CODESET);
- if (!codeset || codeset[0] == '\0') {
- PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
- return NULL;
- }
- return get_codec_name(codeset);
+#ifdef MS_WINDOWS
+ char encoding[20];
+ PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
#elif defined(__ANDROID__)
- return get_codec_name("UTF-8");
+ const char *encoding = "UTF-8";
#else
- PyErr_SetNone(PyExc_NotImplementedError);
- return NULL;
+ const char *encoding = nl_langinfo(CODESET);
+ if (!encoding || encoding[0] == '\0') {
+ return _Py_INIT_USER_ERR("failed to get the locale encoding: "
+ "nl_langinfo(CODESET) failed");
+ }
#endif
+ *locale_encoding = _PyMem_RawStrdup(encoding);
+ if (*locale_encoding == NULL) {
+ return _Py_INIT_NO_MEMORY();
+ }
+ return _Py_INIT_OK();
}
static _PyInitError
@@ -397,7 +401,7 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = {
};
static const char *
-get_default_standard_stream_error_handler(void)
+get_stdio_errors(void)
{
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL) {
@@ -417,8 +421,7 @@ get_default_standard_stream_error_handler(void)
#endif
}
- /* Otherwise return NULL to request the typical default error handler */
- return NULL;
+ return "strict";
}
#ifdef PY_COERCE_C_LOCALE
@@ -1586,9 +1589,17 @@ initfsencoding(PyInterpreterState *interp)
Py_HasFileSystemDefaultEncoding = 1;
}
else {
- Py_FileSystemDefaultEncoding = get_locale_encoding();
+ char *locale_encoding;
+ _PyInitError err = get_locale_encoding(&locale_encoding);
+ if (_Py_INIT_FAILED(err)) {
+ return err;
+ }
+
+ Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
+ PyMem_RawFree(locale_encoding);
if (Py_FileSystemDefaultEncoding == NULL) {
- return _Py_INIT_ERR("Unable to get the locale encoding");
+ return _Py_INIT_ERR("failed to get the Python codec "
+ "of the locale encoding");
}
Py_HasFileSystemDefaultEncoding = 0;
@@ -1787,6 +1798,8 @@ init_sys_streams(PyInterpreterState *interp)
PyObject * encoding_attr;
char *pythonioencoding = NULL;
const char *encoding, *errors;
+ char *locale_encoding = NULL;
+ char *codec_name = NULL;
_PyInitError res = _Py_INIT_OK();
/* Hack to avoid a nasty recursion issue when Python is invoked
@@ -1838,21 +1851,46 @@ init_sys_streams(PyInterpreterState *interp)
errors = err;
}
}
- if (*pythonioencoding && !encoding) {
+ if (!encoding && *pythonioencoding) {
encoding = pythonioencoding;
+ if (!errors) {
+ errors = "strict";
+ }
}
}
- else if (interp->core_config.utf8_mode) {
- encoding = "utf-8";
- errors = "surrogateescape";
+
+ if (interp->core_config.utf8_mode) {
+ if (!encoding) {
+ encoding = "utf-8";
+ }
+ if (!errors) {
+ errors = "surrogateescape";
+ }
}
- if (!errors && !pythonioencoding) {
+ if (!errors) {
/* Choose the default error handler based on the current locale */
- errors = get_default_standard_stream_error_handler();
+ errors = get_stdio_errors();
}
}
+ if (encoding == NULL) {
+ _PyInitError err = get_locale_encoding(&locale_encoding);
+ if (_Py_INIT_FAILED(err)) {
+ return err;
+ }
+ encoding = locale_encoding;
+ }
+
+ codec_name = get_codec_name(encoding);
+ if (codec_name == NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "failed to get the Python codec name "
+ "of stdio encoding");
+ goto error;
+ }
+ encoding = codec_name;
+
/* Set sys.stdin */
fd = fileno(stdin);
/* Under some conditions stdin, stdout and stderr may not be connected
@@ -1928,6 +1966,8 @@ done:
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
+ PyMem_RawFree(locale_encoding);
+ PyMem_RawFree(codec_name);
PyMem_Free(pythonioencoding);
Py_XDECREF(bimod);
Py_XDECREF(iomod);