summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@redhat.com>, 2019-06-26 17:31:12 +0200
committer: GitHub <noreply@github.com>, 2019-06-26 17:31:12 +0200
commit: 689830ee6243126798a6c519c05aa11ba73db7cd (patch)
tree: 165d26fd54d4853988429c3731234cba84c6ce68
parent: c6a2320e876354ee62cf8149b849bcff9492d38a (diff)
download: cpython-git-689830ee6243126798a6c519c05aa11ba73db7cd.tar.gz
bpo-37412: os.getcwdb() now uses UTF-8 on Windows (GH-14396)
The os.getcwdb() function now uses the UTF-8 encoding on Windows, rather than the ANSI code page: see PEP 529 for the rationale. The function is no longer deprecated on Windows. os.getcwd() and os.getcwdb() now detect integer overflow on memory allocations. On Unix, these functions properly report MemoryError on memory allocation failure.
-rw-r--r-- Doc/library/os.rst | 5
-rw-r--r-- Doc/whatsnew/3.8.rst | 5
-rw-r--r-- Lib/test/test_os.py | 11
-rw-r--r-- Misc/NEWS.d/next/Library/2019-06-26-16-28-59.bpo-37412.lx0VjC.rst | 3
-rw-r--r-- Modules/posixmodule.c | 123
5 files changed, 88 insertions, 59 deletions
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index e7acb356d3..45ce643d88 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -1730,6 +1730,11 @@ features:
Return a bytestring representing the current working directory.
+ .. versionchanged:: 3.8
+ The function now uses the UTF-8 encoding on Windows, rather than the ANSI
+ code page: see :pep:`529` for the rationale. The function is no longer
+ deprecated on Windows.
+
.. function:: lchflags(path, flags)
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index a0c881a4e7..46e531f589 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -1231,6 +1231,11 @@ Changes in Python behavior
Changes in the Python API
-------------------------
+* The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
+ rather than the ANSI code page: see :pep:`529` for the rationale. The
+ function is no longer deprecated on Windows.
+ (Contributed by Victor Stinner in :issue:`37412`.)
+
* :class:`subprocess.Popen` can now use :func:`os.posix_spawn` in some cases
for better performance. On Windows Subsystem for Linux and QEMU User
Emulation, Popen constructor using :func:`os.posix_spawn` no longer raise an
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index 527f814632..18cd78b55f 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -82,6 +82,17 @@ def create_file(filename, content=b'content'):
fp.write(content)
+class MiscTests(unittest.TestCase):
+ # os.getcwd() must return the current working directory as a str.
+ def test_getcwd(self):
+ cwd = os.getcwd()
+ self.assertIsInstance(cwd, str)
+
+ # os.getcwdb() must return bytes which, once decoded with os.fsdecode(),
+ # name the same directory that os.getcwd() reports.
+ def test_getcwdb(self):
+ cwd = os.getcwdb()
+ self.assertIsInstance(cwd, bytes)
+ self.assertEqual(os.fsdecode(cwd), os.getcwd())
+
+
# Tests creating TESTFN
class FileTests(unittest.TestCase):
def setUp(self):
diff --git a/Misc/NEWS.d/next/Library/2019-06-26-16-28-59.bpo-37412.lx0VjC.rst b/Misc/NEWS.d/next/Library/2019-06-26-16-28-59.bpo-37412.lx0VjC.rst
new file mode 100644
index 0000000000..3ce4102129
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-06-26-16-28-59.bpo-37412.lx0VjC.rst
@@ -0,0 +1,3 @@
+The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
+rather than the ANSI code page: see :pep:`529` for the rationale. The function
+is no longer deprecated on Windows.
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 10549d6f60..5134ed7bdf 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -506,17 +506,6 @@ void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *,
ULONG, struct _Py_stat_struct *);
#endif
-#ifdef MS_WINDOWS
-static int
-win32_warn_bytes_api()
-{
- return PyErr_WarnEx(PyExc_DeprecationWarning,
- "The Windows bytes API has been deprecated, "
- "use Unicode filenames instead",
- 1);
-}
-#endif
-
#ifndef MS_WINDOWS
PyObject *
@@ -3334,83 +3323,99 @@ os_lchown_impl(PyObject *module, path_t *path, uid_t uid, gid_t gid)
+/* Return the current working directory: a str object when use_bytes is
+ zero, a bytes object otherwise. On failure, return NULL with an
+ exception set (MemoryError when a buffer cannot be allocated, otherwise
+ the error raised by PyErr_SetFromWindowsErr() or posix_error()).
+
+ On Windows the path is always read as wide characters with
+ GetCurrentDirectoryW() and, for bytes, encoded afterwards with
+ PyUnicode_EncodeFSDefault(). Elsewhere, getcwd() is retried with a
+ buffer growing by 1024 bytes for as long as it fails with ERANGE. */
static PyObject *
posix_getcwd(int use_bytes)
{
- char *buf, *tmpbuf;
- char *cwd;
- const size_t chunk = 1024;
- size_t buflen = 0;
- PyObject *obj;
-
#ifdef MS_WINDOWS
- if (!use_bytes) {
- wchar_t wbuf[MAXPATHLEN];
- wchar_t *wbuf2 = wbuf;
- PyObject *resobj;
- DWORD len;
- Py_BEGIN_ALLOW_THREADS
- len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
- /* If the buffer is large enough, len does not include the
- terminating \0. If the buffer is too small, len includes
- the space needed for the terminator. */
- if (len >= Py_ARRAY_LENGTH(wbuf)) {
+ wchar_t wbuf[MAXPATHLEN];
+ wchar_t *wbuf2 = wbuf;
+ DWORD len;
+
+ Py_BEGIN_ALLOW_THREADS
+ len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
+ /* If the buffer is large enough, len does not include the
+ terminating \0. If the buffer is too small, len includes
+ the space needed for the terminator. */
+ if (len >= Py_ARRAY_LENGTH(wbuf)) {
+ /* Allocate the heap buffer only when len * sizeof(wchar_t) cannot
+ overflow; otherwise leave wbuf2 NULL so that MemoryError is
+ raised once the GIL has been re-acquired below. */
+ if (len <= PY_SSIZE_T_MAX / sizeof(wchar_t)) {
wbuf2 = PyMem_RawMalloc(len * sizeof(wchar_t));
- if (wbuf2)
- len = GetCurrentDirectoryW(len, wbuf2);
}
- Py_END_ALLOW_THREADS
- if (!wbuf2) {
- PyErr_NoMemory();
- return NULL;
+ else {
+ wbuf2 = NULL;
}
- if (!len) {
- if (wbuf2 != wbuf)
- PyMem_RawFree(wbuf2);
- return PyErr_SetFromWindowsErr(0);
+ if (wbuf2) {
+ len = GetCurrentDirectoryW(len, wbuf2);
}
- resobj = PyUnicode_FromWideChar(wbuf2, len);
+ }
+ Py_END_ALLOW_THREADS
+
+ if (!wbuf2) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ if (!len) {
if (wbuf2 != wbuf)
PyMem_RawFree(wbuf2);
- return resobj;
+ return PyErr_SetFromWindowsErr(0);
}
- if (win32_warn_bytes_api())
- return NULL;
-#endif
+ PyObject *resobj = PyUnicode_FromWideChar(wbuf2, len);
+ if (wbuf2 != wbuf) {
+ PyMem_RawFree(wbuf2);
+ }
+
+ if (use_bytes) {
+ if (resobj == NULL) {
+ return NULL;
+ }
+ Py_SETREF(resobj, PyUnicode_EncodeFSDefault(resobj));
+ }
+
+ return resobj;
+#else
+ const size_t chunk = 1024;
+
+ char *buf = NULL;
+ char *cwd = NULL;
+ size_t buflen = 0;
- buf = cwd = NULL;
Py_BEGIN_ALLOW_THREADS
do {
- buflen += chunk;
-#ifdef MS_WINDOWS
- if (buflen > INT_MAX) {
- PyErr_NoMemory();
- break;
+ /* Grow the buffer by one chunk, refusing any size that would exceed
+ PY_SSIZE_T_MAX; buf == NULL after the loop means out of memory. */
+ char *newbuf;
+ if (buflen <= PY_SSIZE_T_MAX - chunk) {
+ buflen += chunk;
+ newbuf = PyMem_RawRealloc(buf, buflen);
}
-#endif
- tmpbuf = PyMem_RawRealloc(buf, buflen);
- if (tmpbuf == NULL)
+ else {
+ newbuf = NULL;
+ }
+ if (newbuf == NULL) {
+ PyMem_RawFree(buf);
+ buf = NULL;
break;
+ }
+ buf = newbuf;
- buf = tmpbuf;
-#ifdef MS_WINDOWS
- cwd = getcwd(buf, (int)buflen);
-#else
cwd = getcwd(buf, buflen);
-#endif
} while (cwd == NULL && errno == ERANGE);
Py_END_ALLOW_THREADS
+ if (buf == NULL) {
+ return PyErr_NoMemory();
+ }
if (cwd == NULL) {
PyMem_RawFree(buf);
return posix_error();
}
- if (use_bytes)
+ PyObject *obj;
+ if (use_bytes) {
obj = PyBytes_FromStringAndSize(buf, strlen(buf));
- else
+ }
+ else {
obj = PyUnicode_DecodeFSDefault(buf);
+ }
PyMem_RawFree(buf);
return obj;
+#endif /* !MS_WINDOWS */
}