summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Snow <ericsnowcurrently@gmail.com>2021-10-22 17:20:03 -0600
committerGitHub <noreply@github.com>2021-10-22 17:20:03 -0600
commit17c61045c51512add61a9e75e9c7343cf4e4fb82 (patch)
tree59a98afdea508cacfb4dc4f10acf2f5b402ea0b2
parentf30ad65dbf3c6b1b5eec14dc954d65ef32327857 (diff)
downloadcpython-git-17c61045c51512add61a9e75e9c7343cf4e4fb82.tar.gz
bpo-45506: Normalize _PyPathConfig.stdlib_dir when calculated. (#29040)
The recently added PyConfig.stdlib_dir was being set with ".." entries. When __file__ was added for from modules this caused a problem on out-of-tree builds. This PR fixes that by normalizing "stdlib_dir" when it is calculated in getpath.c. https://bugs.python.org/issue45506
-rw-r--r--Include/internal/pycore_fileutils.h3
-rw-r--r--Lib/test/test_fileutils.py30
-rw-r--r--Lib/test/test_posixpath.py62
-rw-r--r--Modules/_testinternalcapi.c24
-rw-r--r--Modules/getpath.c46
-rw-r--r--Python/fileutils.c95
6 files changed, 236 insertions, 24 deletions
diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h
index ab436ae9b0..d1caf9c237 100644
--- a/Include/internal/pycore_fileutils.h
+++ b/Include/internal/pycore_fileutils.h
@@ -80,6 +80,9 @@ extern int _Py_add_relfile(wchar_t *dirname,
const wchar_t *relfile,
size_t bufsize);
extern size_t _Py_find_basename(const wchar_t *filename);
+PyAPI_FUNC(int) _Py_normalize_path(const wchar_t *path,
+ wchar_t *buf, const size_t buf_len);
+
// Macros to protect CRT calls against instant termination when passed an
// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.
diff --git a/Lib/test/test_fileutils.py b/Lib/test/test_fileutils.py
new file mode 100644
index 0000000000..45b3f3233c
--- /dev/null
+++ b/Lib/test/test_fileutils.py
@@ -0,0 +1,30 @@
+# Run tests for functions in Python/fileutils.c.
+
+import os
+import os.path
+import unittest
+from test.support import import_helper
+
+# Skip this test if the _testcapi module isn't available.
+_testcapi = import_helper.import_module('_testinternalcapi')
+
+
+class PathTests(unittest.TestCase):
+
+ def test_capi_normalize_path(self):
+ if os.name == 'nt':
+ raise unittest.SkipTest('Windows has its own helper for this')
+ else:
+ from .test_posixpath import PosixPathTest as posixdata
+ tests = posixdata.NORMPATH_CASES
+ for filename, expected in tests:
+ if not os.path.isabs(filename):
+ continue
+ with self.subTest(filename):
+ result = _testcapi.normalize_path(filename)
+ self.assertEqual(result, expected,
+ msg=f'input: {filename!r} expected output: {expected!r}')
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py
index 8d398ec010..e4d8384ef0 100644
--- a/Lib/test/test_posixpath.py
+++ b/Lib/test/test_posixpath.py
@@ -304,25 +304,51 @@ class PosixPathTest(unittest.TestCase):
for path in ('~', '~/.local', '~vstinner/'):
self.assertEqual(posixpath.expanduser(path), path)
+ NORMPATH_CASES = [
+ ("", "."),
+ ("/", "/"),
+ ("/.", "/"),
+ ("/./", "/"),
+ ("/.//.", "/"),
+ ("/foo", "/foo"),
+ ("/foo/bar", "/foo/bar"),
+ ("//", "//"),
+ ("///", "/"),
+ ("///foo/.//bar//", "/foo/bar"),
+ ("///foo/.//bar//.//..//.//baz///", "/foo/baz"),
+ ("///..//./foo/.//bar", "/foo/bar"),
+ (".", "."),
+ (".//.", "."),
+ ("..", ".."),
+ ("../", ".."),
+ ("../foo", "../foo"),
+ ("../../foo", "../../foo"),
+ ("../foo/../bar", "../bar"),
+ ("../../foo/../bar/./baz/boom/..", "../../bar/baz"),
+ ("/..", "/"),
+ ("/..", "/"),
+ ("/../", "/"),
+ ("/..//", "/"),
+ ("//..", "//"),
+ ("/../foo", "/foo"),
+ ("/../../foo", "/foo"),
+ ("/../foo/../", "/"),
+ ("/../foo/../bar", "/bar"),
+ ("/../../foo/../bar/./baz/boom/..", "/bar/baz"),
+ ("/../../foo/../bar/./baz/boom/.", "/bar/baz/boom"),
+ ]
+
def test_normpath(self):
- self.assertEqual(posixpath.normpath(""), ".")
- self.assertEqual(posixpath.normpath("/"), "/")
- self.assertEqual(posixpath.normpath("//"), "//")
- self.assertEqual(posixpath.normpath("///"), "/")
- self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar")
- self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"),
- "/foo/baz")
- self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar")
-
- self.assertEqual(posixpath.normpath(b""), b".")
- self.assertEqual(posixpath.normpath(b"/"), b"/")
- self.assertEqual(posixpath.normpath(b"//"), b"//")
- self.assertEqual(posixpath.normpath(b"///"), b"/")
- self.assertEqual(posixpath.normpath(b"///foo/.//bar//"), b"/foo/bar")
- self.assertEqual(posixpath.normpath(b"///foo/.//bar//.//..//.//baz"),
- b"/foo/baz")
- self.assertEqual(posixpath.normpath(b"///..//./foo/.//bar"),
- b"/foo/bar")
+ for path, expected in self.NORMPATH_CASES:
+ with self.subTest(path):
+ result = posixpath.normpath(path)
+ self.assertEqual(result, expected)
+
+ path = path.encode('utf-8')
+ expected = expected.encode('utf-8')
+ with self.subTest(path, type=bytes):
+ result = posixpath.normpath(path)
+ self.assertEqual(result, expected)
@skip_if_ABSTFN_contains_backslash
def test_realpath_curdir(self):
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 1ca06069e1..1f205b873b 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -14,12 +14,14 @@
#include "Python.h"
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
#include "pycore_bitutils.h" // _Py_bswap32()
+#include "pycore_fileutils.h" // _Py_normalize_path
#include "pycore_gc.h" // PyGC_Head
#include "pycore_hashtable.h" // _Py_hashtable_new()
#include "pycore_initconfig.h" // _Py_GetConfigsAsDict()
#include "pycore_interp.h" // _PyInterpreterState_GetConfigCopy()
#include "pycore_pyerrors.h" // _Py_UTF8_Edit_Cost()
#include "pycore_pystate.h" // _PyThreadState_GET()
+#include "osdefs.h" // MAXPATHLEN
static PyObject *
@@ -366,6 +368,27 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args))
}
+static PyObject *
+normalize_path(PyObject *self, PyObject *filename)
+{
+ Py_ssize_t size = -1;
+ wchar_t *encoded = PyUnicode_AsWideCharString(filename, &size);
+ if (encoded == NULL) {
+ return NULL;
+ }
+
+ wchar_t buf[MAXPATHLEN + 1];
+ int res = _Py_normalize_path(encoded, buf, Py_ARRAY_LENGTH(buf));
+ PyMem_Free(encoded);
+ if (res != 0) {
+ PyErr_SetString(PyExc_ValueError, "string too long");
+ return NULL;
+ }
+
+ return PyUnicode_FromWideChar(buf, -1);
+}
+
+
static PyMethodDef TestMethods[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -377,6 +400,7 @@ static PyMethodDef TestMethods[] = {
{"set_config", test_set_config, METH_O},
{"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
{"test_edit_cost", test_edit_cost, METH_NOARGS},
+ {"normalize_path", normalize_path, METH_O, NULL},
{NULL, NULL} /* sentinel */
};
diff --git a/Modules/getpath.c b/Modules/getpath.c
index 1405023b39..4dbd502ddc 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -520,6 +520,42 @@ search_for_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig,
static PyStatus
+calculate_set_stdlib_dir(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
+{
+ // Note that, unlike calculate_set_prefix(), here we allow a negative
+ // prefix_found. That means the source tree Lib dir gets used.
+ if (!calculate->prefix_found) {
+ return _PyStatus_OK();
+ }
+ PyStatus status;
+ wchar_t *prefix = calculate->prefix;
+ if (!_Py_isabs(prefix)) {
+ prefix = _PyMem_RawWcsdup(prefix);
+ if (prefix == NULL) {
+ return _PyStatus_NO_MEMORY();
+ }
+ status = absolutize(&prefix);
+ if (_PyStatus_EXCEPTION(status)) {
+ return status;
+ }
+ }
+ wchar_t buf[MAXPATHLEN + 1];
+ int res = _Py_normalize_path(prefix, buf, Py_ARRAY_LENGTH(buf));
+ if (prefix != calculate->prefix) {
+ PyMem_RawFree(prefix);
+ }
+ if (res < 0) {
+ return PATHLEN_ERR();
+ }
+ pathconfig->stdlib_dir = _PyMem_RawWcsdup(buf);
+ if (pathconfig->stdlib_dir == NULL) {
+ return _PyStatus_NO_MEMORY();
+ }
+ return _PyStatus_OK();
+}
+
+
+static PyStatus
calculate_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
{
wchar_t prefix[MAXPATHLEN+1];
@@ -1494,12 +1530,10 @@ calculate_path(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
}
if (pathconfig->stdlib_dir == NULL) {
- if (calculate->prefix_found) {
- /* This must be done *before* calculate_set_prefix() is called. */
- pathconfig->stdlib_dir = _PyMem_RawWcsdup(calculate->prefix);
- if (pathconfig->stdlib_dir == NULL) {
- return _PyStatus_NO_MEMORY();
- }
+ /* This must be done *before* calculate_set_prefix() is called. */
+ status = calculate_set_stdlib_dir(calculate, pathconfig);
+ if (_PyStatus_EXCEPTION(status)) {
+ return status;
}
}
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 3d8f3a4f16..ac0046cdac 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -2181,6 +2181,101 @@ _Py_find_basename(const wchar_t *filename)
}
+/* Remove navigation elements such as "." and "..".
+
+ This is mostly a C implementation of posixpath.normpath().
+ Return 0 on success. Return -1 if "orig" is too big for the buffer. */
+int
+_Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len)
+{
+ assert(path && *path != L'\0');
+ assert(*path == SEP); // an absolute path
+ if (wcslen(path) + 1 >= buf_len) {
+ return -1;
+ }
+
+ int dots = -1;
+ int check_leading = 1;
+ const wchar_t *buf_start = buf;
+ wchar_t *buf_next = buf;
+ // The resulting filename will never be longer than path.
+ for (const wchar_t *remainder = path; *remainder != L'\0'; remainder++) {
+ wchar_t c = *remainder;
+ buf_next[0] = c;
+ buf_next++;
+ if (c == SEP) {
+ assert(dots <= 2);
+ if (dots == 2) {
+ // Turn "/x/y/../z" into "/x/z".
+ buf_next -= 4; // "/../"
+ assert(*buf_next == SEP);
+ // We cap it off at the root, so "/../spam" becomes "/spam".
+ if (buf_next == buf_start) {
+ buf_next++;
+ }
+ else {
+ // Move to the previous SEP in the buffer.
+ while (*(buf_next - 1) != SEP) {
+ assert(buf_next != buf_start);
+ buf_next--;
+ }
+ }
+ }
+ else if (dots == 1) {
+ // Turn "/./" into "/".
+ buf_next -= 2; // "./"
+ assert(*(buf_next - 1) == SEP);
+ }
+ else if (dots == 0) {
+ // Turn "//" into "/".
+ buf_next--;
+ assert(*(buf_next - 1) == SEP);
+ if (check_leading) {
+ if (buf_next - 1 == buf && *(remainder + 1) != SEP) {
+ // Leave a leading "//" alone, unless "///...".
+ buf_next++;
+ buf_start++;
+ }
+ check_leading = 0;
+ }
+ }
+ dots = 0;
+ }
+ else {
+ check_leading = 0;
+ if (dots >= 0) {
+ if (c == L'.' && dots < 2) {
+ dots++;
+ }
+ else {
+ dots = -1;
+ }
+ }
+ }
+ }
+ if (dots >= 0) {
+ // Strip any trailing dots and trailing slash.
+ buf_next -= dots + 1; // "/" or "/." or "/.."
+ assert(*buf_next == SEP);
+ if (buf_next == buf_start) {
+ // Leave the leading slash for root.
+ buf_next++;
+ }
+ else {
+ if (dots == 2) {
+ // Move to the previous SEP in the buffer.
+ do {
+ assert(buf_next != buf_start);
+ buf_next--;
+ } while (*(buf_next) != SEP);
+ }
+ }
+ }
+ *buf_next = L'\0';
+ return 0;
+}
+
+
/* Get the current directory. buflen is the buffer size in wide characters
including the null character. Decode the path from the locale encoding.