diff options
Diffstat (limited to 'Python/fileutils.c')
-rw-r--r-- | Python/fileutils.c | 235 |
1 files changed, 144 insertions, 91 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c index ac0046cdac..cae6b75b6a 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2000,13 +2000,28 @@ _Py_wrealpath(const wchar_t *path, #endif -#ifndef MS_WINDOWS int _Py_isabs(const wchar_t *path) { +#ifdef MS_WINDOWS + const wchar_t *tail; + HRESULT hr = PathCchSkipRoot(path, &tail); + if (FAILED(hr) || path == tail) { + return 0; + } + if (tail == &path[1] && (path[0] == SEP || path[0] == ALTSEP)) { + // Exclude paths with leading SEP + return 0; + } + if (tail == &path[2] && path[1] == L':') { + // Exclude drive-relative paths (e.g. C:filename.ext) + return 0; + } + return 1; +#else return (path[0] == SEP); -} #endif +} /* Get an absolute path. @@ -2017,6 +2032,22 @@ _Py_isabs(const wchar_t *path) int _Py_abspath(const wchar_t *path, wchar_t **abspath_p) { + if (path[0] == '\0' || !wcscmp(path, L".")) { + wchar_t cwd[MAXPATHLEN + 1]; + cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0; + if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) { + /* unable to get the current directory */ + return -1; + } + *abspath_p = _PyMem_RawWcsdup(cwd); + return 0; + } + + if (_Py_isabs(path)) { + *abspath_p = _PyMem_RawWcsdup(path); + return 0; + } + #ifdef MS_WINDOWS wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf; DWORD result; @@ -2028,7 +2059,7 @@ _Py_abspath(const wchar_t *path, wchar_t **abspath_p) return -1; } - if (result > Py_ARRAY_LENGTH(woutbuf)) { + if (result >= Py_ARRAY_LENGTH(woutbuf)) { if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) { woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t)); } @@ -2055,11 +2086,6 @@ _Py_abspath(const wchar_t *path, wchar_t **abspath_p) *abspath_p = _PyMem_RawWcsdup(woutbufp); return 0; #else - if (_Py_isabs(path)) { - *abspath_p = _PyMem_RawWcsdup(path); - return 0; - } - wchar_t cwd[MAXPATHLEN + 1]; cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0; if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) { @@ -2102,7 +2128,8 @@ join_relfile(wchar_t *buffer, size_t bufsize, const wchar_t *dirname, const wchar_t *relfile) { #ifdef MS_WINDOWS - if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile, 0))) { + if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile, + PATHCCH_ALLOW_LONG_PATHS | PATHCCH_FORCE_ENABLE_LONG_NAME_PROCESS))) { return -1; } #else @@ -2180,99 +2207,125 @@ _Py_find_basename(const wchar_t *filename) return 0; } - -/* Remove navigation elements such as "." and "..". - - This is mostly a C implementation of posixpath.normpath(). - Return 0 on success. Return -1 if "orig" is too big for the buffer. */ -int -_Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len) +/* In-place path normalisation. Returns the start of the normalized + path, which will be within the original buffer. Guaranteed to not + make the path longer, and will not fail. 'size' is the length of + the path, if known. If -1, the first null character will be assumed + to be the end of the path. */ +wchar_t * +_Py_normpath(wchar_t *path, Py_ssize_t size) { - assert(path && *path != L'\0'); - assert(*path == SEP); // an absolute path - if (wcslen(path) + 1 >= buf_len) { - return -1; + if (!path[0] || size == 0) { + return path; } + wchar_t lastC = L'\0'; + wchar_t *p1 = path; + wchar_t *pEnd = size >= 0 ? &path[size] : NULL; + wchar_t *p2 = path; + wchar_t *minP2 = path; - int dots = -1; - int check_leading = 1; - const wchar_t *buf_start = buf; - wchar_t *buf_next = buf; - // The resulting filename will never be longer than path. - for (const wchar_t *remainder = path; *remainder != L'\0'; remainder++) { - wchar_t c = *remainder; - buf_next[0] = c; - buf_next++; - if (c == SEP) { - assert(dots <= 2); - if (dots == 2) { - // Turn "/x/y/../z" into "/x/z". - buf_next -= 4; // "/../" - assert(*buf_next == SEP); - // We cap it off at the root, so "/../spam" becomes "/spam". - if (buf_next == buf_start) { - buf_next++; - } - else { - // Move to the previous SEP in the buffer. - while (*(buf_next - 1) != SEP) { - assert(buf_next != buf_start); - buf_next--; - } - } - } - else if (dots == 1) { - // Turn "/./" into "/". - buf_next -= 2; // "./" - assert(*(buf_next - 1) == SEP); - } - else if (dots == 0) { - // Turn "//" into "/". - buf_next--; - assert(*(buf_next - 1) == SEP); - if (check_leading) { - if (buf_next - 1 == buf && *(remainder + 1) != SEP) { - // Leave a leading "//" alone, unless "///...". - buf_next++; - buf_start++; - } - check_leading = 0; - } - } - dots = 0; +#define IS_END(x) (pEnd ? (x) == pEnd : !*(x)) +#ifdef ALTSEP +#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP) +#else +#define IS_SEP(x) (*(x) == SEP) +#endif +#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) + + // Skip leading '.\' + if (p1[0] == L'.' && IS_SEP(&p1[1])) { + path = &path[2]; + while (IS_SEP(path) && !IS_END(path)) { + path++; } - else { - check_leading = 0; - if (dots >= 0) { - if (c == L'.' && dots < 2) { - dots++; - } - else { - dots = -1; - } + p1 = p2 = minP2 = path; + lastC = SEP; + } +#ifdef MS_WINDOWS + // Skip past drive segment and update minP2 + else if (p1[0] && p1[1] == L':') { + *p2++ = *p1++; + *p2++ = *p1++; + minP2 = p2; + lastC = L':'; + } + // Skip past all \\-prefixed paths, including \\?\, \\.\, + // and network paths, including the first segment. + else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) { + int sepCount = 2; + *p2++ = SEP; + *p2++ = SEP; + p1 += 2; + for (; !IS_END(p1) && sepCount; ++p1) { + if (IS_SEP(p1)) { + --sepCount; + *p2++ = lastC = SEP; + } else { + *p2++ = lastC = *p1; } } + minP2 = p2; } - if (dots >= 0) { - // Strip any trailing dots and trailing slash. - buf_next -= dots + 1; // "/" or "/." or "/.." - assert(*buf_next == SEP); - if (buf_next == buf_start) { - // Leave the leading slash for root. - buf_next++; +#else + // Skip past two leading SEPs + else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) { + *p2++ = *p1++; + *p2++ = *p1++; + minP2 = p2; + lastC = SEP; + } +#endif /* MS_WINDOWS */ + + /* if pEnd is specified, check that. Else, check for null terminator */ + for (; !IS_END(p1); ++p1) { + wchar_t c = *p1; +#ifdef ALTSEP + if (c == ALTSEP) { + c = SEP; } - else { - if (dots == 2) { - // Move to the previous SEP in the buffer. - do { - assert(buf_next != buf_start); - buf_next--; - } while (*(buf_next) != SEP); +#endif + if (lastC == SEP) { + if (c == L'.') { + int sep_at_1 = SEP_OR_END(&p1[1]); + int sep_at_2 = !sep_at_1 && SEP_OR_END(&p1[2]); + if (sep_at_2 && p1[1] == L'.') { + wchar_t *p3 = p2; + while (p3 != minP2 && *--p3 == SEP) { } + while (p3 != minP2 && *(p3 - 1) != SEP) { --p3; } + if (p3[0] == L'.' && p3[1] == L'.' && IS_SEP(&p3[2])) { + // Previous segment is also ../, so append instead + *p2++ = L'.'; + *p2++ = L'.'; + lastC = L'.'; + } else if (p3[0] == SEP) { + // Absolute path, so absorb segment + p2 = p3 + 1; + } else { + p2 = p3; + } + p1 += 1; + } else if (sep_at_1) { + } else { + *p2++ = lastC = c; + } + } else if (c == SEP) { + } else { + *p2++ = lastC = c; } + } else { + *p2++ = lastC = c; + } + } + *p2 = L'\0'; + if (p2 != minP2) { + while (--p2 != minP2 && *p2 == SEP) { + *p2 = L'\0'; } } - *buf_next = L'\0'; - return 0; +#undef SEP_OR_END +#undef IS_SEP +#undef IS_END + return path; } |