summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mark Hammond <mhammond@skippinet.com.au>, 2009-05-06 08:04:54 +0000
committer: Mark Hammond <mhammond@skippinet.com.au>, 2009-05-06 08:04:54 +0000
commit: 5a607a3ee5e81bdcef3f886f9d20c1376a533df4 (patch)
tree: 8f345de07eede5253b7507a925973bd2b8c1cdbb
parent: 9348901e24da507d5e8c68b8bad8b9b2827a4596 (diff)
download: cpython-git-5a607a3ee5e81bdcef3f886f9d20c1376a533df4.tar.gz
Issue #5799: ntpath (i.e., os.path on Windows) fully supports UNC pathnames.
By Larry Hastings, reviewed by eric.smith and mark.hammond.
-rw-r--r--  Doc/library/os.path.rst  19
-rw-r--r--  Lib/ntpath.py  175
-rw-r--r--  Lib/test/test_ntpath.py  59
-rw-r--r--  Misc/NEWS  4
4 files changed, 183 insertions, 74 deletions
diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst
index c85412e749..6d177f81a2 100644
--- a/Doc/library/os.path.rst
+++ b/Doc/library/os.path.rst
@@ -23,10 +23,6 @@ applications should use string objects to access all files.
their parameters. The result is an object of the same type, if a path or
file name is returned.
-.. note::
-
- On Windows, many of these functions do not properly support UNC pathnames.
- :func:`splitunc` and :func:`ismount` do handle them correctly.
.. note::
@@ -266,10 +262,20 @@ applications should use string objects to access all files.
.. function:: splitdrive(path)
Split the pathname *path* into a pair ``(drive, tail)`` where *drive* is either
- a drive specification or the empty string. On systems which do not use drive
+ a mount point or the empty string. On systems which do not use drive
specifications, *drive* will always be the empty string. In all cases, ``drive
+ tail`` will be the same as *path*.
+ On Windows, splits a pathname into drive/UNC sharepoint and relative path.
+
+ If the path contains a drive letter, drive will contain everything
+ up to and including the colon.
+ e.g. ``splitdrive("c:/dir")`` returns ``("c:", "/dir")``
+
+ If the path contains a UNC path, *drive* will contain the host name
+ and share, up to but not including the fourth separator.
+ e.g. ``splitdrive("//host/share/dir")`` returns ``("//host/share", "/dir")``
+
.. function:: splitext(path)
@@ -281,6 +287,9 @@ applications should use string objects to access all files.
.. function:: splitunc(path)
+ .. deprecated:: 3.1
+ Use :func:`splitdrive` instead.
+
Split the pathname *path* into a pair ``(unc, rest)`` so that *unc* is the UNC
mount point (such as ``r'\\host\mount'``), if present, and *rest* the rest of
the path (such as ``r'\path\file.ext'``). For paths containing drive letters,
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index ac928e1a92..2fd26b1f3d 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -34,6 +34,12 @@ elif 'os2' in sys.builtin_module_names:
altsep = '/'
devnull = 'nul'
+def _get_empty(path):
+ if isinstance(path, bytes):
+ return b''
+ else:
+ return ''
+
def _get_sep(path):
if isinstance(path, bytes):
return b'\\'
@@ -76,9 +82,9 @@ def normcase(s):
# Return whether a path is absolute.
-# Trivial in Posix, harder on the Mac or MS-DOS.
-# For DOS it is absolute if it starts with a slash or backslash (current
-# volume), or if a pathname after the volume letter and colon / UNC resource
+# Trivial in Posix, harder on Windows.
+# For Windows it is absolute if it starts with a slash or backslash (current
+# volume), or if a pathname after the volume-letter-and-colon or UNC-resource
# starts with a slash or backslash.
def isabs(s):
@@ -104,22 +110,40 @@ def join(a, *p):
elif isabs(b):
# This probably wipes out path so far. However, it's more
- # complicated if path begins with a drive letter:
+ # complicated if path begins with a drive letter. You get a+b
+ # (minus redundant slashes) in these four cases:
# 1. join('c:', '/a') == 'c:/a'
- # 2. join('c:/', '/a') == 'c:/a'
- # But
- # 3. join('c:/a', '/b') == '/b'
- # 4. join('c:', 'd:/') = 'd:/'
- # 5. join('c:/', 'd:/') = 'd:/'
- if path[1:2] != colon or b[1:2] == colon:
- # Path doesn't start with a drive letter, or cases 4 and 5.
- b_wins = 1
-
- # Else path has a drive letter, and b doesn't but is absolute.
- elif len(path) > 3 or (len(path) == 3 and
- path[-1:] not in seps):
- # case 3
+ # 2. join('//computer/share', '/a') == '//computer/share/a'
+ # 3. join('c:/', '/a') == 'c:/a'
+ # 4. join('//computer/share/', '/a') == '//computer/share/a'
+ # But b wins in all of these cases:
+ # 5. join('c:/a', '/b') == '/b'
+ # 6. join('//computer/share/a', '/b') == '/b'
+ # 7. join('c:', 'd:/') == 'd:/'
+ # 8. join('c:', '//computer/share/') == '//computer/share/'
+ # 9. join('//computer/share', 'd:/') == 'd:/'
+ # 10. join('//computer/share', '//computer/share/') == '//computer/share/'
+ # 11. join('c:/', 'd:/') == 'd:/'
+ # 12. join('c:/', '//computer/share/') == '//computer/share/'
+ # 13. join('//computer/share/', 'd:/') == 'd:/'
+ # 14. join('//computer/share/', '//computer/share/') == '//computer/share/'
+ b_prefix, b_rest = splitdrive(b)
+
+ # if b has a prefix, it always wins.
+ if b_prefix:
b_wins = 1
+ else:
+ # b doesn't have a prefix.
+ # but isabs(b) returned true.
+ # and therefore b_rest[0] must be a slash.
+ # (but let's check that.)
+ assert(b_rest and b_rest[0] in seps)
+
+ # so, b still wins if path has a rest that's more than a sep.
+ # you get a+b if path_rest is empty or only has a sep.
+ # (see cases 1-4 for times when b loses.)
+ path_rest = splitdrive(path)[1]
+ b_wins = path_rest and path_rest not in seps
if b_wins:
path = b
@@ -152,22 +176,64 @@ def join(a, *p):
# colon) and the path specification.
# It is always true that drivespec + pathspec == p
def splitdrive(p):
- """Split a pathname into drive and path specifiers. Returns a 2-tuple
-"(drive,path)"; either part may be empty"""
- if p[1:2] == _get_colon(p):
- return p[0:2], p[2:]
- return p[:0], p
+ """Split a pathname into drive/UNC sharepoint and relative path specifiers.
+ Returns a 2-tuple (drive_or_unc, path); either part may be empty.
+
+ If you assign
+ result = splitdrive(p)
+ then it is always true that:
+ result[0] + result[1] == p
+
+ If the path contained a drive letter, drive_or_unc will contain everything
+ up to and including the colon. e.g. splitdrive("c:/dir") returns ("c:", "/dir")
+
+ If the path contained a UNC path, the drive_or_unc will contain the host name
+ and share up to but not including the fourth directory separator character.
+ e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir")
+
+ Paths cannot contain both a drive letter and a UNC path.
+
+ """
+ empty = _get_empty(p)
+ if len(p) > 1:
+ sep = _get_sep(p)
+ normp = normcase(p)
+ if (normp[0:2] == sep*2) and (normp[2:3] != sep):
+ # is a UNC path:
+ # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
+ # \\machine\mountpoint\directory\etc\...
+ # directory ^^^^^^^^^^^^^^^
+ index = normp.find(sep, 2)
+ if index == -1:
+ return empty, p
+ index2 = normp.find(sep, index + 1)
+ # a UNC path can't have two slashes in a row
+ # (after the initial two)
+ if index2 == index + 1:
+ return empty, p
+ if index2 == -1:
+ index2 = len(p)
+ return p[:index2], p[index2:]
+ if normp[1:2] == _get_colon(p):
+ return p[:2], p[2:]
+ return empty, p
# Parse UNC paths
def splitunc(p):
- """Split a pathname into UNC mount point and relative path specifiers.
+ """Deprecated since Python 3.1. Please use splitdrive() instead;
+ it now handles UNC paths.
+
+ Split a pathname into UNC mount point and relative path specifiers.
Return a 2-tuple (unc, rest); either part may be empty.
If unc is not empty, it has the form '//host/mount' (or similar
using backslashes). unc+rest is always the input path.
Paths containing drive letters never have an UNC part.
"""
+ import warnings
+ warnings.warn("ntpath.splitunc is deprecated, use ntpath.splitdrive instead",
+ PendingDeprecationWarning)
sep = _get_sep(p)
if not p[1:2]:
return p[:0], p # Drive letter present
@@ -256,12 +322,11 @@ lexists = exists
def ismount(path):
"""Test whether a path is a mount point (defined as root of drive)"""
- unc, rest = splitunc(path)
seps = _get_bothseps(path)
- if unc:
- return rest in p[:0] + seps
- p = splitdrive(path)[1]
- return len(p) == 1 and p[0] in seps
+ root, rest = splitdrive(path)
+ if root and root[0] in seps:
+ return (not rest) or (rest in seps)
+ return rest in seps
# Expand paths beginning with '~' or '~user'.
@@ -445,25 +510,12 @@ def normpath(path):
dotdot = _get_dot(path) * 2
path = path.replace(_get_altsep(path), sep)
prefix, path = splitdrive(path)
- # We need to be careful here. If the prefix is empty, and the path starts
- # with a backslash, it could either be an absolute path on the current
- # drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It
- # is therefore imperative NOT to collapse multiple backslashes blindly in
- # that case.
- # The code below preserves multiple backslashes when there is no drive
- # letter. This means that the invalid filename \\\a\b is preserved
- # unchanged, where a\\\b is normalised to a\b. It's not clear that there
- # is any better behaviour for such edge cases.
- if not prefix:
- # No drive letter - preserve initial backslashes
- while path[:1] == sep:
- prefix = prefix + sep
- path = path[1:]
- else:
- # We have a drive letter - collapse initial backslashes
- if path.startswith(sep):
- prefix = prefix + sep
- path = path.lstrip(sep)
+
+ # collapse initial backslashes
+ if path.startswith(sep):
+ prefix = prefix + sep
+ path = path.lstrip(sep)
+
comps = path.split(sep)
i = 0
while i < len(comps):
@@ -528,22 +580,23 @@ def relpath(path, start=curdir):
if not path:
raise ValueError("no path specified")
- start_list = abspath(start).split(sep)
- path_list = abspath(path).split(sep)
- if start_list[0].lower() != path_list[0].lower():
- unc_path, rest = splitunc(path)
- unc_start, rest = splitunc(start)
- if bool(unc_path) ^ bool(unc_start):
- raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)"
- % (path, start))
- else:
- raise ValueError("path is on drive %s, start on drive %s"
- % (path_list[0], start_list[0]))
+
+ start_abs = abspath(normpath(start))
+ path_abs = abspath(normpath(path))
+ start_drive, start_rest = splitdrive(start_abs)
+ path_drive, path_rest = splitdrive(path_abs)
+ if start_drive != path_drive:
+ error = "path is on mount '{0}', start on mount '{1}'".format(
+ path_drive, start_drive)
+ raise ValueError(error)
+
+ start_list = [x for x in start_rest.split(sep) if x]
+ path_list = [x for x in path_rest.split(sep) if x]
# Work out how much of the filepath is shared by start and path.
- for i in range(min(len(start_list), len(path_list))):
- if start_list[i].lower() != path_list[i].lower():
+ i = 0
+ for e1, e2 in zip(start_list, path_list):
+ if e1 != e2:
break
- else:
i += 1
if isinstance(path, bytes):
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index 2b2b7d5360..4a7a48b19c 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -30,6 +30,7 @@ def tester(fn, wantResult):
raise TestFailed("%s should return: %s but returned: %s" \
%(str(fn), str(wantResult), repr(gotResult)))
+
class TestNtpath(unittest.TestCase):
def test_splitext(self):
tester('ntpath.splitext("foo.ext")', ('foo', '.ext'))
@@ -48,12 +49,18 @@ class TestNtpath(unittest.TestCase):
('c:', '\\foo\\bar'))
tester('ntpath.splitdrive("c:/foo/bar")',
('c:', '/foo/bar'))
-
- def test_splitunc(self):
- tester('ntpath.splitunc("\\\\conky\\mountpoint\\foo\\bar")',
+ tester('ntpath.splitdrive("\\\\conky\\mountpoint\\foo\\bar")',
('\\\\conky\\mountpoint', '\\foo\\bar'))
- tester('ntpath.splitunc("//conky/mountpoint/foo/bar")',
+ tester('ntpath.splitdrive("//conky/mountpoint/foo/bar")',
('//conky/mountpoint', '/foo/bar'))
+ tester('ntpath.splitdrive("\\\\\\conky\\mountpoint\\foo\\bar")',
+ ('', '\\\\\\conky\\mountpoint\\foo\\bar'))
+ tester('ntpath.splitdrive("///conky/mountpoint/foo/bar")',
+ ('', '///conky/mountpoint/foo/bar'))
+ tester('ntpath.splitdrive("\\\\conky\\\\mountpoint\\foo\\bar")',
+ ('', '\\\\conky\\\\mountpoint\\foo\\bar'))
+ tester('ntpath.splitdrive("//conky//mountpoint/foo/bar")',
+ ('', '//conky//mountpoint/foo/bar'))
def test_split(self):
tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar'))
@@ -62,10 +69,10 @@ class TestNtpath(unittest.TestCase):
tester('ntpath.split("c:\\")', ('c:\\', ''))
tester('ntpath.split("\\\\conky\\mountpoint\\")',
- ('\\\\conky\\mountpoint', ''))
+ ('\\\\conky\\mountpoint\\', ''))
tester('ntpath.split("c:/")', ('c:/', ''))
- tester('ntpath.split("//conky/mountpoint/")', ('//conky/mountpoint', ''))
+ tester('ntpath.split("//conky/mountpoint/")', ('//conky/mountpoint/', ''))
def test_isabs(self):
tester('ntpath.isabs("c:\\")', 1)
@@ -116,6 +123,33 @@ class TestNtpath(unittest.TestCase):
tester("ntpath.join('a\\', '')", 'a\\')
tester("ntpath.join('a\\', '', '', '', '')", 'a\\')
+ # from comment in ntpath.join
+ tester("ntpath.join('c:', '/a')", 'c:/a')
+ tester("ntpath.join('//computer/share', '/a')", '//computer/share/a')
+ tester("ntpath.join('c:/', '/a')", 'c:/a')
+ tester("ntpath.join('//computer/share/', '/a')", '//computer/share/a')
+ tester("ntpath.join('c:/a', '/b')", '/b')
+ tester("ntpath.join('//computer/share/a', '/b')", '/b')
+ tester("ntpath.join('c:', 'd:/')", 'd:/')
+ tester("ntpath.join('c:', '//computer/share/')", '//computer/share/')
+ tester("ntpath.join('//computer/share', 'd:/')", 'd:/')
+ tester("ntpath.join('//computer/share', '//computer/share/')", '//computer/share/')
+ tester("ntpath.join('c:/', 'd:/')", 'd:/')
+ tester("ntpath.join('c:/', '//computer/share/')", '//computer/share/')
+ tester("ntpath.join('//computer/share/', 'd:/')", 'd:/')
+ tester("ntpath.join('//computer/share/', '//computer/share/')", '//computer/share/')
+
+ tester("ntpath.join('c:', '//computer/share/')", '//computer/share/')
+ tester("ntpath.join('c:/', '//computer/share/')", '//computer/share/')
+ tester("ntpath.join('c:/', '//computer/share/a/b')", '//computer/share/a/b')
+
+ tester("ntpath.join('\\\\computer\\share\\', 'a', 'b')", '\\\\computer\\share\\a\\b')
+ tester("ntpath.join('\\\\computer\\share', 'a', 'b')", '\\\\computer\\share\\a\\b')
+ tester("ntpath.join('\\\\computer\\share', 'a\\b')", '\\\\computer\\share\\a\\b')
+ tester("ntpath.join('//computer/share/', 'a', 'b')", '//computer/share/a\\b')
+ tester("ntpath.join('//computer/share', 'a', 'b')", '//computer/share\\a\\b')
+ tester("ntpath.join('//computer/share', 'a/b')", '//computer/share\\a/b')
+
def test_normpath(self):
tester("ntpath.normpath('A//////././//.//B')", r'A\B')
tester("ntpath.normpath('A/./B')", r'A\B')
@@ -174,10 +208,9 @@ class TestNtpath(unittest.TestCase):
# from any platform.
try:
import nt
+ tester('ntpath.abspath("C:\\")', "C:\\")
except ImportError:
pass
- else:
- tester('ntpath.abspath("C:\\")', "C:\\")
def test_relpath(self):
currentdir = os.path.split(os.getcwd())[-1]
@@ -188,8 +221,18 @@ class TestNtpath(unittest.TestCase):
tester('ntpath.relpath("a", "../b")', '..\\'+currentdir+'\\a')
tester('ntpath.relpath("a/b", "../c")', '..\\'+currentdir+'\\a\\b')
tester('ntpath.relpath("a", "b/c")', '..\\..\\a')
+ tester('ntpath.relpath("c:/foo/bar/bat", "c:/x/y")', '..\\..\\foo\\bar\\bat')
tester('ntpath.relpath("//conky/mountpoint/a", "//conky/mountpoint/b/c")', '..\\..\\a')
tester('ntpath.relpath("a", "a")', '.')
+ tester('ntpath.relpath("/foo/bar/bat", "/x/y/z")', '..\\..\\..\\foo\\bar\\bat')
+ tester('ntpath.relpath("/foo/bar/bat", "/foo/bar")', 'bat')
+ tester('ntpath.relpath("/foo/bar/bat", "/")', 'foo\\bar\\bat')
+ tester('ntpath.relpath("/", "/foo/bar/bat")', '..\\..\\..')
+ tester('ntpath.relpath("/foo/bar/bat", "/x")', '..\\foo\\bar\\bat')
+ tester('ntpath.relpath("/x", "/foo/bar/bat")', '..\\..\\..\\x')
+ tester('ntpath.relpath("/", "/")', '.')
+ tester('ntpath.relpath("/a", "/a")', '.')
+ tester('ntpath.relpath("/a/b", "/a/b")', '.')
def test_main():
diff --git a/Misc/NEWS b/Misc/NEWS
index 45d9a49d23..35490d2d2d 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@ What's New in Python 3.1 beta 1?
Core and Builtins
-----------------
+- Issue #5799: ntpath (i.e., os.path on Windows) fully supports UNC pathnames
+ in all operations, including splitdrive, split, etc. splitunc() now issues
+ a PendingDeprecationWarning.
+
- Issue #5920: For float.__format__, change the behavior with the
empty presentation type (that is, not one of 'e', 'f', 'g', or 'n')
to be like 'g' but with at least one decimal point and with a