summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJ Wyman <jeremy.wyman@microsoft.com>2015-03-26 18:10:24 -0400
committerEdward Thomson <ethomson@microsoft.com>2015-04-28 14:25:02 -0400
commit1920ee4ef6096f888a9bb19bc329424d2c7ee656 (patch)
treec3c3c5f7c734440389d12da5a64158ed6e52e6e8
parent4c09e19a3764a1e5f3340dabf8104dfed32e7673 (diff)
downloadlibgit2-1920ee4ef6096f888a9bb19bc329424d2c7ee656.tar.gz
Improvements to status performance on Windows.
Changed win32/path_w32.c to use the data returned by FindFirstFile/FindNextFile instead of doing an lstat per file. Avoiding unnecessary directory opens and file scans reduces I/O, improving overall performance. The effect is magnified because NTFS is a kernel-mode file system (as opposed to a user-mode one).
-rw-r--r--src/iterator.c18
-rw-r--r--src/win32/path_w32.c225
-rw-r--r--src/win32/path_w32.h27
-rw-r--r--src/win32/posix.h12
-rw-r--r--src/win32/posix_w32.c86
5 files changed, 281 insertions, 87 deletions
diff --git a/src/iterator.c b/src/iterator.c
index 8bab1aab0..80b7d5faa 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -984,6 +984,21 @@ static void fs_iterator__seek_frame_start(
ff->index = 0;
}
+/*
+ * Load a directory's entries together with their stat data.
+ *
+ * Non-MinGW Windows builds go through the native Win32 loader; every
+ * other build uses the generic path loader.  All arguments are forwarded
+ * unchanged and the callee's return value is returned as-is.
+ */
+GIT_INLINE(int) path_dirload_with_stat(
+	const char *path,
+	size_t prefix_len,
+	unsigned int flags,
+	const char *start_stat,
+	const char *end_stat,
+	git_vector *contents)
+{
+#if !defined(GIT_WIN32) || defined(__MINGW32__)
+	return git_path_dirload_with_stat(
+		path, prefix_len, flags, start_stat, end_stat, contents);
+#else
+	return git_win32_path_dirload_with_stat(
+		path, prefix_len, flags, start_stat, end_stat, contents);
+#endif
+}
+
static int fs_iterator__expand_dir(fs_iterator *fi)
{
int error;
@@ -998,7 +1013,7 @@ static int fs_iterator__expand_dir(fs_iterator *fi)
ff = fs_iterator__alloc_frame(fi);
GITERR_CHECK_ALLOC(ff);
- error = git_path_dirload_with_stat(
+ error = path_dirload_with_stat(
fi->path.ptr, fi->root_len, fi->dirload_flags,
fi->base.start, fi->base.end, &ff->entries);
@@ -1350,6 +1365,7 @@ GIT_INLINE(git_dir_flag) git_entry__dir_flag(git_index_entry *entry) {
? S_ISDIR(entry->mode) ? GIT_DIR_FLAG_TRUE : GIT_DIR_FLAG_FALSE
: GIT_DIR_FLAG_UNKNOWN;
#else
+ GIT_UNUSED(entry);
return GIT_DIR_FLAG_UNKNOWN;
#endif
}
diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c
index d66969c4d..e9bc64a5f 100644
--- a/src/win32/path_w32.c
+++ b/src/win32/path_w32.c
@@ -9,6 +9,9 @@
#include "path.h"
#include "path_w32.h"
#include "utf-conv.h"
+#include "posix.h"
+#include "reparse.h"
+#include "dir.h"
#define PATH__NT_NAMESPACE L"\\\\?\\"
#define PATH__NT_NAMESPACE_LEN 4
@@ -27,6 +30,8 @@
#define path__is_unc(p) \
(((p)[0] == '\\' && (p)[1] == '\\') || ((p)[0] == '/' && (p)[1] == '/'))
+#define PATH__MAX_UNC_LEN (32767)
+
GIT_INLINE(int) path__cwd(wchar_t *path, int size)
{
int len;
@@ -303,3 +308,223 @@ char *git_win32_path_8dot3_name(const char *path)
return shortname;
}
+
+#if !defined(__MINGW32__)
+/*
+ * Build the full UTF-16 path of a directory entry.
+ *
+ * `filter` is the search pattern that was handed to FindFirstFileExW; the
+ * entries it yields live in the directory named by everything up to and
+ * including the pattern's last path separator.  That directory prefix is
+ * copied to `dest` and `name` is appended.
+ *
+ * Returns false, without writing to `dest`, when the result (including
+ * its NUL terminator) would not fit in a git_win32_path.
+ */
+static bool dirload__entry_path_w(
+	git_win32_path dest,
+	const wchar_t *filter,
+	const wchar_t *name)
+{
+	const wchar_t *sep = NULL;
+	const wchar_t *p;
+	size_t dir_len, name_len;
+
+	for (p = filter; *p; p++) {
+		if (*p == L'\\' || *p == L'/')
+			sep = p;
+	}
+
+	dir_len = sep ? (size_t)(sep - filter) + 1 : 0;
+	name_len = wcslen(name);
+
+	if (dir_len + name_len >= GIT_WIN_PATH_UTF16)
+		return false;
+
+	memcpy(dest, filter, dir_len * sizeof(wchar_t));
+	memcpy(dest + dir_len, name, (name_len + 1) * sizeof(wchar_t));
+	return true;
+}
+
+/*
+ * Load the entries of directory `path` into `contents`, filling in each
+ * entry's stat data from the FindFirstFileExW/FindNextFileW results
+ * instead of stat'ing every file separately.
+ *
+ * Each inserted element is a git_path_with_stat allocated with
+ * git__calloc.  Its path is `path + prefix_len` followed by the entry
+ * name, with a trailing '/' appended for directories.  "." and ".." are
+ * skipped, as are entries that compare before `start_stat` or after
+ * `end_stat` (when given).  The comparison is case-insensitive when
+ * GIT_PATH_DIR_IGNORE_CASE is set in `flags`.  Entries that are reparse
+ * points readable by git_win32_path_readlink_w are reported as symlinks
+ * whose st_size is the UTF-8 length of the link target.
+ *
+ * Returns 0 on success after sorting `contents`.  Returns -1 on failure;
+ * elements inserted before the failure are left in `contents`.
+ */
+int git_win32_path_dirload_with_stat(
+	const char *path,
+	size_t prefix_len,
+	unsigned int flags,
+	const char *start_stat,
+	const char *end_stat,
+	git_vector *contents)
+{
+	int error = 0;
+	git_path_with_stat *ps = NULL;
+	git_win32_path pathw;
+	git_win32_path entry_pathw;
+	git_win32_path target;
+	DIR *dir = NULL;
+	int (*strncomp)(const char *a, const char *b, size_t sz);
+	size_t cmp_len;
+	size_t start_len = start_stat ? strlen(start_stat) : 0;
+	size_t end_len = end_stat ? strlen(end_stat) : 0;
+	const char *repo_path = path + prefix_len;
+	size_t repo_path_len = strlen(repo_path);
+	char work_path[PATH__MAX_UNC_LEN];
+	size_t path_len;
+	int name_len, link_len;
+	int in_range;
+	int fMode;
+	int drive;
+
+	/* nothing has been acquired yet, so these failures can return directly */
+	if (!git_win32__findfirstfile_filter(pathw, path)) {
+		giterr_set(GITERR_OS, "Could not parse the path '%s'", path);
+		return -1;
+	}
+
+	/* the prefix must leave room for at least a NUL terminator */
+	if (repo_path_len >= sizeof(work_path)) {
+		giterr_set(GITERR_OS, "Could not open directory '%s'", path);
+		return -1;
+	}
+
+	strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0
+		? git__strncasecmp
+		: git__strncmp;
+
+	dir = git__calloc(1, sizeof(DIR));
+	if (!dir)
+		return -1;
+
+	/* use of the FIND_FIRST_EX_LARGE_FETCH flag in the FindFirstFileExW call
+	 * could benefit performance here when querying large repositories on
+	 * Windows 7 (0x0601) or newer versions of Windows.  doing so could
+	 * introduce compatibility issues on older versions of Windows. */
+	dir->h = FindFirstFileExW(pathw, FindExInfoBasic, &dir->f, FindExSearchNameMatch, NULL, 0);
+	dir->first = 1;
+	if (dir->h == INVALID_HANDLE_VALUE) {
+		error = -1;
+		giterr_set(GITERR_OS, "Could not open directory '%s'", path);
+		goto clean_up_and_exit;
+	}
+
+	/* every entry path starts with the same prefix; copy it only once */
+	memcpy(work_path, repo_path, repo_path_len);
+	drive = _getdrive() - 1;
+
+	for (;;) {
+		if (!git_path_is_dot_or_dotdotW(dir->f.cFileName)) {
+			name_len = git__utf16_to_8(
+				work_path + repo_path_len,
+				sizeof(work_path) - repo_path_len,
+				dir->f.cFileName);
+
+			/* a failed or overlong conversion must never be used as an index */
+			if (name_len < 0 ||
+				(size_t)name_len >= sizeof(work_path) - repo_path_len) {
+				error = -1;
+				giterr_set(GITERR_OS, "Could not convert file name to UTF-8 in '%s'", path);
+				goto clean_up_and_exit;
+			}
+
+			path_len = repo_path_len + (size_t)name_len;
+			work_path[path_len] = '\0';
+
+			cmp_len = min(start_len, path_len);
+			in_range = !(cmp_len && strncomp(work_path, start_stat, cmp_len) < 0);
+
+			if (in_range) {
+				cmp_len = min(end_len, path_len);
+				in_range = !(cmp_len && strncomp(work_path, end_stat, cmp_len) > 0);
+			}
+
+			if (in_range) {
+				fMode = S_IREAD;
+
+				if (dir->f.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
+					fMode |= S_IFDIR;
+				else
+					fMode |= S_IFREG;
+
+				if (!(dir->f.dwFileAttributes & FILE_ATTRIBUTE_READONLY))
+					fMode |= S_IWRITE;
+
+				/* +2: room for a trailing '/' plus the NUL terminator */
+				ps = git__calloc(1, sizeof(git_path_with_stat) + path_len + 2);
+				if (!ps) {
+					error = -1;
+					goto clean_up_and_exit;
+				}
+
+				memcpy(ps->path, work_path, path_len + 1);
+				ps->path_len = path_len;
+				ps->st.st_atime = filetime_to_time_t(&dir->f.ftLastAccessTime);
+				ps->st.st_ctime = filetime_to_time_t(&dir->f.ftCreationTime);
+				ps->st.st_mtime = filetime_to_time_t(&dir->f.ftLastWriteTime);
+				ps->st.st_size = dir->f.nFileSizeHigh;
+				ps->st.st_size <<= 32;
+				ps->st.st_size |= dir->f.nFileSizeLow;
+				ps->st.st_dev = ps->st.st_rdev = drive;
+				ps->st.st_mode = (mode_t)fMode;
+				ps->st.st_ino = 0;
+				ps->st.st_gid = 0;
+				ps->st.st_uid = 0;
+				ps->st.st_nlink = 1;
+
+				/* the link must be opened through its full path: a bare
+				 * entry name would be resolved against the process' current
+				 * directory rather than the directory being listed */
+				if ((dir->f.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
+					dirload__entry_path_w(entry_pathw, pathw, dir->f.cFileName) &&
+					git_win32_path_readlink_w(target, entry_pathw) >= 0) {
+					ps->st.st_mode = (ps->st.st_mode & ~S_IFMT) | S_IFLNK;
+
+					/* st_size gets the UTF-8 length of the target name, in bytes,
+					 * not counting the NULL terminator */
+					link_len = git__utf16_to_8(NULL, 0, target);
+					if (link_len < 0) {
+						error = -1;
+						giterr_set(GITERR_OS, "Could not manage reparse link '%s'", work_path);
+						goto clean_up_and_exit;
+					}
+
+					ps->st.st_size = link_len;
+				}
+
+				/* the mode is always a directory, regular file or symlink
+				 * here, so only the directory suffix needs handling */
+				if (S_ISDIR(ps->st.st_mode)) {
+					ps->path[ps->path_len++] = '/';
+					ps->path[ps->path_len] = '\0';
+				}
+
+				if (git_vector_insert(contents, ps) < 0) {
+					error = -1;
+					goto clean_up_and_exit;
+				}
+
+				/* the vector owns the entry now */
+				ps = NULL;
+			}
+		}
+
+		memset(&dir->f, 0, sizeof(dir->f));
+		dir->first = 0;
+
+		if (!FindNextFileW(dir->h, &dir->f)) {
+			if (GetLastError() == ERROR_NO_MORE_FILES)
+				break;
+
+			error = -1;
+			giterr_set(GITERR_OS, "Could not get attributes for file in '%s'", path);
+			goto clean_up_and_exit;
+		}
+	}
+
+	/* sort now that directory suffix is added */
+	git_vector_sort(contents);
+
+clean_up_and_exit:
+	/* non-NULL only for an entry that was never handed to the vector */
+	git__free(ps);
+
+	if (dir) {
+		if (dir->h != INVALID_HANDLE_VALUE)
+			FindClose(dir->h);
+		git__free(dir);
+	}
+
+	return error;
+}
+#endif
+
+/*
+ * Tell whether a reparse target names a volume: true only when
+ * `target_len` is non-zero and the first 11 characters of `target`
+ * equal the prefix spelled out in `volume_prefix` below.
+ */
+static bool path_is_volume(wchar_t *target, size_t target_len)
+{
+	static const wchar_t volume_prefix[] = L"\\??\\Volume{";
+
+	if (target_len == 0)
+		return false;
+
+	return wcsncmp(target, volume_prefix, 11) == 0;
+}
+
+/*
+ * Read the target of the reparse point at `path` into `dest`.
+ *
+ * On success, returns the length, in characters, of the path stored in
+ * dest.  On failure, returns a negative value:
+ *  - `path` cannot be opened: errno is set to ENOENT;
+ *  - the reparse data cannot be read, the tag is neither a symbolic link
+ *    nor a mount point, or the target is a mounted volume: errno is set
+ *    to EINVAL;
+ *  - the target is empty or does not fit in `dest`: errno is left as is.
+ */
+int git_win32_path_readlink_w(git_win32_path dest, const git_win32_path path)
+{
+	BYTE raw[MAXIMUM_REPARSE_DATA_BUFFER_SIZE];
+	GIT_REPARSE_DATA_BUFFER *rp = (GIT_REPARSE_DATA_BUFFER *)raw;
+	HANDLE h = NULL;
+	DWORD returned;
+	wchar_t *link;
+	size_t link_len;
+	int result = -1;
+
+	/* open the reparse point itself rather than whatever it points at */
+	h = CreateFileW(path, GENERIC_READ,
+		FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING,
+		FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS, NULL);
+
+	if (h == INVALID_HANDLE_VALUE) {
+		errno = ENOENT;
+		return -1;
+	}
+
+	if (!DeviceIoControl(h, FSCTL_GET_REPARSE_POINT, NULL, 0,
+		rp, sizeof(raw), &returned, NULL)) {
+		errno = EINVAL;
+		goto done;
+	}
+
+	/* offsets and lengths in the reparse buffer are in bytes */
+	if (rp->ReparseTag == IO_REPARSE_TAG_SYMLINK) {
+		link = rp->SymbolicLinkReparseBuffer.PathBuffer +
+			(rp->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR));
+		link_len = rp->SymbolicLinkReparseBuffer.SubstituteNameLength / sizeof(WCHAR);
+	} else if (rp->ReparseTag == IO_REPARSE_TAG_MOUNT_POINT) {
+		link = rp->MountPointReparseBuffer.PathBuffer +
+			(rp->MountPointReparseBuffer.SubstituteNameOffset / sizeof(WCHAR));
+		link_len = rp->MountPointReparseBuffer.SubstituteNameLength / sizeof(WCHAR);
+	} else {
+		errno = EINVAL;
+		goto done;
+	}
+
+	if (path_is_volume(link, link_len)) {
+		/* This reparse point represents another volume mounted at this
+		 * location; it is not a symbolic link, so there is no target
+		 * to report. */
+		errno = EINVAL;
+		goto done;
+	}
+
+	if (link_len == 0)
+		goto done;
+
+	/* The path may need to have a prefix removed. */
+	link_len = git_win32__canonicalize_path(link, link_len);
+
+	/* Need one additional character in the target buffer
+	 * for the terminating NULL. */
+	if (link_len < GIT_WIN_PATH_UTF16) {
+		wcscpy(dest, link);
+		result = (int)link_len;
+	}
+
+done:
+	CloseHandle(h);
+	return result;
+}
diff --git a/src/win32/path_w32.h b/src/win32/path_w32.h
index 033afbb0f..57ce732d6 100644
--- a/src/win32/path_w32.h
+++ b/src/win32/path_w32.h
@@ -8,6 +8,7 @@
#define INCLUDE_git_path_w32_h__
#include "common.h"
+#include "vector.h"
/*
* Provides a large enough buffer to support Windows paths: MAX_PATH is
@@ -79,4 +80,30 @@ extern int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src);
*/
extern char *git_win32_path_8dot3_name(const char *path);
+#if !defined(__MINGW32__)
+/**
+ * Load all directory entries along with stat info into a vector.
+ * Performed in a single pass per directory for optimized performance on Windows.
+ *
+ * This adds four things on top of plain `git_path_dirload`:
+ *
+ * 1. Each entry in the vector is a `git_path_with_stat` struct that
+ * contains both the path and the stat info
+ * 2. The entries will be sorted alphabetically
+ * 3. Entries that are directories will be suffixed with a '/'
+ * 4. Optionally, you can pass a start and end prefix, and only elements
+ * after the start and before the end (inclusive) will be stat'ed.
+ *
+ * @param path The directory to read from
+ * @param prefix_len The trailing part of path to prefix to entry paths
+ * @param flags GIT_PATH_DIR_* flags (e.g. GIT_PATH_DIR_IGNORE_CASE)
+ * @param start_stat As optimization, only stat values after this prefix
+ * @param end_stat As optimization, only stat values before this prefix
+ * @param contents Vector to fill with git_path_with_stat structures
+ */
+extern int git_win32_path_dirload_with_stat(const char *path, size_t prefix_len, unsigned int flags, const char *start_stat, const char *end_stat, git_vector *contents);
+#endif
+
+extern int git_win32_path_readlink_w(git_win32_path dest, const git_win32_path path);
+
#endif
diff --git a/src/win32/posix.h b/src/win32/posix.h
index 4bc6bfe2e..1a1ae76b2 100644
--- a/src/win32/posix.h
+++ b/src/win32/posix.h
@@ -49,7 +49,15 @@ extern int p_ftruncate(int fd, git_off_t size);
*/
extern int p_lstat_posixly(const char *filename, struct stat *buf);
-extern struct tm * p_localtime_r (const time_t *timer, struct tm *result);
-extern struct tm * p_gmtime_r (const time_t *timer, struct tm *result);
+extern struct tm * p_localtime_r(const time_t *timer, struct tm *result);
+extern struct tm * p_gmtime_r(const time_t *timer, struct tm *result);
+
+/*
+ * Convert a Win32 FILETIME to a time_t.
+ *
+ * The two 32-bit halves are combined into one 64-bit tick count, the
+ * offset between the Windows and Unix epochs is subtracted, and the
+ * result is divided by 10,000,000 ticks per second (i.e. one tick is
+ * 100 nanoseconds) to yield whole seconds.
+ */
+GIT_INLINE(time_t) filetime_to_time_t(const FILETIME *ft)
+{
+	long long ticks = (long long)ft->dwHighDateTime;
+
+	ticks <<= 32;
+	ticks += ft->dwLowDateTime;
+
+	/* Windows to Unix epoch, then 100ns ticks to seconds */
+	return (time_t)((ticks - 116444736000000000LL) / 10000000);
+}
#endif
diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c
index 544b1ebd5..1c490a8e9 100644
--- a/src/win32/posix_w32.c
+++ b/src/win32/posix_w32.c
@@ -130,88 +130,6 @@ int p_fsync(int fd)
return 0;
}
-GIT_INLINE(time_t) filetime_to_time_t(const FILETIME *ft)
-{
- long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime;
- winTime -= 116444736000000000LL; /* Windows to Unix Epoch conversion */
- winTime /= 10000000; /* Nano to seconds resolution */
- return (time_t)winTime;
-}
-
-static bool path_is_volume(wchar_t *target, size_t target_len)
-{
- return (target_len && wcsncmp(target, L"\\??\\Volume{", 11) == 0);
-}
-
-/* On success, returns the length, in characters, of the path stored in dest.
- * On failure, returns a negative value. */
-static int readlink_w(
- git_win32_path dest,
- const git_win32_path path)
-{
- BYTE buf[MAXIMUM_REPARSE_DATA_BUFFER_SIZE];
- GIT_REPARSE_DATA_BUFFER *reparse_buf = (GIT_REPARSE_DATA_BUFFER *)buf;
- HANDLE handle = NULL;
- DWORD ioctl_ret;
- wchar_t *target;
- size_t target_len;
-
- int error = -1;
-
- handle = CreateFileW(path, GENERIC_READ,
- FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING,
- FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS, NULL);
-
- if (handle == INVALID_HANDLE_VALUE) {
- errno = ENOENT;
- return -1;
- }
-
- if (!DeviceIoControl(handle, FSCTL_GET_REPARSE_POINT, NULL, 0,
- reparse_buf, sizeof(buf), &ioctl_ret, NULL)) {
- errno = EINVAL;
- goto on_error;
- }
-
- switch (reparse_buf->ReparseTag) {
- case IO_REPARSE_TAG_SYMLINK:
- target = reparse_buf->SymbolicLinkReparseBuffer.PathBuffer +
- (reparse_buf->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR));
- target_len = reparse_buf->SymbolicLinkReparseBuffer.SubstituteNameLength / sizeof(WCHAR);
- break;
- case IO_REPARSE_TAG_MOUNT_POINT:
- target = reparse_buf->MountPointReparseBuffer.PathBuffer +
- (reparse_buf->MountPointReparseBuffer.SubstituteNameOffset / sizeof(WCHAR));
- target_len = reparse_buf->MountPointReparseBuffer.SubstituteNameLength / sizeof(WCHAR);
- break;
- default:
- errno = EINVAL;
- goto on_error;
- }
-
- if (path_is_volume(target, target_len)) {
- /* This path is a reparse point that represents another volume mounted
- * at this location, it is not a symbolic link our input was canonical.
- */
- errno = EINVAL;
- error = -1;
- } else if (target_len) {
- /* The path may need to have a prefix removed. */
- target_len = git_win32__canonicalize_path(target, target_len);
-
- /* Need one additional character in the target buffer
- * for the terminating NULL. */
- if (GIT_WIN_PATH_UTF16 > target_len) {
- wcscpy(dest, target);
- error = (int)target_len;
- }
- }
-
-on_error:
- CloseHandle(handle);
- return error;
-}
-
#define WIN32_IS_WSEP(CH) ((CH) == L'/' || (CH) == L'\\')
static int lstat_w(
@@ -249,7 +167,7 @@ static int lstat_w(
if (fdata.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) {
git_win32_path target;
- if (readlink_w(target, path) >= 0) {
+ if (git_win32_path_readlink_w(target, path) >= 0) {
buf->st_mode = (buf->st_mode & ~S_IFMT) | S_IFLNK;
/* st_size gets the UTF-8 length of the target name, in bytes,
@@ -331,7 +249,7 @@ int p_readlink(const char *path, char *buf, size_t bufsiz)
* we need to buffer the result on the stack. */
if (git_win32_path_from_utf8(path_w, path) < 0 ||
- readlink_w(target_w, path_w) < 0 ||
+ git_win32_path_readlink_w(target_w, path_w) < 0 ||
(len = git_win32_path_to_utf8(target, target_w)) < 0)
return -1;