From 5d5cea67af386cfd53428f1eb404841eca8e9062 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Apr 2007 21:13:58 -0700 Subject: Avoid overflowing name buffer in deep directory structures This just makes sure that when we do a read_directory(), we check that the filename fits in the buffer we allocated (with a bit of slop) Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- dir.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'dir.c') diff --git a/dir.c b/dir.c index 7426fde330..4f5a2241e6 100644 --- a/dir.c +++ b/dir.c @@ -353,6 +353,9 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co !strcmp(de->d_name + 1, "git"))) continue; len = strlen(de->d_name); + /* Ignore overly long pathnames! */ + if (len + baselen + 8 > sizeof(fullname)) + continue; memcpy(fullname + baselen, de->d_name, len+1); if (simplify_away(fullname, baselen + len, simplify)) continue; -- cgit v1.2.1 From 095952585c2a955f45deac69df17a702d7584c80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 11 Apr 2007 14:49:44 -0700 Subject: Teach directory traversal about subprojects This is the promised cleaned-up version of teaching directory traversal (ie the "read_directory()" logic) about subprojects. That makes "git add" understand to add/update subprojects. It now knows to look at the index file to see if a directory is marked as a subproject, and use that as information as whether it should be recursed into or not. It also generally cleans up the handling of directory entries when traversing the working tree, by splitting up the decision-making process into small functions of their own, and adding a fair number of comments. 
Finally, it teaches "add_file_to_cache()" that directory names can have slashes at the end, since the directory traversal adds them to make the difference between a file and a directory clear (it always did that, but my previous too-ugly-to-apply subproject patch had a totally different path for subproject directories and avoided the slash for that case). Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- dir.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 115 insertions(+), 18 deletions(-) (limited to 'dir.c') diff --git a/dir.c b/dir.c index 4f5a2241e6..7b91501255 100644 --- a/dir.c +++ b/dir.c @@ -7,12 +7,17 @@ */ #include "cache.h" #include "dir.h" +#include "refs.h" struct path_simplify { int len; const char *path; }; +static int read_directory_recursive(struct dir_struct *dir, + const char *path, const char *base, int baselen, + int check_only, const struct path_simplify *simplify); + int common_prefix(const char **pathspec) { const char *path, *slash, *next; @@ -286,15 +291,109 @@ struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int return ent; } -static int dir_exists(const char *dirname, int len) +enum exist_status { + index_nonexistent = 0, + index_directory, + index_gitdir, +}; + +/* + * The index sorts alphabetically by entry name, which + * means that a gitlink sorts as '\0' at the end, while + * a directory (which is defined not as an entry, but as + * the files it contains) will sort with the '/' at the + * end. 
+ */ +static enum exist_status directory_exists_in_index(const char *dirname, int len) { int pos = cache_name_pos(dirname, len); - if (pos >= 0) - return 1; - pos = -pos-1; - if (pos >= active_nr) /* can't */ - return 0; - return !strncmp(active_cache[pos]->name, dirname, len); + if (pos < 0) + pos = -pos-1; + while (pos < active_nr) { + struct cache_entry *ce = active_cache[pos++]; + unsigned char endchar; + + if (strncmp(ce->name, dirname, len)) + break; + endchar = ce->name[len]; + if (endchar > '/') + break; + if (endchar == '/') + return index_directory; + if (!endchar && S_ISDIRLNK(ntohl(ce->ce_mode))) + return index_gitdir; + } + return index_nonexistent; +} + +/* + * When we find a directory when traversing the filesystem, we + * have three distinct cases: + * + * - ignore it + * - see it as a directory + * - recurse into it + * + * and which one we choose depends on a combination of existing + * git index contents and the flags passed into the directory + * traversal routine. + * + * Case 1: If we *already* have entries in the index under that + * directory name, we always recurse into the directory to see + * all the files. + * + * Case 2: If we *already* have that directory name as a gitlink, + * we always continue to see it as a gitlink, regardless of whether + * there is an actual git directory there or not (it might not + * be checked out as a subproject!) + * + * Case 3: if we didn't have it in the index previously, we + * have a few sub-cases: + * + * (a) if "show_other_directories" is true, we show it as + * just a directory, unless "hide_empty_directories" is + * also true and the directory is empty, in which case + * we just ignore it entirely. + * (b) if it looks like a git directory, and we don't have + * 'no_dirlinks' set we treat it as a gitlink, and show it + * as a directory. + * (c) otherwise, we recurse into it. 
+ */ +enum directory_treatment { + show_directory, + ignore_directory, + recurse_into_directory, +}; + +static enum directory_treatment treat_directory(struct dir_struct *dir, + const char *dirname, int len, + const struct path_simplify *simplify) +{ + /* The "len-1" is to strip the final '/' */ + switch (directory_exists_in_index(dirname, len-1)) { + case index_directory: + return recurse_into_directory; + + case index_gitdir: + return show_directory; + + case index_nonexistent: + if (dir->show_other_directories) + break; + if (!dir->no_dirlinks) { + unsigned char sha1[20]; + if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0) + return show_directory; + } + return recurse_into_directory; + } + + /* This is the "show_other_directories" case */ + if (!dir->hide_empty_directories) + return show_directory; + if (!read_directory_recursive(dir, dirname, dirname, len, 1, simplify)) + return ignore_directory; + return show_directory; } /* @@ -380,19 +479,17 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co case DT_DIR: memcpy(fullname + baselen + len, "/", 2); len++; - if (dir->show_other_directories && - !dir_exists(fullname, baselen + len)) { - if (dir->hide_empty_directories && - !read_directory_recursive(dir, - fullname, fullname, - baselen + len, 1, simplify)) - continue; + switch (treat_directory(dir, fullname, baselen + len, simplify)) { + case show_directory: break; + case recurse_into_directory: + contents += read_directory_recursive(dir, + fullname, fullname, baselen + len, 0, simplify); + continue; + case ignore_directory: + continue; } - - contents += read_directory_recursive(dir, - fullname, fullname, baselen + len, 0, simplify); - continue; + break; case DT_REG: case DT_LNK: break; -- cgit v1.2.1 From ab22aed3b7517c6390cb622b368bfcf503b7a37a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 12 Apr 2007 14:32:21 -0700 Subject: Don't show gitlink directories when we want "other" files When "show_other_directories" is 
set, that implies that we are looking for untracked files, which obviously means that we should ignore directories that are marked as gitlinks in the index. This fixes "git status" in a superproject, which would otherwise always report that subprojects were "Untracked files:". Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- dir.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'dir.c') diff --git a/dir.c b/dir.c index 7b91501255..6564a929ff 100644 --- a/dir.c +++ b/dir.c @@ -375,6 +375,8 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, return recurse_into_directory; case index_gitdir: + if (dir->show_other_directories) + return ignore_directory; return show_directory; case index_nonexistent: -- cgit v1.2.1