summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Junio C Hamano <gitster@pobox.com> 2013-01-10 13:47:25 -0800
committer Junio C Hamano <gitster@pobox.com> 2013-01-10 13:47:25 -0800
commit d912b0e44f82dc430a4aac8566a8217b60629638 (patch)
tree 2f8336f1fe82a9b1ca4786c6af31a90ffbeb2356
parent 20e47e50a1a3e2f0781cafe4bb250aa2d9a28c10 (diff)
parent f61988125130ac091bfb69bda5d62b0ad8f054c4 (diff)
download git-d912b0e44f82dc430a4aac8566a8217b60629638.tar.gz
Merge branch 'as/dir-c-cleanup'

Refactor and generally clean up the directory traversal API
implementation.

* as/dir-c-cleanup:
  dir.c: rename free_excludes() to clear_exclude_list()
  dir.c: refactor is_path_excluded()
  dir.c: refactor is_excluded()
  dir.c: refactor is_excluded_from_list()
  dir.c: rename excluded() to is_excluded()
  dir.c: rename excluded_from_list() to is_excluded_from_list()
  dir.c: rename path_excluded() to is_path_excluded()
  dir.c: rename cryptic 'which' variable to more consistent name
  Improve documentation and comments regarding directory traversal API
  api-directory-listing.txt: update to match code
-rw-r--r-- Documentation/technical/api-directory-listing.txt 21
-rw-r--r-- attr.c 2
-rw-r--r-- builtin/add.c 2
-rw-r--r-- builtin/ls-files.c 2
-rw-r--r-- dir.c 149
-rw-r--r-- dir.h 45
-rw-r--r-- unpack-trees.c 12
7 files changed, 171 insertions, 62 deletions
diff --git a/Documentation/technical/api-directory-listing.txt b/Documentation/technical/api-directory-listing.txt
index add6f435b5..944fc39fac 100644
--- a/Documentation/technical/api-directory-listing.txt
+++ b/Documentation/technical/api-directory-listing.txt
@@ -9,37 +9,40 @@ Data structure
--------------
`struct dir_struct` structure is used to pass directory traversal
-options to the library and to record the paths discovered. The notable
-options are:
+options to the library and to record the paths discovered. A single
+`struct dir_struct` is used regardless of whether or not the traversal
+recursively descends into subdirectories.
+
+The notable options are:
`exclude_per_dir`::
The name of the file to be read in each directory for excluded
files (typically `.gitignore`).
-`collect_ignored`::
+`flags`::
- Include paths that are to be excluded in the result.
+ A bit-field of options:
-`show_ignored`::
+`DIR_SHOW_IGNORED`:::
The traversal is for finding just ignored files, not unignored
files.
-`show_other_directories`::
+`DIR_SHOW_OTHER_DIRECTORIES`:::
Include a directory that is not tracked.
-`hide_empty_directories`::
+`DIR_HIDE_EMPTY_DIRECTORIES`:::
Do not include a directory that is not tracked and is empty.
-`no_gitlinks`::
+`DIR_NO_GITLINKS`:::
If set, recurse into a directory that looks like a git
directory. Otherwise it is shown as a directory.
-The result of the enumeration is left in these fields::
+The result of the enumeration is left in these fields:
`entries[]`::
diff --git a/attr.c b/attr.c
index 466c93fa50..d6d71901b2 100644
--- a/attr.c
+++ b/attr.c
@@ -284,7 +284,7 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
* (reading the file from top to bottom), .gitattribute of the root
* directory (again, reading the file from top to bottom) down to the
* current directory, and then scan the list backwards to find the first match.
- * This is exactly the same as what excluded() does in dir.c to deal with
+ * This is exactly the same as what is_excluded() does in dir.c to deal with
* .gitignore
*/
diff --git a/builtin/add.c b/builtin/add.c
index e664100c71..075312afcd 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -454,7 +454,7 @@ int cmd_add(int argc, const char **argv, const char *prefix)
&& !file_exists(pathspec[i])) {
if (ignore_missing) {
int dtype = DT_UNKNOWN;
- if (path_excluded(&check, pathspec[i], -1, &dtype))
+ if (is_path_excluded(&check, pathspec[i], -1, &dtype))
dir_add_ignored(&dir, pathspec[i], strlen(pathspec[i]));
} else
die(_("pathspec '%s' did not match any files"),
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 4a9ee690c7..373c573449 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -203,7 +203,7 @@ static void show_ru_info(void)
static int ce_excluded(struct path_exclude_check *check, struct cache_entry *ce)
{
int dtype = ce_to_dtype(ce);
- return path_excluded(check, ce->name, ce_namelen(ce), &dtype);
+ return is_path_excluded(check, ce->name, ce_namelen(ce), &dtype);
}
static void show_files(struct dir_struct *dir)
diff --git a/dir.c b/dir.c
index 3780755047..e883a91483 100644
--- a/dir.c
+++ b/dir.c
@@ -2,6 +2,8 @@
* This handles recursive filename detection with exclude
* files, index knowledge etc..
*
+ * See Documentation/technical/api-directory-listing.txt
+ *
* Copyright (C) Linus Torvalds, 2005-2006
* Junio Hamano, 2005-2006
*/
@@ -377,7 +379,7 @@ void parse_exclude_pattern(const char **pattern,
}
void add_exclude(const char *string, const char *base,
- int baselen, struct exclude_list *which)
+ int baselen, struct exclude_list *el)
{
struct exclude *x;
int patternlen;
@@ -401,8 +403,8 @@ void add_exclude(const char *string, const char *base,
x->base = base;
x->baselen = baselen;
x->flags = flags;
- ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
- which->excludes[which->nr++] = x;
+ ALLOC_GROW(el->excludes, el->nr + 1, el->alloc);
+ el->excludes[el->nr++] = x;
}
static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
@@ -428,7 +430,11 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
return data;
}
-void free_excludes(struct exclude_list *el)
+/*
+ * Frees memory within el which was allocated for exclude patterns and
+ * the file buffer. Does not free el itself.
+ */
+void clear_exclude_list(struct exclude_list *el)
{
int i;
@@ -444,7 +450,7 @@ int add_excludes_from_file_to_list(const char *fname,
const char *base,
int baselen,
char **buf_p,
- struct exclude_list *which,
+ struct exclude_list *el,
int check_index)
{
struct stat st;
@@ -493,7 +499,7 @@ int add_excludes_from_file_to_list(const char *fname,
if (buf[i] == '\n') {
if (entry != buf + i && entry[0] != '#') {
buf[i - (i && buf[i-1] == '\r')] = 0;
- add_exclude(entry, base, baselen, which);
+ add_exclude(entry, base, baselen, el);
}
entry = buf + i + 1;
}
@@ -508,6 +514,10 @@ void add_excludes_from_file(struct dir_struct *dir, const char *fname)
die("cannot use %s as an exclude file", fname);
}
+/*
+ * Loads the per-directory exclude list for the substring of base
+ * which has a char length of baselen.
+ */
static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
{
struct exclude_list *el;
@@ -518,7 +528,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
(baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
return; /* too long a path -- ignore */
- /* Pop the ones that are not the prefix of the path being checked. */
+ /* Pop the directories that are not the prefix of the path being checked. */
el = &dir->exclude_list[EXC_DIRS];
while ((stk = dir->exclude_stack) != NULL) {
if (stk->baselen <= baselen &&
@@ -629,22 +639,26 @@ int match_pathname(const char *pathname, int pathlen,
ignore_case ? FNM_CASEFOLD : 0) == 0;
}
-/* Scan the list and let the last match determine the fate.
- * Return 1 for exclude, 0 for include and -1 for undecided.
+/*
+ * Scan the given exclude list in reverse to see whether pathname
+ * should be ignored. The first match (i.e. the last on the list), if
+ * any, determines the fate. Returns the exclude_list element which
+ * matched, or NULL for undecided.
*/
-int excluded_from_list(const char *pathname,
- int pathlen, const char *basename, int *dtype,
- struct exclude_list *el)
+static struct exclude *last_exclude_matching_from_list(const char *pathname,
+ int pathlen,
+ const char *basename,
+ int *dtype,
+ struct exclude_list *el)
{
int i;
if (!el->nr)
- return -1; /* undefined */
+ return NULL; /* undefined */
for (i = el->nr - 1; 0 <= i; i--) {
struct exclude *x = el->excludes[i];
const char *exclude = x->pattern;
- int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
int prefix = x->nowildcardlen;
if (x->flags & EXC_FLAG_MUSTBEDIR) {
@@ -659,7 +673,7 @@ int excluded_from_list(const char *pathname,
pathlen - (basename - pathname),
exclude, prefix, x->patternlen,
x->flags))
- return to_exclude;
+ return x;
continue;
}
@@ -667,28 +681,64 @@ int excluded_from_list(const char *pathname,
if (match_pathname(pathname, pathlen,
x->base, x->baselen ? x->baselen - 1 : 0,
exclude, prefix, x->patternlen, x->flags))
- return to_exclude;
+ return x;
}
+ return NULL; /* undecided */
+}
+
+/*
+ * Scan the list and let the last match determine the fate.
+ * Return 1 for exclude, 0 for include and -1 for undecided.
+ */
+int is_excluded_from_list(const char *pathname,
+ int pathlen, const char *basename, int *dtype,
+ struct exclude_list *el)
+{
+ struct exclude *exclude;
+ exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el);
+ if (exclude)
+ return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
return -1; /* undecided */
}
-static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
+/*
+ * Loads the exclude lists for the directory containing pathname, then
+ * scans all exclude lists to determine whether pathname is excluded.
+ * Returns the exclude_list element which matched, or NULL for
+ * undecided.
+ */
+static struct exclude *last_exclude_matching(struct dir_struct *dir,
+ const char *pathname,
+ int *dtype_p)
{
int pathlen = strlen(pathname);
int st;
+ struct exclude *exclude;
const char *basename = strrchr(pathname, '/');
basename = (basename) ? basename+1 : pathname;
prep_exclude(dir, pathname, basename-pathname);
for (st = EXC_CMDL; st <= EXC_FILE; st++) {
- switch (excluded_from_list(pathname, pathlen, basename,
- dtype_p, &dir->exclude_list[st])) {
- case 0:
- return 0;
- case 1:
- return 1;
- }
+ exclude = last_exclude_matching_from_list(
+ pathname, pathlen, basename, dtype_p,
+ &dir->exclude_list[st]);
+ if (exclude)
+ return exclude;
}
+ return NULL;
+}
+
+/*
+ * Loads the exclude lists for the directory containing pathname, then
+ * scans all exclude lists to determine whether pathname is excluded.
+ * Returns 1 if true, otherwise 0.
+ */
+static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
+{
+ struct exclude *exclude =
+ last_exclude_matching(dir, pathname, dtype_p);
+ if (exclude)
+ return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
return 0;
}
@@ -696,6 +746,7 @@ void path_exclude_check_init(struct path_exclude_check *check,
struct dir_struct *dir)
{
check->dir = dir;
+ check->exclude = NULL;
strbuf_init(&check->path, 256);
}
@@ -705,32 +756,41 @@ void path_exclude_check_clear(struct path_exclude_check *check)
}
/*
- * Is this name excluded? This is for a caller like show_files() that
- * do not honor directory hierarchy and iterate through paths that are
- * possibly in an ignored directory.
+ * For each subdirectory in name, starting with the top-most, checks
+ * to see if that subdirectory is excluded, and if so, returns the
+ * corresponding exclude structure. Otherwise, checks whether name
+ * itself (which is presumably a file) is excluded.
*
* A path to a directory known to be excluded is left in check->path to
* optimize for repeated checks for files in the same excluded directory.
*/
-int path_excluded(struct path_exclude_check *check,
- const char *name, int namelen, int *dtype)
+struct exclude *last_exclude_matching_path(struct path_exclude_check *check,
+ const char *name, int namelen,
+ int *dtype)
{
int i;
struct strbuf *path = &check->path;
+ struct exclude *exclude;
/*
* we allow the caller to pass namelen as an optimization; it
* must match the length of the name, as we eventually call
- * excluded() on the whole name string.
+ * is_excluded() on the whole name string.
*/
if (namelen < 0)
namelen = strlen(name);
+ /*
+ * If path is non-empty, and name is equal to path or a
+ * subdirectory of path, name should be excluded, because
+ * it's inside a directory which is already known to be
+ * excluded and was previously left in check->path.
+ */
if (path->len &&
path->len <= namelen &&
!memcmp(name, path->buf, path->len) &&
(!name[path->len] || name[path->len] == '/'))
- return 1;
+ return check->exclude;
strbuf_setlen(path, 0);
for (i = 0; name[i]; i++) {
@@ -738,8 +798,12 @@ int path_excluded(struct path_exclude_check *check,
if (ch == '/') {
int dt = DT_DIR;
- if (excluded(check->dir, path->buf, &dt))
- return 1;
+ exclude = last_exclude_matching(check->dir,
+ path->buf, &dt);
+ if (exclude) {
+ check->exclude = exclude;
+ return exclude;
+ }
}
strbuf_addch(path, ch);
}
@@ -747,7 +811,22 @@ int path_excluded(struct path_exclude_check *check,
/* An entry in the index; cannot be a directory with subentries */
strbuf_setlen(path, 0);
- return excluded(check->dir, name, dtype);
+ return last_exclude_matching(check->dir, name, dtype);
+}
+
+/*
+ * Is this name excluded? This is for a caller like show_files() that
+ * do not honor directory hierarchy and iterate through paths that are
+ * possibly in an ignored directory.
+ */
+int is_path_excluded(struct path_exclude_check *check,
+ const char *name, int namelen, int *dtype)
+{
+ struct exclude *exclude =
+ last_exclude_matching_path(check, name, namelen, dtype);
+ if (exclude)
+ return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
+ return 0;
}
static struct dir_entry *dir_entry_new(const char *pathname, int len)
@@ -1047,7 +1126,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
const struct path_simplify *simplify,
int dtype, struct dirent *de)
{
- int exclude = excluded(dir, path->buf, &dtype);
+ int exclude = is_excluded(dir, path->buf, &dtype);
if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
&& exclude_matches_pathspec(path->buf, path->len, simplify))
dir_add_ignored(dir, path->buf, path->len);
diff --git a/dir.h b/dir.h
index ab5af42b2e..ae1bc467ae 100644
--- a/dir.h
+++ b/dir.h
@@ -1,6 +1,8 @@
#ifndef DIR_H
#define DIR_H
+/* See Documentation/technical/api-directory-listing.txt */
+
#include "strbuf.h"
struct dir_entry {
@@ -13,6 +15,12 @@ struct dir_entry {
#define EXC_FLAG_MUSTBEDIR 8
#define EXC_FLAG_NEGATIVE 16
+/*
+ * Each .gitignore file will be parsed into patterns which are then
+ * appended to the relevant exclude_list (either EXC_DIRS or
+ * EXC_FILE). exclude_lists are also used to represent the list of
+ * --exclude values passed via CLI args (EXC_CMDL).
+ */
struct exclude_list {
int nr;
int alloc;
@@ -26,9 +34,15 @@ struct exclude_list {
} **excludes;
};
+/*
+ * The contents of the per-directory exclude files are lazily read on
+ * demand and then cached in memory, one per exclude_stack struct, in
+ * order to avoid opening and parsing each one every time that
+ * directory is traversed.
+ */
struct exclude_stack {
- struct exclude_stack *prev;
- char *filebuf;
+ struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */
+ char *filebuf; /* remember pointer to per-directory exclude file contents so we can free() */
int baselen;
int exclude_ix;
};
@@ -59,6 +73,14 @@ struct dir_struct {
#define EXC_DIRS 1
#define EXC_FILE 2
+ /*
+ * Temporary variables which are used during loading of the
+ * per-directory exclude lists.
+ *
+ * exclude_stack points to the top of the exclude_stack, and
+ * basebuf contains the full path to the current
+ * (sub)directory in the traversal.
+ */
struct exclude_stack *exclude_stack;
char basebuf[PATH_MAX];
};
@@ -76,8 +98,8 @@ extern int within_depth(const char *name, int namelen, int depth, int max_depth)
extern int fill_directory(struct dir_struct *dir, const char **pathspec);
extern int read_directory(struct dir_struct *, const char *path, int len, const char **pathspec);
-extern int excluded_from_list(const char *pathname, int pathlen, const char *basename,
- int *dtype, struct exclude_list *el);
+extern int is_excluded_from_list(const char *pathname, int pathlen, const char *basename,
+ int *dtype, struct exclude_list *el);
struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
/*
@@ -91,26 +113,29 @@ extern int match_pathname(const char *, int,
const char *, int, int, int);
/*
- * The excluded() API is meant for callers that check each level of leading
- * directory hierarchies with excluded() to avoid recursing into excluded
+ * The is_excluded() API is meant for callers that check each level of leading
+ * directory hierarchies with is_excluded() to avoid recursing into excluded
* directories. Callers that do not do so should use this API instead.
*/
struct path_exclude_check {
struct dir_struct *dir;
+ struct exclude *exclude;
struct strbuf path;
};
extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *);
extern void path_exclude_check_clear(struct path_exclude_check *);
-extern int path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype);
+extern struct exclude *last_exclude_matching_path(struct path_exclude_check *, const char *,
+ int namelen, int *dtype);
+extern int is_path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype);
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
- char **buf_p, struct exclude_list *which, int check_index);
+ char **buf_p, struct exclude_list *el, int check_index);
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
extern void add_exclude(const char *string, const char *base,
- int baselen, struct exclude_list *which);
-extern void free_excludes(struct exclude_list *el);
+ int baselen, struct exclude_list *el);
+extern void clear_exclude_list(struct exclude_list *el);
extern int file_exists(const char *);
extern int is_inside_dir(const char *dir);
diff --git a/unpack-trees.c b/unpack-trees.c
index 61acc5e564..0e1a196ace 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -837,7 +837,8 @@ static int clear_ce_flags_dir(struct cache_entry **cache, int nr,
{
struct cache_entry **cache_end;
int dtype = DT_DIR;
- int ret = excluded_from_list(prefix, prefix_len, basename, &dtype, el);
+ int ret = is_excluded_from_list(prefix, prefix_len,
+ basename, &dtype, el);
prefix[prefix_len++] = '/';
@@ -856,7 +857,7 @@ static int clear_ce_flags_dir(struct cache_entry **cache, int nr,
* with ret (iow, we know in advance the incl/excl
* decision for the entire directory), clear flag here without
* calling clear_ce_flags_1(). That function will call
- * the expensive excluded_from_list() on every entry.
+ * the expensive is_excluded_from_list() on every entry.
*/
return clear_ce_flags_1(cache, cache_end - cache,
prefix, prefix_len,
@@ -939,7 +940,8 @@ static int clear_ce_flags_1(struct cache_entry **cache, int nr,
/* Non-directory */
dtype = ce_to_dtype(ce);
- ret = excluded_from_list(ce->name, ce_namelen(ce), name, &dtype, el);
+ ret = is_excluded_from_list(ce->name, ce_namelen(ce),
+ name, &dtype, el);
if (ret < 0)
ret = defval;
if (ret > 0)
@@ -1152,7 +1154,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
*o->dst_index = o->result;
done:
- free_excludes(&el);
+ clear_exclude_list(&el);
if (o->path_exclude_check) {
path_exclude_check_clear(o->path_exclude_check);
free(o->path_exclude_check);
@@ -1373,7 +1375,7 @@ static int check_ok_to_remove(const char *name, int len, int dtype,
return 0;
if (o->dir &&
- path_excluded(o->path_exclude_check, name, -1, &dtype))
+ is_path_excluded(o->path_exclude_check, name, -1, &dtype))
/*
* ce->name is explicitly excluded, so it is Ok to
* overwrite it.