From 78db6ea9dc1a872f9d07a36fe7aec700a5c963b9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 2 Feb 2012 03:18:29 -0500 Subject: grep: make locking flag global The low-level grep code traditionally didn't care about threading, as it doesn't do any threading itself and didn't call out to other non-thread-safe code. That changed with 0579f91 (grep: enable threading with -p and -W using lazy attribute lookup, 2011-12-12), which pushed the lookup of funcname attributes (which is not thread-safe) into the low-level grep code. As a result, the low-level code learned about a new global "grep_attr_mutex" to serialize access to the attribute code. A multi-threaded caller (e.g., builtin/grep.c) is expected to initialize the mutex and set "use_threads" in the grep_opt structure. The low-level code only uses the lock if use_threads is set. However, putting the use_threads flag into the grep_opt struct is not the most logical place. Whether threading is in use is not something that matters for each call to grep_buffer, but is instead global to the whole program (i.e., if any thread is doing multi-threaded grep, every other thread, even if it thinks it is doing its own single-threaded grep, would need to use the locking). In practice, this distinction isn't a problem for us, because the only user of multi-threaded grep is "git-grep", which does nothing except call grep. This patch turns the opt->use_threads flag into a global flag. More important than the nit-picking semantic argument above is that this means that the locking functions don't need to actually have access to a grep_opt to know whether to lock. Which in turn can make adding new locks simpler, as we don't need to pass around a grep_opt. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/grep.c b/builtin/grep.c index 9ce064ac11..5b881870f6 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -259,6 +259,7 @@ static void start_threads(struct grep_opt *opt) pthread_cond_init(&cond_add, NULL); pthread_cond_init(&cond_write, NULL); pthread_cond_init(&cond_result, NULL); + grep_use_locks = 1; for (i = 0; i < ARRAY_SIZE(todo); i++) { strbuf_init(&todo[i].out, 0); @@ -307,6 +308,7 @@ static int wait_all(void) pthread_cond_destroy(&cond_add); pthread_cond_destroy(&cond_write); pthread_cond_destroy(&cond_result); + grep_use_locks = 0; return hit; } @@ -1030,8 +1032,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) use_threads = 0; #endif - opt.use_threads = use_threads; - #ifndef NO_PTHREADS if (use_threads) { if (opt.pre_context || opt.post_context || opt.file_break || -- cgit v1.2.1 From b3aeb285d0ac1dcb4d578a61a68e08646f96501f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 2 Feb 2012 03:18:41 -0500 Subject: grep: move sha1-reading mutex into low-level code The multi-threaded git-grep code needs to serialize access to the thread-unsafe read_sha1_file call. It does this with a mutex that is local to builtin/grep.c. Let's instead push this down into grep.c, where it can be used by both builtin/grep.c and grep.c. This will let us safely teach the low-level grep.c code tricks that involve reading from the object db. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'builtin') diff --git a/builtin/grep.c b/builtin/grep.c index 5b881870f6..aeb361663a 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -85,21 +85,6 @@ static inline void grep_unlock(void) pthread_mutex_unlock(&grep_mutex); } -/* Used to serialize calls to read_sha1_file. */ -static pthread_mutex_t read_sha1_mutex; - -static inline void read_sha1_lock(void) -{ - if (use_threads) - pthread_mutex_lock(&read_sha1_mutex); -} - -static inline void read_sha1_unlock(void) -{ - if (use_threads) - pthread_mutex_unlock(&read_sha1_mutex); -} - /* Signalled when a new work_item is added to todo. */ static pthread_cond_t cond_add; @@ -254,7 +239,7 @@ static void start_threads(struct grep_opt *opt) int i; pthread_mutex_init(&grep_mutex, NULL); - pthread_mutex_init(&read_sha1_mutex, NULL); + pthread_mutex_init(&grep_read_mutex, NULL); pthread_mutex_init(&grep_attr_mutex, NULL); pthread_cond_init(&cond_add, NULL); pthread_cond_init(&cond_write, NULL); @@ -303,7 +288,7 @@ static int wait_all(void) } pthread_mutex_destroy(&grep_mutex); - pthread_mutex_destroy(&read_sha1_mutex); + pthread_mutex_destroy(&grep_read_mutex); pthread_mutex_destroy(&grep_attr_mutex); pthread_cond_destroy(&cond_add); pthread_cond_destroy(&cond_write); @@ -313,8 +298,6 @@ static int wait_all(void) return hit; } #else /* !NO_PTHREADS */ -#define read_sha1_lock() -#define read_sha1_unlock() static int wait_all(void) { @@ -376,9 +359,9 @@ static void *lock_and_read_sha1_file(const unsigned char *sha1, enum object_type { void *data; - read_sha1_lock(); + grep_read_lock(); data = read_sha1_file(sha1, type, size); - read_sha1_unlock(); + grep_read_unlock(); return data; } @@ -617,10 +600,10 @@ static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec, struct strbuf base; int hit, len; - read_sha1_lock(); + grep_read_lock(); data = read_object_with_reference(obj->sha1, tree_type, &size, NULL); - read_sha1_unlock(); + grep_read_unlock(); if (!data) die(_("unable to read tree (%s)"), sha1_to_hex(obj->sha1)); -- cgit v1.2.1 From 8f24a6323ece9be1bf1a04b4b5856112438337f2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 2 Feb 2012 03:19:37 -0500 Subject: convert git-grep to use grep_source interface The grep_source interface (as opposed to grep_buffer) will eventually gives us a richer interface for telling the low-level grep code about our buffers. Eventually this will lead to things like better binary-file handling. For now, it lets us drop a lot of now-redundant code. The conversion is mostly straight-forward. One thing to note is that the memory ownership rules for "struct grep_source" are different than the "struct work_item" found here (the former will copy things like the filename, rather than taking ownership). Therefore you will also see some slight tweaking of when filename buffers are released. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 142 ++++++++++----------------------------------------------- 1 file changed, 23 insertions(+), 119 deletions(-) (limited to 'builtin') diff --git a/builtin/grep.c b/builtin/grep.c index aeb361663a..c9b6a138fe 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -29,25 +29,12 @@ static int use_threads = 1; #define THREADS 8 static pthread_t threads[THREADS]; -static void *load_sha1(const unsigned char *sha1, unsigned long *size, - const char *name); -static void *load_file(const char *filename, size_t *sz); - -enum work_type {WORK_SHA1, WORK_FILE}; - /* We use one producer thread and THREADS consumer * threads. The producer adds struct work_items to 'todo' and the * consumers pick work items from the same array. */ struct work_item { - enum work_type type; - char *name; - - /* if type == WORK_SHA1, then 'identifier' is a SHA1, - * otherwise type == WORK_FILE, and 'identifier' is a NUL - * terminated filename. - */ - void *identifier; + struct grep_source source; char done; struct strbuf out; }; @@ -98,7 +85,8 @@ static pthread_cond_t cond_result; static int skip_first_line; -static void add_work(enum work_type type, char *name, void *id) +static void add_work(enum grep_source_type type, const char *name, + const void *id) { grep_lock(); @@ -106,9 +94,7 @@ static void add_work(enum work_type type, char *name, void *id) pthread_cond_wait(&cond_write, &grep_mutex); } - todo[todo_end].type = type; - todo[todo_end].name = name; - todo[todo_end].identifier = id; + grep_source_init(&todo[todo_end].source, type, name, id); todo[todo_end].done = 0; strbuf_reset(&todo[todo_end].out); todo_end = (todo_end + 1) % ARRAY_SIZE(todo); @@ -136,21 +122,6 @@ static struct work_item *get_work(void) return ret; } -static void grep_sha1_async(struct grep_opt *opt, char *name, - const unsigned char *sha1) -{ - unsigned char *s; - s = xmalloc(20); - memcpy(s, sha1, 20); - add_work(WORK_SHA1, name, s); -} - -static void grep_file_async(struct grep_opt *opt, char *name, - const char *filename) -{ - add_work(WORK_FILE, name, xstrdup(filename)); -} - static void work_done(struct work_item *w) { int old_done; @@ -177,8 +148,7 @@ static void work_done(struct work_item *w) write_or_die(1, p, len); } - free(w->name); - free(w->identifier); + grep_source_clear(&w->source); } if (old_done != todo_done) @@ -201,25 +171,8 @@ static void *run(void *arg) break; opt->output_priv = w; - if (w->type == WORK_SHA1) { - unsigned long sz; - void* data = load_sha1(w->identifier, &sz, w->name); - - if (data) { - hit |= grep_buffer(opt, w->name, data, sz); - free(data); - } - } else if (w->type == WORK_FILE) { - size_t sz; - void* data = load_file(w->identifier, &sz); - if (data) { - hit |= grep_buffer(opt, w->name, data, sz); - free(data); - } - } else { - assert(0); - } - + hit |= grep_source(opt, &w->source); + grep_source_clear_data(&w->source); work_done(w); } free_grep_patterns(arg); @@ -365,23 +318,10 @@ static void *lock_and_read_sha1_file(const unsigned char *sha1, enum object_type return data; } -static void *load_sha1(const unsigned char *sha1, unsigned long *size, - const char *name) -{ - enum object_type type; - void *data = lock_and_read_sha1_file(sha1, &type, size); - - if (!data) - error(_("'%s': unable to read %s"), name, sha1_to_hex(sha1)); - - return data; -} - static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *filename, int tree_name_len) { struct strbuf pathbuf = STRBUF_INIT; - char *name; if (opt->relative && opt->prefix_length) { quote_path_relative(filename + tree_name_len, -1, &pathbuf, @@ -391,87 +331,51 @@ static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, strbuf_addstr(&pathbuf, filename); } - name = strbuf_detach(&pathbuf, NULL); - #ifndef NO_PTHREADS if (use_threads) { - grep_sha1_async(opt, name, sha1); + add_work(GREP_SOURCE_SHA1, pathbuf.buf, sha1); + strbuf_release(&pathbuf); return 0; } else #endif { + struct grep_source gs; int hit; - unsigned long sz; - void *data = load_sha1(sha1, &sz, name); - if (!data) - hit = 0; - else - hit = grep_buffer(opt, name, data, sz); - free(data); - free(name); - return hit; - } -} + grep_source_init(&gs, GREP_SOURCE_SHA1, pathbuf.buf, sha1); + strbuf_release(&pathbuf); + hit = grep_source(opt, &gs); -static void *load_file(const char *filename, size_t *sz) -{ - struct stat st; - char *data; - int i; - - if (lstat(filename, &st) < 0) { - err_ret: - if (errno != ENOENT) - error(_("'%s': %s"), filename, strerror(errno)); - return NULL; - } - if (!S_ISREG(st.st_mode)) - return NULL; - *sz = xsize_t(st.st_size); - i = open(filename, O_RDONLY); - if (i < 0) - goto err_ret; - data = xmalloc(*sz + 1); - if (st.st_size != read_in_full(i, data, *sz)) { - error(_("'%s': short read %s"), filename, strerror(errno)); - close(i); - free(data); - return NULL; + grep_source_clear(&gs); + return hit; } - close(i); - data[*sz] = 0; - return data; } static int grep_file(struct grep_opt *opt, const char *filename) { struct strbuf buf = STRBUF_INIT; - char *name; if (opt->relative && opt->prefix_length) quote_path_relative(filename, -1, &buf, opt->prefix); else strbuf_addstr(&buf, filename); - name = strbuf_detach(&buf, NULL); #ifndef NO_PTHREADS if (use_threads) { - grep_file_async(opt, name, filename); + add_work(GREP_SOURCE_FILE, buf.buf, filename); + strbuf_release(&buf); return 0; } else #endif { + struct grep_source gs; int hit; - size_t sz; - void *data = load_file(filename, &sz); - if (!data) - hit = 0; - else - hit = grep_buffer(opt, name, data, sz); - free(data); - free(name); + grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename); + strbuf_release(&buf); + hit = grep_source(opt, &gs); + + grep_source_clear(&gs); return hit; } } -- cgit v1.2.1 From 9dd5245c1043dd18fd7b3f44b9e51eef7e4b58d8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 2 Feb 2012 03:24:28 -0500 Subject: grep: pre-load userdiff drivers when threaded The low-level grep_source code will automatically load the userdiff driver to see whether a file is binary. However, when we are threaded, it will load the drivers in a non-deterministic order, handling each one as its assigned thread happens to be scheduled. Meanwhile, the attribute lookup code (which underlies the userdiff driver lookup) is optimized to handle paths in sequential order (because they tend to share the same gitattributes files). Multi-threading the lookups destroys the locality and makes this optimization less effective. We can fix this by pre-loading the userdiff driver in the main thread, before we hand off the file to a worker thread. My best-of-five for "git grep foo" on the linux-2.6 repository went from: real 0m0.391s user 0m1.708s sys 0m0.584s to: real 0m0.360s user 0m1.576s sys 0m0.572s Not a huge speedup, but it's quite easy to do. The only trick is that we shouldn't perform this optimization if "-a" was used, in which case we won't bother checking whether the files are binary at all. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'builtin') diff --git a/builtin/grep.c b/builtin/grep.c index c9b6a138fe..e741aca18c 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -85,8 +85,8 @@ static pthread_cond_t cond_result; static int skip_first_line; -static void add_work(enum grep_source_type type, const char *name, - const void *id) +static void add_work(struct grep_opt *opt, enum grep_source_type type, + const char *name, const void *id) { grep_lock(); @@ -95,6 +95,8 @@ static void add_work(enum grep_source_type type, const char *name, } grep_source_init(&todo[todo_end].source, type, name, id); + if (opt->binary != GREP_BINARY_TEXT) + grep_source_load_driver(&todo[todo_end].source); todo[todo_end].done = 0; strbuf_reset(&todo[todo_end].out); todo_end = (todo_end + 1) % ARRAY_SIZE(todo); @@ -333,7 +335,7 @@ static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, #ifndef NO_PTHREADS if (use_threads) { - add_work(GREP_SOURCE_SHA1, pathbuf.buf, sha1); + add_work(opt, GREP_SOURCE_SHA1, pathbuf.buf, sha1); strbuf_release(&pathbuf); return 0; } else @@ -362,7 +364,7 @@ static int grep_file(struct grep_opt *opt, const char *filename) #ifndef NO_PTHREADS if (use_threads) { - add_work(GREP_SOURCE_FILE, buf.buf, filename); + add_work(opt, GREP_SOURCE_FILE, buf.buf, filename); strbuf_release(&buf); return 0; } else -- cgit v1.2.1