From 6aead43db34313e6cdbc72e2f7a70f6b82c78cf2 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Tue, 10 Apr 2007 01:01:44 +0200 Subject: sscanf/strtoul: parse integers robustly * builtin-grep.c (strtoul_ui): Move function definition from here, to... * git-compat-util.h (strtoul_ui): ...here, with an added "base" parameter. * builtin-grep.c (cmd_grep): Update use of strtoul_ui to include base, "10". * builtin-update-index.c (read_index_info): Diagnose an invalid mode integer that is out of range or merely larger than INT_MAX. (cmd_update_index): Use strtoul_ui, not sscanf. * convert-objects.c (write_subdirectory): Likewise. Signed-off-by: Jim Meyering Signed-off-by: Junio C Hamano --- builtin-grep.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'builtin-grep.c') diff --git a/builtin-grep.c b/builtin-grep.c index 981f3d4d8e..e13cb31f2b 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -434,19 +434,6 @@ static const char emsg_missing_context_len[] = static const char emsg_missing_argument[] = "option requires an argument -%s"; -static int strtoul_ui(char const *s, unsigned int *result) -{ - unsigned long ul; - char *p; - - errno = 0; - ul = strtoul(s, &p, 10); - if (errno || *p || p == s || (unsigned int) ul != ul) - return -1; - *result = ul; - return 0; -} - int cmd_grep(int argc, const char **argv, const char *prefix) { int hit = 0; @@ -569,7 +556,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) scan = arg + 1; break; } - if (strtoul_ui(scan, &num)) + if (strtoul_ui(scan, 10, &num)) die(emsg_invalid_context_len, scan); switch (arg[1]) { case 'A': -- cgit v1.2.1 From d99ebf081797dbb43ff618ff59f4c607b0acf045 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 14 Sep 2007 00:31:00 -0700 Subject: Split grep arguments in a way that does not requires to add /dev/null. 
In order to (almost) always show the name of the file without relying on "-H" option of GNU grep, we used to add /dev/null to the argument list unless we are doing -l or -L. This caused "/dev/null:0" to show up when -c is given in the output. It is not enough to add -c to the set of options we do not pass /dev/null for. When we have too many files, we invoke grep multiple times and we need to avoid giving a widow filename to the last invocation -- otherwise we will not see the name. This keeps two filenames when the argv[] buffer is about to overflow and we have not finished iterating over the index, so that the last round will always have at least two paths to work with (and not require /dev/null). An obvious and the only exception is when there is only 1 file that is given to the underlying grep, and in that case we avoid passing /dev/null and let the external "grep -c" report only the number of matches. Signed-off-by: Junio C Hamano --- builtin-grep.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 14 deletions(-) (limited to 'builtin-grep.c') diff --git a/builtin-grep.c b/builtin-grep.c index e13cb31f2b..c7b45c4d58 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -187,6 +187,78 @@ static int exec_grep(int argc, const char **argv) else die("maximum number of args exceeded"); \ } while (0) +/* + * If you send a singleton filename to grep, it does not give + * the name of the file. GNU grep has "-H" but we would want + * that behaviour in a portable way. + * + * So we keep two pathnames in argv buffer unsent to grep in + * the main loop if we need to do more than one grep. + */ +static int flush_grep(struct grep_opt *opt, + int argc, int arg0, const char **argv, int *kept) +{ + int status; + int count = argc - arg0; + const char *kept_0 = NULL; + + if (count <= 2) { + /* + * Because we keep at least 2 paths in the call from + * the main loop (i.e. 
kept != NULL), and MAXARGS is + * far greater than 2, this usually is a call to + * conclude the grep. However, the user could attempt + * to overflow the argv buffer by giving too many + * options to leave very small number of real + * arguments even for the call in the main loop. + */ + if (kept) + die("insanely many options to grep"); + + /* + * If we have two or more paths, we do not have to do + * anything special, but we need to push /dev/null to + * get "-H" behaviour of GNU grep portably but when we + * are not doing "-l" nor "-L" nor "-c". + */ + if (count == 1 && + !opt->name_only && + !opt->unmatch_name_only && + !opt->count) { + argv[argc++] = "/dev/null"; + argv[argc] = NULL; + } + } + + else if (kept) { + /* + * Called because we found many paths and haven't finished + * iterating over the cache yet. We keep two paths + * for the concluding call. argv[argc-2] and argv[argc-1] + * has the last two paths, so save the first one away, + * replace it with NULL while sending the list to grep, + * and recover them after we are done. + */ + *kept = 2; + kept_0 = argv[argc-2]; + argv[argc-2] = NULL; + argc -= 2; + } + + status = exec_grep(argc, argv); + + if (kept_0) { + /* + * Then recover them. Now the last arg is beyond the + * terminating NULL which is at argc, and the second + * from the last is what we saved away in kept_0 + */ + argv[arg0++] = kept_0; + argv[arg0] = argv[argc+1]; + } + return status; +} + static int external_grep(struct grep_opt *opt, const char **paths, int cached) { int i, nr, argc, hit, len, status; @@ -253,22 +325,12 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) push_arg(p->pattern); } - /* - * To make sure we get the header printed out when we want it, - * add /dev/null to the paths to grep. This is unnecessary - * (and wrong) with "-l" or "-L", which always print out the - * name anyway. - * - * GNU grep has "-H", but this is portable. 
- */ - if (!opt->name_only && !opt->unmatch_name_only) - push_arg("/dev/null"); - hit = 0; argc = nr; for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; char *name; + int kept; if (!S_ISREG(ntohl(ce->ce_mode))) continue; if (!pathspec_matches(paths, ce->name)) @@ -283,10 +345,10 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) argv[argc++] = name; if (argc < MAXARGS && !ce_stage(ce)) continue; - status = exec_grep(argc, argv); + status = flush_grep(opt, argc, nr, argv, &kept); if (0 < status) hit = 1; - argc = nr; + argc = nr + kept; if (ce_stage(ce)) { do { i++; @@ -296,7 +358,7 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) } } if (argc > nr) { - status = exec_grep(argc, argv); + status = flush_grep(opt, argc, nr, argv, NULL); if (0 < status) hit = 1; } -- cgit v1.2.1 From b67a43bb8f4a8ffb64f26b7351c3b0b90239696a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 5 Nov 2007 17:16:47 -0800 Subject: grep with unmerged index We called flush_grep() every time we saw an unmerged entry in the index. If we happen to find an unmerged entry before we saw more than two paths, we incorrectly declared that the user had too many non-paths options in front. 
Signed-off-by: Junio C Hamano --- builtin-grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin-grep.c') diff --git a/builtin-grep.c b/builtin-grep.c index c7b45c4d58..185876b0a6 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -343,7 +343,7 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) memcpy(name + 2, ce->name, len + 1); } argv[argc++] = name; - if (argc < MAXARGS && !ce_stage(ce)) + if (argc < MAXARGS) continue; status = flush_grep(opt, argc, nr, argv, &kept); if (0 < status) -- cgit v1.2.1 From 4b87474bc9b620cf07b09130e33963cdaf34c603 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 17 Nov 2007 21:18:14 -0800 Subject: grep -An -Bm: fix invocation of external grep command When building command line to invoke external grep, the arguments to -A/-B/-C options were placed in randarg[] buffer, but the code forgot that snprintf() does not count terminating NUL in its return value. This caused "git grep -A1 -B2" to invoke external grep with "-B21 -A1".
Signed-off-by: Junio C Hamano --- builtin-grep.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'builtin-grep.c') diff --git a/builtin-grep.c b/builtin-grep.c index 185876b0a6..bbf747fc7b 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -294,7 +294,7 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) if (opt->pre_context) { push_arg("-B"); len += snprintf(argptr, sizeof(randarg)-len, - "%u", opt->pre_context); + "%u", opt->pre_context) + 1; if (sizeof(randarg) <= len) die("maximum length of args exceeded"); push_arg(argptr); @@ -303,7 +303,7 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) if (opt->post_context) { push_arg("-A"); len += snprintf(argptr, sizeof(randarg)-len, - "%u", opt->post_context); + "%u", opt->post_context) + 1; if (sizeof(randarg) <= len) die("maximum length of args exceeded"); push_arg(argptr); @@ -313,7 +313,7 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) else { push_arg("-C"); len += snprintf(argptr, sizeof(randarg)-len, - "%u", opt->post_context); + "%u", opt->post_context) + 1; if (sizeof(randarg) <= len) die("maximum length of args exceeded"); push_arg(argptr); -- cgit v1.2.1 From 6326cee51b21d67e1335ead285cb52b83234b2b4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 5 Dec 2007 16:13:08 -0800 Subject: git grep shows the same hit repeatedly for unmerged paths When the index is unmerged, e.g. $ git ls-files -u 100644 faf413748eb6ccb15161a212156c5e348302b1b6 1 setup.c 100644 145eca50f41d811c4c8fcb21ed2604e6b2971aba 2 setup.c 100644 cb9558c49b6027bf225ba2a6154c4d2a52bcdbe2 3 setup.c running "git grep" for work tree files repeats hits for each unmerged stage. $ git grep -n -e setup_work_tree -- '*.[ch]' setup.c:209:void setup_work_tree(void) setup.c:209:void setup_work_tree(void) setup.c:209:void setup_work_tree(void) This should fix it. 
Signed-off-by: Junio C Hamano --- builtin-grep.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'builtin-grep.c') diff --git a/builtin-grep.c b/builtin-grep.c index bbf747fc7b..f1ff8dc556 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -343,12 +343,12 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) memcpy(name + 2, ce->name, len + 1); } argv[argc++] = name; - if (argc < MAXARGS) - continue; - status = flush_grep(opt, argc, nr, argv, &kept); - if (0 < status) - hit = 1; - argc = nr + kept; + if (MAXARGS <= argc) { + status = flush_grep(opt, argc, nr, argv, &kept); + if (0 < status) + hit = 1; + argc = nr + kept; + } if (ce_stage(ce)) { do { i++; -- cgit v1.2.1 From 872c930dcb048c1a2b50e6ce881c521dcee15e23 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 4 Jan 2008 18:37:41 +0100 Subject: Don't access line[-1] for a zero-length "line" from fgets. A NUL byte at beginning of file, or just after a newline would provoke an invalid buf[-1] access in a few places. * builtin-grep.c (cmd_grep): Don't access buf[-1]. * builtin-pack-objects.c (get_object_list): Likewise. * builtin-rev-list.c (read_revisions_from_stdin): Likewise. * bundle.c (read_bundle_header): Likewise. * server-info.c (read_pack_info_file): Likewise. * transport.c (insert_packed_refs): Likewise. 
Signed-off-by: Jim Meyering Signed-off-by: Junio C Hamano --- builtin-grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin-grep.c') diff --git a/builtin-grep.c b/builtin-grep.c index f1ff8dc556..0d6cc7361f 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -644,7 +644,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) die("'%s': %s", argv[1], strerror(errno)); while (fgets(buf, sizeof(buf), patterns)) { int len = strlen(buf); - if (buf[len-1] == '\n') + if (len && buf[len-1] == '\n') buf[len-1] = 0; /* ignore empty line like grep does */ if (!buf[0]) -- cgit v1.2.1 From 7a51ed66f653c248993b3c4a61932e47933d835e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 14 Jan 2008 16:03:17 -0800 Subject: Make on-disk index representation separate from in-core one This converts the index explicitly on read and write to its on-disk format, allowing the in-core format to contain more flags, and be simpler. In particular, the in-core format is now host-endian (as opposed to the on-disk one that is network endian in order to be able to be shared across machines) and as a result we can dispense with all the htonl/ntohl on accesses to the cache_entry fields. This will make it easier to make use of various temporary flags that do not exist in the on-disk format. 
Signed-off-by: Linus Torvalds --- builtin-grep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'builtin-grep.c') diff --git a/builtin-grep.c b/builtin-grep.c index 0d6cc7361f..9180b39e3f 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -331,7 +331,7 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached) struct cache_entry *ce = active_cache[i]; char *name; int kept; - if (!S_ISREG(ntohl(ce->ce_mode))) + if (!S_ISREG(ce->ce_mode)) continue; if (!pathspec_matches(paths, ce->name)) continue; @@ -387,7 +387,7 @@ static int grep_cache(struct grep_opt *opt, const char **paths, int cached) for (nr = 0; nr < active_nr; nr++) { struct cache_entry *ce = active_cache[nr]; - if (!S_ISREG(ntohl(ce->ce_mode))) + if (!S_ISREG(ce->ce_mode)) continue; if (!pathspec_matches(paths, ce->name)) continue; -- cgit v1.2.1 From 2cd5dfd240ecb63c77bcb2532664984e3b69ae47 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 20 Feb 2008 23:28:07 -0500 Subject: Teach git-grep --name-only as synonym for -l I expected git grep --name-only to give me only the file names, much as git diff --name-only only generates filenames. Alas the option is -l, which matches common external greps but doesn't match other parts of the git UI. Signed-off-by: Shawn O. 
Pearce Signed-off-by: Junio C Hamano --- builtin-grep.c | 1 + 1 file changed, 1 insertion(+) (limited to 'builtin-grep.c') diff --git a/builtin-grep.c b/builtin-grep.c index 9180b39e3f..f4f4ecb11b 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -578,6 +578,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) continue; } if (!strcmp("-l", arg) || + !strcmp("--name-only", arg) || !strcmp("--files-with-matches", arg)) { opt.name_only = 1; continue; -- cgit v1.2.1 From 5f7c643afe8da21c82762f872632d6407530f9e8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 12 Mar 2008 17:39:16 -0400 Subject: add NO_EXTERNAL_GREP build option Previously, we just chose whether to allow external grep based on the __unix__ define. However, there are systems which define this macro but which have an inferior grep (e.g., one that does not support all options used by t7002). This allows users to accept the potential speed penalty to get a more consistent grep experience (and to pass the testsuite). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin-grep.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'builtin-grep.c') diff --git a/builtin-grep.c b/builtin-grep.c index f4f4ecb11b..ef299108f5 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -12,6 +12,14 @@ #include "builtin.h" #include "grep.h" +#ifndef NO_EXTERNAL_GREP +#ifdef __unix__ +#define NO_EXTERNAL_GREP 0 +#else +#define NO_EXTERNAL_GREP 1 +#endif +#endif + /* * git grep pathspecs are somewhat different from diff-tree pathspecs; * pathname wildcards are allowed.
@@ -153,7 +161,7 @@ static int grep_file(struct grep_opt *opt, const char *filename) return i; } -#ifdef __unix__ +#if !NO_EXTERNAL_GREP static int exec_grep(int argc, const char **argv) { pid_t pid; @@ -372,7 +380,7 @@ static int grep_cache(struct grep_opt *opt, const char **paths, int cached) int nr; read_cache(); -#ifdef __unix__ +#if !NO_EXTERNAL_GREP /* * Use the external "grep" command for the case where * we grep through the checked-out files. It tends to -- cgit v1.2.1 From 1b1dd23f2d6a707b7077cdf6bc6d4055bd0bfb7d Mon Sep 17 00:00:00 2001 From: Stephan Beyer Date: Sun, 13 Jul 2008 15:36:15 +0200 Subject: Make usage strings dash-less When you misuse a git command, you are shown the usage string. But this is currently shown in the dashed form. So if you just copy what you see, it will not work, when the dashed form is no longer supported. This patch makes git commands show the dash-less version. For shell scripts that do not specify OPTIONS_SPEC, git-sh-setup.sh generates a dash-less usage string now. Signed-off-by: Stephan Beyer Signed-off-by: Junio C Hamano --- builtin-grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin-grep.c') diff --git a/builtin-grep.c b/builtin-grep.c index ef299108f5..0cac39590d 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -495,7 +495,7 @@ static int grep_object(struct grep_opt *opt, const char **paths, } static const char builtin_grep_usage[] = -"git-grep