diff options
-rw-r--r-- | ChangeLog | 56 | ||||
-rw-r--r-- | bootstrap/Makefile.try | 32 | ||||
-rw-r--r-- | src/Makefile.am | 23 | ||||
-rw-r--r-- | src/egrep.c | 2 | ||||
-rw-r--r-- | src/esearch.c | 2 | ||||
-rw-r--r-- | src/fgrep.c | 2 | ||||
-rw-r--r-- | src/fsearch.c | 2 | ||||
-rw-r--r-- | src/grep.c | 190 | ||||
-rw-r--r-- | src/grep.h | 41 | ||||
-rw-r--r-- | src/grepmat.c | 6 | ||||
-rw-r--r-- | src/search.c | 264 |
11 files changed, 334 insertions, 286 deletions
@@ -1,3 +1,59 @@ +2005-11-09 Charles Levert <charles_levert@gna.org> + + The following set of changes aims to make "egrep" and "fgrep" + minimal executable programs for legacy applications, instead of + shell scripts. This "fgrep" is much smaller than "grep". + This set of changes appears more daunting than it really is. + + * src/egrep.c, src/fgrep.c, src/esearch.c, src/fsearch.c: New files + that #define either EGREP_PROGRAM or FGREP_PROGRAM and #include + the corresponding generic (i.e., non e or f specific) *.c file. + * src/grepmat.c: Remove whole file. + * src/Makefile.am: Remove no-dependencies from AUTOMAKE_OPTIONS. + Add definitions to make "egrep" and "fgrep" specific standalone + executable programs that only use the source files they need. + Remove rules for "egrep" and "fgrep" shell scripts. + * src/grep.h: #define GREP_PROGRAM if both EGREP_PROGRAM and + FGREP_PROGRAM are #undef. Only declare matchers[] in this case + along with the compile_fp_t and execute_fp_t function pointers + typedefs, otherwise declare prototypes for straight compile() + and execute() functions for the specialized "egrep" and "fgrep" + programs. Remove the extern declaration for matcher. + Define COMPILE_RET, COMPILE_ARGS, EXECUTE_RET, EXECUTE_ARGS, + COMPILE_FCT, and EXECUTE_FCT helper preprocessor macros. + * src/grep.c (short_options, long_options, usage, main): Only + support -G, -E, -F, -P, and -X for GREP_PROGRAM, but not for + EGREP_PROGRAM or FGREP_PROGRAM. Customize usage messages. + * src/grep.c (set_limits): New function with unchanged code, + called from main(), because it shouldn't be in install_matcher() + since it was already matcher-independent. + * src/grep.c (matcher): Add as static, only for GREP_PROGRAM. + * src/grep.c (setmatcher, install_matcher): Only for GREP_PROGRAM. + * src/grep.c (main): Remove any tweaking and dependence on argv[0]. 
+ * src/grep.c (print_line_middle, prpending, grepbuf, main): Call + compile() and execute() directly, not through a function + pointer dereferencing notation, so that it works with both + straight functions (in EGREP_PROGRAM and FGREP_PROGRAM) and + function pointers (in GREP_PROGRAM). + * src/search.c (<regex.h>, "dfa.h", dfa, patterns0, patterns, + pcount, dfaerror, kwset_exact_matches, kwsmusts): Only + include/declare/define if not FGREP_PROGRAM. + * src/search.c: Remove function prototypes for all functions + that are not used before their definition, since this is just + a hassle now with their varying names and conditional definition. + * src/search.c (GEAcompile): Rename from Ecompile(). Add new + syntax_bits argument/variable. Use as compile() for EGREP_PROGRAM. + Put in the needed RE_ICASE fix, albeit commented-out for now. + Make sure to free() modified word/line pattern after use, if any. + * src/search.c (Gcompile): Merge with GEAcompile() then remove. + * src/search.c (Gcompile, Acompile, Ecompile): New small functions + that call GEAcompile(), now that matcher is not an extern variable. + * src/search.c (GEAcompile, Gcompile, Acompile, Ecompile, + Fcompile, Pcompile, EGexecute, Fexecute, Pexecute, matchers): + Only define when needed according to *GREP_PROGRAM, and rename + to just compile() and execute() when appropriate. + * bootstrap/Makefile.try: Similar changes. + 2005-11-08 Charles Levert <charles_levert@gna.org> * README.DOS, TODO, grep.spec, doc/grep.1, doc/grep.texi, diff --git a/bootstrap/Makefile.try b/bootstrap/Makefile.try index f1d1acab..88f2caf2 100644 --- a/bootstrap/Makefile.try +++ b/bootstrap/Makefile.try @@ -7,11 +7,20 @@ EXEEXT = OBJEXT = o # Source of grep. 
-OBJS = \ - dfa.$(OBJEXT) \ +grep_OBJS = \ grep.$(OBJEXT) \ + search.$(OBJEXT) \ kwset.$(OBJEXT) \ - search.$(OBJEXT) + dfa.$(OBJEXT) +egrep_OBJS = \ + egrep.$(OBJEXT) \ + esearch.$(OBJEXT) \ + kwset.$(OBJEXT) \ + dfa.$(OBJEXT) +fgrep_OBJS = \ + fgrep.$(OBJEXT) \ + fsearch.$(OBJEXT) \ + kwset.$(OBJEXT) # Supporting routines. LIB_OBJS_core = \ @@ -116,20 +125,21 @@ libgreputils_a = $(libdir)/libgreputils.a all : $(libgreputils_a) $(PROGS) -grep$(EXEEXT) : $(OBJS) grepmat.$(OBJEXT) $(libgreputils_a) - $(CC) $(OBJS) grepmat.$(OBJEXT) -o grep $(libgreputils_a) +grep$(EXEEXT) : $(grep_OBJS) $(libgreputils_a) + $(CC) $(grep_OBJS) -o grep $(libgreputils_a) -egrep$(EXEEXT) : $(OBJS) egrepmat.$(OBJEXT) $(libgreputils_a) - $(CC) $(OBJS) egrepmat.$(OBJEXT) -o egrep $(libgreputils_a) +egrep$(EXEEXT) : $(egrep_OBJS) $(libgreputils_a) + $(CC) $(egrep_OBJS) -o egrep $(libgreputils_a) -fgrep$(EXEEXT) : $(OBJS) fgrepmat.$(OBJEXT) $(libgreputils_a) - $(CC) $(OBJS) fgrepmat.$(OBJEXT) -o fgrep $(libgreputils_a) +fgrep$(EXEEXT) : $(fgrep_OBJS) $(libgreputils_a) + $(CC) $(fgrep_OBJS) -o fgrep $(libgreputils_a) $(libgreputils_a) : $(LIB_OBJS) $(AR) $(ARFLAGS) $(libgreputils_a) $(LIB_OBJS) clean : - $(RM) grepmat.$(OBJEXT) egrepmat.$(OBJEXT) fgrepmat.$(OBJEXT) - $(RM) $(OBJS) + $(RM) grep.$(OBJEXT) egrep.$(OBJEXT) fgrep.$(OBJEXT) + $(RM) search.$(OBJEXT) esearch.$(OBJEXT) fsearch.$(OBJEXT) + $(RM) kwset.$(OBJEXT) dfa.$(OBJEXT) $(RM) $(PROGS) $(RM) $(libgreputils_a) $(LIB_OBJS) diff --git a/src/Makefile.am b/src/Makefile.am index 60aa00fa..d68117b3 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,13 +1,12 @@ ## Process this file with automake to create Makefile.in -AUTOMAKE_OPTIONS = ansi2knr no-dependencies +AUTOMAKE_OPTIONS = ansi2knr LN = ln -bin_PROGRAMS = grep -bin_SCRIPTS = egrep fgrep -CLEANFILES = egrep fgrep -grep_SOURCES = grep.c dfa.c kwset.c search.c \ - grepmat.c +bin_PROGRAMS = grep egrep fgrep +grep_SOURCES = grep.c search.c kwset.c dfa.c +egrep_SOURCES = egrep.c 
esearch.c kwset.c dfa.c +fgrep_SOURCES = fgrep.c fsearch.c kwset.c noinst_HEADERS = grep.h dfa.h kwset.h getpagesize.h system.h mbsupport.h LDADD = @INTLLIBS@ ../lib/libgreputils.a @@ -18,15 +17,3 @@ INCLUDES = -I../intl -I$(top_srcdir)/lib -DLOCALEDIR=\"$(localedir)\" EXTRA_DIST = \ dosbuf.c \ vms_fab.c vms_fab.h - -OPTION_for_egrep = -E -OPTION_for_fgrep = -F - -egrep fgrep: Makefile - (echo '#! /bin/sh'; \ - echo 'case $$0 in' ; \ - echo ' */*) dir=$${0%/*}/ ;;' ; \ - echo ' *) dir="" ;;' ; \ - echo 'esac' ; \ - echo 'exec $${dir}grep $(OPTION_for_$@) $${1+"$$@"}' ) >$@ - chmod a+x $@ diff --git a/src/egrep.c b/src/egrep.c new file mode 100644 index 00000000..1cabb4d8 --- /dev/null +++ b/src/egrep.c @@ -0,0 +1,2 @@ +#define EGREP_PROGRAM +#include "grep.c" diff --git a/src/esearch.c b/src/esearch.c new file mode 100644 index 00000000..f605e08b --- /dev/null +++ b/src/esearch.c @@ -0,0 +1,2 @@ +#define EGREP_PROGRAM +#include "search.c" diff --git a/src/fgrep.c b/src/fgrep.c new file mode 100644 index 00000000..43323101 --- /dev/null +++ b/src/fgrep.c @@ -0,0 +1,2 @@ +#define FGREP_PROGRAM +#include "grep.c" diff --git a/src/fsearch.c b/src/fsearch.c new file mode 100644 index 00000000..3bcac9d8 --- /dev/null +++ b/src/fsearch.c @@ -0,0 +1,2 @@ +#define FGREP_PROGRAM +#include "search.c" @@ -245,7 +245,11 @@ static struct exclude *excluded_patterns; static struct exclude *included_patterns; /* Short options. */ static char const short_options[] = -"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiKLlm:noqRrsuvwxyZz"; +"0123456789A:B:C:D:HITUVabcd:e:f:hiKLlm:noqRrsuvwxyZz" +#ifdef GREP_PROGRAM +"EFGPX:" +#endif +; /* Non-boolean long options that have no corresponding short equivalents. */ enum @@ -262,8 +266,14 @@ enum /* Long options equivalences. 
*/ static struct option const long_options[] = { +#ifdef GREP_PROGRAM + {"basic-regexp", no_argument, NULL, 'G'}, + {"extended-regexp", no_argument, NULL, 'E'}, + {"fixed-regexp", no_argument, NULL, 'F'}, + {"fixed-strings", no_argument, NULL, 'F'}, + {"perl-regexp", no_argument, NULL, 'P'}, +#endif {"after-context", required_argument, NULL, 'A'}, - {"basic-regexp", no_argument, NULL, 'G'}, {"before-context", required_argument, NULL, 'B'}, {"binary-files", required_argument, NULL, BINARY_FILES_OPTION}, {"byte-offset", no_argument, NULL, 'b'}, @@ -273,14 +283,11 @@ static struct option const long_options[] = {"count", no_argument, NULL, 'c'}, {"devices", required_argument, NULL, 'D'}, {"directories", required_argument, NULL, 'd'}, - {"extended-regexp", no_argument, NULL, 'E'}, {"exclude", required_argument, NULL, EXCLUDE_OPTION}, {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION}, {"file", required_argument, NULL, 'f'}, {"files-with-matches", no_argument, NULL, 'l'}, {"files-without-match", no_argument, NULL, 'L'}, - {"fixed-regexp", no_argument, NULL, 'F'}, - {"fixed-strings", no_argument, NULL, 'F'}, {"help", no_argument, &show_help, 1}, {"include", required_argument, NULL, INCLUDE_OPTION}, {"ignore-case", no_argument, NULL, 'i'}, @@ -296,7 +303,6 @@ static struct option const long_options[] = {"null", no_argument, NULL, 'Z'}, {"null-data", no_argument, NULL, 'z'}, {"only-matching", no_argument, NULL, 'o'}, - {"perl-regexp", no_argument, NULL, 'P'}, {"quiet", no_argument, NULL, 'q'}, {"recursive", no_argument, NULL, 'r'}, {"recursive", no_argument, NULL, 'R'}, @@ -345,8 +351,10 @@ static inline int undossify_input PARAMS ((register char *, size_t)); #endif /* Functions we'll use to search. 
*/ -static void (*compile) PARAMS ((char const *, size_t)); -static size_t (*execute) PARAMS ((char const *, size_t, size_t *, int)); +#ifdef GREP_PROGRAM +static compile_fp_t compile; +static execute_fp_t execute; +#endif /* Like error, but suppress the diagnostic if requested. */ static void @@ -774,7 +782,7 @@ print_line_middle (const char *beg, const char *lim) } while ( lim > beg - && ( (match_offset = (*execute) (ibeg, lim - beg, &match_size, 1)) + && ( (match_offset = execute(ibeg, lim - beg, &match_size, 1)) != (size_t) -1)) { char const *b = beg + match_offset; @@ -896,7 +904,7 @@ prpending (char const *lim) size_t match_size; --pending; if (outleft - || (((*execute) (lastout, nl + 1 - lastout, &match_size, 0) == (size_t) -1) + || ((execute(lastout, nl + 1 - lastout, &match_size, 0) == (size_t) -1) == !out_invert)) prline (lastout, nl + 1, SEP_CHAR_CONTEXT); else @@ -986,7 +994,7 @@ grepbuf (char const *beg, char const *lim) nlines = 0; p = beg; - while ((match_offset = (*execute) (p, lim - p, &match_size, 0)) != (size_t) -1) + while ((match_offset = execute(p, lim - p, &match_size, 0)) != (size_t) -1) { char const *b = p + match_offset; char const *endp = b + match_size; @@ -1354,18 +1362,31 @@ usage (int status) { printf (_("Usage: %s [OPTION]... 
PATTERN [FILE]...\n"), program_name); printf (_("\ -Search for PATTERN in each FILE or standard input.\n\ +Search for PATTERN in each FILE or standard input.\n")); +#if defined(EGREP_PROGRAM) + printf (_("\ +PATTERN is an extended regular expression (ERE).\n")); +#elif defined(FGREP_PROGRAM) + printf (_("\ +PATTERN is a set of newline-separated fixed strings.\n")); +#else + printf (_("\ +PATTERN is, by default, a basic regular expression (BRE).\n")); +#endif /* ?GREP_PROGRAM */ + printf (_("\ Example: %s -i 'hello world' menu.h main.c\n\ \n\ Regexp selection and interpretation:\n"), program_name); +#ifdef GREP_PROGRAM printf (_("\ - -E, --extended-regexp PATTERN is an extended regular expression\n\ - -F, --fixed-strings PATTERN is a set of newline-separated strings\n\ - -G, --basic-regexp PATTERN is a basic regular expression\n\ + -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\ + -F, --fixed-strings PATTERN is a set of newline-separated fixed strings\n\ + -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\ -P, --perl-regexp PATTERN is a Perl regular expression\n")); /* -X is undocumented on purpose. */ +#endif /* GREP_PROGRAM */ printf (_("\ - -e, --regexp=PATTERN use PATTERN as a regular expression\n\ + -e, --regexp=PATTERN use PATTERN for matching\n\ -f, --file=FILE obtain PATTERN from FILE\n\ -i, --ignore-case ignore case distinctions\n\ -w, --word-regexp force PATTERN to match only whole words\n\ @@ -1420,8 +1441,19 @@ Context control:\n\ WHEN is `always', `never', or `auto'\n\ -U, --binary do not strip CR characters at EOL (MSDOS)\n\ -u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\ -\n\ +\n")); +#if defined(EGREP_PROGRAM) + printf (_("\ +Invocation as `egrep' is deprecated; use `grep -E' instead.\n")); +#elif defined(FGREP_PROGRAM) + printf (_("\ +Invocation as `fgrep' is deprecated; use `grep -F' instead.\n")); +#else + printf (_("\ `egrep' means `grep -E'. 
`fgrep' means `grep -F'.\n\ +Direct invocation as either `egrep' or `fgrep' is deprecated.\n")); +#endif /* ?GREP_PROGRAM */ + printf (_("\ With no FILE, or when FILE is -, read standard input. If less than two FILEs\n\ are given, assume -h. Exit status is 0 if any line was selected, 1 otherwise;\n\ if any error occurs and -q was not given, the exit status is 2.\n")); @@ -1430,6 +1462,9 @@ if any error occurs and -q was not given, the exit status is 2.\n")); exit (status); } +#ifdef GREP_PROGRAM +static char const *matcher; + /* Set the matcher to M, reporting any conflicts. */ static void setmatcher (char const *m) @@ -1445,45 +1480,50 @@ static int install_matcher (char const *name) { int i; -#if defined(HAVE_SETRLIMIT) - struct rlimit rlim; -#endif for (i = 0; matchers[i].compile; i++) if (strcmp (name, matchers[i].name) == 0) { compile = matchers[i].compile; execute = matchers[i].execute; -#if defined(HAVE_SETRLIMIT) && defined(RLIMIT_STACK) - /* I think every platform needs to do this, so that regex.c - doesn't oveflow the stack. The default value of - `re_max_failures' is too large for some platforms: it needs - more than 3MB-large stack. - - The test for HAVE_SETRLIMIT should go into `configure'. */ - if (!getrlimit (RLIMIT_STACK, &rlim)) - { - long newlim; - extern long int re_max_failures; /* from regex.c */ - - /* Approximate the amount regex.c needs, plus some more. */ - newlim = re_max_failures * 2 * 20 * sizeof (char *); - if (newlim > rlim.rlim_max) - { - newlim = rlim.rlim_max; - re_max_failures = newlim / (2 * 20 * sizeof (char *)); - } - if (rlim.rlim_cur < newlim) - { - rlim.rlim_cur = newlim; - setrlimit (RLIMIT_STACK, &rlim); - } - } -#endif return 1; } return 0; } +#endif /* GREP_PROGRAM */ + +static void +set_limits(void) +{ +#if defined(HAVE_SETRLIMIT) && defined(RLIMIT_STACK) + struct rlimit rlim; + + /* I think every platform needs to do this, so that regex.c + doesn't oveflow the stack. 
The default value of + `re_max_failures' is too large for some platforms: it needs + more than 3MB-large stack. + + The test for HAVE_SETRLIMIT should go into `configure'. */ + if (!getrlimit (RLIMIT_STACK, &rlim)) + { + long newlim; + extern long int re_max_failures; /* from regex.c */ + + /* Approximate the amount regex.c needs, plus some more. */ + newlim = re_max_failures * 2 * 20 * sizeof (char *); + if (newlim > rlim.rlim_max) + { + newlim = rlim.rlim_max; + re_max_failures = newlim / (2 * 20 * sizeof (char *)); + } + if (rlim.rlim_cur < newlim) + { + rlim.rlim_cur = newlim; + setrlimit (RLIMIT_STACK, &rlim); + } + } +#endif +} /* Find the white-space-separated options specified by OPTIONS, and using BUF to store copies of these options, set ARGV[0], ARGV[1], @@ -1741,38 +1781,6 @@ main (int argc, char **argv) initialize_main (&argc, &argv); program_name = argv[0]; - if (program_name && strrchr (program_name, '/')) - program_name = strrchr (program_name, '/') + 1; - - if (!strcmp(program_name, "egrep")) - setmatcher ("egrep"); - if (!strcmp(program_name, "fgrep")) - setmatcher ("fgrep"); - -#if defined(__MSDOS__) || defined(_WIN32) - /* DOS and MS-Windows use backslashes as directory separators, and usually - have an .exe suffix. They also have case-insensitive filesystems. */ - if (program_name) - { - char *p = program_name; - char *bslash = strrchr (argv[0], '\\'); - - if (bslash && bslash >= program_name) /* for mixed forward/backslash case */ - program_name = bslash + 1; - else if (program_name == argv[0] - && argv[0][0] && argv[0][1] == ':') /* "c:progname" */ - program_name = argv[0] + 2; - - /* Collapse the letter-case, so `strcmp' could be used hence. */ - for ( ; *p; p++) - if (*p >= 'A' && *p <= 'Z') - *p += 'a' - 'A'; - - /* Remove the .exe extension, if any. 
*/ - if ((p = strrchr (program_name, '.')) && strcmp (p, ".exe") == 0) - *p = '\0'; - } -#endif keys = NULL; keycc = 0; @@ -1828,6 +1836,7 @@ main (int argc, char **argv) error (2, 0, _("unknown devices method")); break; +#ifdef GREP_PROGRAM case 'E': setmatcher ("egrep"); break; @@ -1844,6 +1853,11 @@ main (int argc, char **argv) setmatcher ("grep"); break; + case 'X': /* undocumented on purpose */ + setmatcher (optarg); + break; +#endif /* GREP_PROGRAM */ + case 'H': with_filenames = 1; break; @@ -1872,10 +1886,6 @@ main (int argc, char **argv) show_version = 1; break; - case 'X': /* undocumented on purpose */ - setmatcher (optarg); - break; - case 'a': binary_files = TEXT_BINARY_FILES; break; @@ -2120,16 +2130,13 @@ main (int argc, char **argv) parse_grep_colors(); } - if (! matcher) - matcher = "grep"; - if (show_version) { printf ("%s\n\n", PACKAGE_STRING); printf (_("\ -Copyright (C) 1988, 1992-1999, 2000, 2001, 2002, 2004 Free Software Foundation, Inc.\n")); +Copyright (C) 1988, 1992-2002, 2004, 2005 Free Software Foundation, Inc.\n")); printf (_("\ -This is free software; see the source for copying conditions. There is NO\n\ +This is free software; see the source for copying conditions. There is NO\n\ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n")); printf ("\n"); exit (0); @@ -2159,15 +2166,22 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n")) else usage (2); +#ifdef GREP_PROGRAM + if (! matcher) + matcher = "grep"; + if (!install_matcher (matcher) && !install_matcher ("default")) abort (); +#endif /* GREP_PROGRAM */ + + set_limits(); #ifdef MBS_SUPPORT if (match_icase) mb_icase_keys (&keys, &keycc); #endif /* MBS_SUPPORT */ - (*compile)(keys, keycc); + compile(keys, keycc); if ((argc - optind > 1 && !no_filenames) || with_filenames) out_file = 1; @@ -20,19 +20,48 @@ # define __attribute__(x) #endif +/* We build specialized legacy "egrep" and "fgrep" programs. 
+ No program adjusts its behavior according to its argv[0]. + No scripts are provided as an alternative. Distributors + are free to do otherwise, but it is their burden to do so. */ +#if !defined(GREP_PROGRAM) && !defined(EGREP_PROGRAM) && !defined(FGREP_PROGRAM) +# define GREP_PROGRAM +#endif + +/* The two functions each matcher provides. */ +#define COMPILE_RET void +#define COMPILE_ARGS \ + (char const *pattern, size_t size) +#define EXECUTE_RET size_t +#define EXECUTE_ARGS \ + (char const *buf, size_t size, size_t *match_size, int exact) + +#ifdef GREP_PROGRAM +/* Function definitions. */ +# define COMPILE_FCT(f) static COMPILE_RET f COMPILE_ARGS +# define EXECUTE_FCT(f) static EXECUTE_RET f EXECUTE_ARGS +/* Function pointer types. */ +typedef COMPILE_RET (*compile_fp_t) PARAMS (COMPILE_ARGS); +typedef EXECUTE_RET (*execute_fp_t) PARAMS (EXECUTE_ARGS); + /* grep.c expects the matchers vector to be terminated by an entry with a NULL compile, and to contain at least an entry named "default". */ - extern struct matcher { char name[8]; - void (*compile) PARAMS ((char const *, size_t)); - size_t (*execute) PARAMS ((char const *, size_t, size_t *, int)); + compile_fp_t compile; + execute_fp_t execute; } const matchers[]; - -/* Exported from fgrepmat.c, egrepmat.c, grepmat.c. */ -extern char const *matcher; +#else /* !GREP_PROGRAM */ +/* Straight functions for specialized "egrep" and "fgrep" programs. */ +/* Function definitions. */ +# define COMPILE_FCT(f) COMPILE_RET compile COMPILE_ARGS +# define EXECUTE_FCT(f) EXECUTE_RET execute EXECUTE_ARGS +/* Function prototypes. */ +extern COMPILE_RET compile PARAMS (COMPILE_ARGS); +extern EXECUTE_RET execute PARAMS (EXECUTE_ARGS); +#endif /* GREP_PROGRAM */ /* The following flags are exported from grep for the matchers to look at. 
*/ diff --git a/src/grepmat.c b/src/grepmat.c deleted file mode 100644 index 7947a657..00000000 --- a/src/grepmat.c +++ /dev/null @@ -1,6 +0,0 @@ -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif -#include "system.h" -#include "grep.h" -char const *matcher; diff --git a/src/search.c b/src/search.c index 82e22bce..f06a75e2 100644 --- a/src/search.c +++ b/src/search.c @@ -33,8 +33,10 @@ #include "system.h" #include "grep.h" -#include "regex.h" -#include "dfa.h" +#ifndef FGREP_PROGRAM +# include <regex.h> +# include "dfa.h" +#endif #include "kwset.h" #include "error.h" #include "xalloc.h" @@ -47,6 +49,26 @@ /* For -w, we also consider _ to be word constituent. */ #define WCHAR(C) (ISALNUM(C) || (C) == '_') +/* KWset compiled pattern. For Ecompile and Gcompile, we compile + a list of strings, at least one of which is known to occur in + any string matching the regexp. */ +static kwset_t kwset; + +static void +kwsinit (void) +{ + static char trans[NCHAR]; + int i; + + if (match_icase) + for (i = 0; i < NCHAR; ++i) + trans[i] = TOLOWER (i); + + if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0))) + error (2, 0, _("memory exhausted")); +} + +#ifndef FGREP_PROGRAM /* DFA compiled regexp. */ static struct dfa dfa; @@ -62,48 +84,16 @@ static struct patterns struct patterns *patterns; size_t pcount; -/* KWset compiled pattern. For Ecompile and Gcompile, we compile - a list of strings, at least one of which is known to occur in - any string matching the regexp. */ -static kwset_t kwset; - -/* Number of compiled fixed strings known to exactly match the regexp. - If kwsexec returns < kwset_exact_matches, then we don't need to - call the regexp matcher at all. 
*/ -static int kwset_exact_matches; - -#if defined(MBS_SUPPORT) -static char* check_multibyte_string PARAMS ((char const *buf, size_t size)); -#endif -static void kwsinit PARAMS ((void)); -static void kwsmusts PARAMS ((void)); -static void Gcompile PARAMS ((char const *, size_t)); -static void Ecompile PARAMS ((char const *, size_t)); -static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int )); -static void Fcompile PARAMS ((char const *, size_t)); -static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int)); -static void Pcompile PARAMS ((char const *, size_t )); -static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); - void dfaerror (char const *mesg) { error (2, 0, mesg); } -static void -kwsinit (void) -{ - static char trans[NCHAR]; - int i; - - if (match_icase) - for (i = 0; i < NCHAR; ++i) - trans[i] = TOLOWER (i); - - if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0))) - error (2, 0, _("memory exhausted")); -} +/* Number of compiled fixed strings known to exactly match the regexp. + If kwsexec returns < kwset_exact_matches, then we don't need to + call the regexp matcher at all. */ +static int kwset_exact_matches; /* If the DFA turns out to have some set of fixed strings one of which must occur in the match, then we build a kwset matcher @@ -142,6 +132,7 @@ kwsmusts (void) error (2, 0, err); } } +#endif /* !FGREP_PROGRAM */ #ifdef MBS_SUPPORT /* This function allocate the array which correspond to "buf". @@ -184,18 +175,30 @@ check_multibyte_string(char const *buf, size_t size) return mb_properties; } -#endif +#endif /* MBS_SUPPORT */ -static void -Gcompile (char const *pattern, size_t size) +#if defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) +#ifdef EGREP_PROGRAM +COMPILE_FCT(Ecompile) { + reg_syntax_t syntax_bits = RE_SYNTAX_POSIX_EGREP; +#else +/* No __VA_ARGS__ in C89. So we have to do it this way. 
*/ +static COMPILE_RET +GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) +{ +#endif /* EGREP_PROGRAM */ const char *err; - char const *sep; + const char *sep; size_t total = size; char const *motif = pattern; - re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); - dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); +#if 0 + if (match_icase) + syntax_bits |= RE_ICASE; +#endif + re_set_syntax (syntax_bits); + dfasyntax (syntax_bits, match_icase, eolbyte); /* For GNU regex compiler we have to pass the patterns separately to detect errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]" @@ -220,7 +223,6 @@ Gcompile (char const *pattern, size_t size) patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns)); if (patterns == NULL) error (2, errno, _("memory exhausted")); - patterns[pcount] = patterns0; if ((err = re_compile_pattern (motif, len, @@ -237,116 +239,63 @@ Gcompile (char const *pattern, size_t size) to decide whether the match should really count. */ if (match_words || match_lines) { - /* In the whole-word case, we use the pattern: - \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]|$\). - In the whole-line case, we use the pattern: - ^\(userpattern\)$. */ - - static char const line_beg[] = "^\\("; - static char const line_end[] = "\\)$"; - static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\("; - static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)"; - char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); - size_t i; - strcpy (n, match_lines ? line_beg : word_beg); - i = strlen (n); - memcpy (n + i, pattern, size); - i += size; - strcpy (n + i, match_lines ? 
line_end : word_end); - i += strlen (n + i); - pattern = n; - size = i; + static char const line_beg_no_bk[] = "^("; + static char const line_end_no_bk[] = ")$"; + static char const word_beg_no_bk[] = "(^|[^[:alnum:]_])("; + static char const word_end_no_bk[] = ")([^[:alnum:]_]|$)"; +#ifdef EGREP_PROGRAM +# define IF_BK(x, y) (y) + char *n = xmalloc (sizeof word_beg_no_bk - 1 + size + sizeof word_end_no_bk); +#else + static char const line_beg_bk[] = "^\\("; + static char const line_end_bk[] = "\\)$"; + static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\("; + static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)"; + int bk = !(syntax_bits & RE_NO_BK_PARENS); +# define IF_BK(x, y) ((bk) ? (x) : (y)) + char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk); +#endif /* EGREP_PROGRAM */ + + strcpy (n, match_lines ? IF_BK(line_beg_bk, line_beg_no_bk) + : IF_BK(word_beg_bk, word_beg_no_bk)); + total = strlen(n); + memcpy (n + total, pattern, size); + total += size; + strcpy (n + total, match_lines ? IF_BK(line_end_bk, line_end_no_bk) + : IF_BK(word_end_bk, word_end_no_bk)); + total += strlen (n + total); + pattern = motif = n; + size = total; } + else + motif = NULL; dfacomp (pattern, size, &dfa, 1); kwsmusts (); + + if (motif) + free((char *) motif); } -static void -Ecompile (char const *pattern, size_t size) +#ifndef EGREP_PROGRAM +COMPILE_FCT(Gcompile) { - const char *err; - const char *sep; - size_t total = size; - char const *motif = pattern; - - if (strcmp (matcher, "awk") == 0) - { - re_set_syntax (RE_SYNTAX_AWK); - dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte); - } - else - { - re_set_syntax (RE_SYNTAX_POSIX_EGREP); - dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte); - } - - /* For GNU regex compiler we have to pass the patterns separately to detect - errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]" - GNU regex should have raise a syntax error. 
The same for backref, where - the backref should have been local to each pattern. */ - do - { - size_t len; - sep = memchr (motif, '\n', total); - if (sep) - { - len = sep - motif; - sep++; - total -= (len + 1); - } - else - { - len = total; - total = 0; - } - - patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns)); - if (patterns == NULL) - error (2, errno, _("memory exhausted")); - patterns[pcount] = patterns0; - - if ((err = re_compile_pattern (motif, len, - &(patterns[pcount].regexbuf))) != 0) - error (2, 0, err); - pcount++; - - motif = sep; - } while (sep && total != 0); + return GEAcompile (pattern, size, + RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); +} - /* In the match_words and match_lines cases, we use a different pattern - for the DFA matcher that will quickly throw out cases that won't work. - Then if DFA succeeds we do some hairy stuff using the regex matcher - to decide whether the match should really count. */ - if (match_words || match_lines) - { - /* In the whole-word case, we use the pattern: - (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$). - In the whole-line case, we use the pattern: - ^(userpattern)$. */ - - static char const line_beg[] = "^("; - static char const line_end[] = ")$"; - static char const word_beg[] = "(^|[^[:alnum:]_])("; - static char const word_end[] = ")([^[:alnum:]_]|$)"; - char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); - size_t i; - strcpy (n, match_lines ? line_beg : word_beg); - i = strlen(n); - memcpy (n + i, pattern, size); - i += size; - strcpy (n + i, match_lines ? 
line_end : word_end); - i += strlen (n + i); - pattern = n; - size = i; - } +COMPILE_FCT(Acompile) +{ + return GEAcompile (pattern, size, RE_SYNTAX_AWK); +} - dfacomp (pattern, size, &dfa, 1); - kwsmusts (); +COMPILE_FCT(Ecompile) +{ + return GEAcompile (pattern, size, RE_SYNTAX_POSIX_EGREP); } +#endif /* !EGREP_PROGRAM */ -static size_t -EGexecute (char const *buf, size_t size, size_t *match_size, int exact) +EXECUTE_FCT(EGexecute) { register char const *buflim, *beg, *end; char eol = eolbyte; @@ -501,9 +450,10 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) *match_size = end - beg; return ret_val; } +#endif /* defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) */ -static void -Fcompile (char const *pattern, size_t size) +#if defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) +COMPILE_FCT(Fcompile) { char const *beg, *lim, *err; @@ -525,8 +475,7 @@ Fcompile (char const *pattern, size_t size) error (2, 0, err); } -static size_t -Fexecute (char const *buf, size_t size, size_t *match_size, int exact) +EXECUTE_FCT(Fexecute) { register char const *beg, *try, *end; register size_t len; @@ -639,7 +588,9 @@ Fexecute (char const *buf, size_t size, size_t *match_size, int exact) #endif /* MBS_SUPPORT */ return ret_val; } +#endif /* defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) */ +#ifdef GREP_PROGRAM #if HAVE_LIBPCRE /* Compiled internal form of a Perl regular expression. 
*/ static pcre *cre; @@ -648,8 +599,7 @@ static pcre *cre; static pcre_extra *extra; #endif -static void -Pcompile (char const *pattern, size_t size) +COMPILE_FCT(Pcompile) { #if !HAVE_LIBPCRE error (2, 0, _("The -P option is not supported")); @@ -713,8 +663,7 @@ Pcompile (char const *pattern, size_t size) #endif } -static size_t -Pexecute (char const *buf, size_t size, size_t *match_size, int exact) +EXECUTE_FCT(Pexecute) { #if !HAVE_LIBPCRE abort (); @@ -770,10 +719,11 @@ Pexecute (char const *buf, size_t size, size_t *match_size, int exact) struct matcher const matchers[] = { { "default", Gcompile, EGexecute }, - { "grep", Gcompile, EGexecute }, - { "egrep", Ecompile, EGexecute }, - { "awk", Ecompile, EGexecute }, - { "fgrep", Fcompile, Fexecute }, - { "perl", Pcompile, Pexecute }, + { "grep", Gcompile, EGexecute }, + { "egrep", Ecompile, EGexecute }, + { "awk", Acompile, EGexecute }, + { "fgrep", Fcompile, Fexecute }, + { "perl", Pcompile, Pexecute }, { "", 0, 0 }, }; +#endif /* GREP_PROGRAM */ |