diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 16 | ||||
-rw-r--r-- | src/egrep.man | 1 | ||||
-rw-r--r-- | src/fgrep.man | 1 | ||||
-rw-r--r-- | src/grep.1 | 477 | ||||
-rw-r--r-- | src/regex.c | 26 |
5 files changed, 10 insertions, 511 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 47f54bc7..4924a7bd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -20,19 +20,7 @@ datadir = $(prefix)/@DATADIRNAME@ localedir = $(datadir)/locale INCLUDES = -I../intl -DLOCALEDIR=\"$(localedir)\" -man_MANS = grep.1 fgrep.1 egrep.1 - -EXTRA_DIST = grep.1 egrep.man fgrep.man \ - regex.h \ +EXTRA_DIST = regex.h \ dosbuf.c \ - vms_fab.c vms_fab.h - -CLEANFILES = egrep.1 fgrep.1 - -fgrep.1: fgrep.man - inst=`echo "grep" | sed '$(transform)'`.1; \ -sed -e "s%@grep@%$$inst%g" $(srcdir)/fgrep.man > $@ + vms_fab.c vms_fab.h -egrep.1: egrep.man - inst=`echo "grep" | sed '$(transform)'`.1; \ -sed -e "s%@grep@%$$inst%g" $(srcdir)/egrep.man > $@ diff --git a/src/egrep.man b/src/egrep.man deleted file mode 100644 index 877a8a89..00000000 --- a/src/egrep.man +++ /dev/null @@ -1 +0,0 @@ -.so man1/@grep@ diff --git a/src/fgrep.man b/src/fgrep.man deleted file mode 100644 index 877a8a89..00000000 --- a/src/fgrep.man +++ /dev/null @@ -1 +0,0 @@ -.so man1/@grep@ diff --git a/src/grep.1 b/src/grep.1 deleted file mode 100644 index 4354b74a..00000000 --- a/src/grep.1 +++ /dev/null @@ -1,477 +0,0 @@ -.\" grep man page -.de Id -.ds Dt \\$4 -.. -.Id $Id: grep.1,v 1.6 1998/11/10 00:29:38 alainm Exp $ -.TH GREP 1 \*(Dt "GNU Project" -.SH NAME -grep, egrep, fgrep \- print lines matching a pattern -.SH SYNOPSIS -.B grep -[-[AB] NUM] [-CEFGVabchiLlnqrsvwxyUu] [-e PATTERN | -f FILE] -[-d ACTION] [--directories=ACTION] -[--extended-regexp] [--fixed-strings] [--basic-regexp] -[--regexp=PATTERN] [--file=FILE] [--ignore-case] [--word-regexp] -[--line-regexp] [--line-regexp] [--no-messages] [--revert-match] -[--version] [--help] [--byte-offset] [--line-number] -[--with-filename] [--no-filename] [--quiet] [--silent] [--text] -[--files-without-match] [--files-with-matcces] [--count] -[--before-context=NUM] [--after-context=NUM] [--context] -[--binary] [--unix-byte-offsets] [--recursive] -.I files... -.SH DESCRIPTION -.PP -.B Grep -searches the named input -.I files -(or standard input if no files are named, or -the file name -.B \- -is given) -for lines containing a match to the given -.IR pattern . -By default, -.B grep -prints the matching lines. -.PP -There are three major variants of -.BR grep , -controlled by the following options. -.PD 0 -.TP -.B \-G, --basic-regexp -Interpret -.I pattern -as a basic regular expression (see below). This is the default. -.TP -.B \-E, --extended-regexp -Interpret -.I pattern -as an extended regular expression (see below). -.TP -.B \-F, --fixed-strings -Interpret -.I pattern -as a list of fixed strings, separated by newlines, -any of which is to be matched. -.LP -In addition, two variant programs -.B egrep -and -.B fgrep -are available. -.B Egrep -is similar (but not identical) to -.BR "grep\ \-E" , -and is compatible with the historical Unix -.BR egrep . -.B Fgrep -is the same as -.BR "grep\ \-F" . -.PD -.LP -All variants of -.B grep -understand the following options: -.PD 0 -.TP -.BI \-A " NUM" ", --after-context=" NUM -Print -.I NUM -lines of trailing context after matching lines. -.TP -.BI \-B " NUM" ", --before-context=" NUM -Print -.I NUM -lines of leading context before matching lines. -.TP -.BI \-C ,\ --context"[=NUM]" -Print -.I NUM -lines (default 2) of output context. -.TP -.BI \- NUM \ -Same as --context=NUM lines of leading and trailing context. However, -.B grep -will never print any given line more than once. -.TP -.B \-V, --version -Print the version number of -.B grep -to standard error. This version number should -be included in all bug reports (see below). -.TP -.B \-b, --byte-offset -Print the byte offset within the input file before -each line of output. -.TP -.B \-c, --count -Suppress normal output; instead print a count of -matching lines for each input file. -With the -.B \-v, --revert-match -option (see below), count non-matching lines. -.TP -.BI \-d " ACTION" ", --directories=" ACTION -If an input file is a directory, use -.I ACTION -to process it. By default, -.I ACTION -is -.BR read , -which means that directories are read just as if they were ordinary files. -If -.I ACTION -is -.BR skip , -directories are silently skipped. -If -.I ACTION -is -.BR recurse , -.B -grep reads all files under each directory, recursively; -this is equivalent to the -.B \-r -option. -.TP -.BI \-e " PATTERN" ", --regexp=" PATTERN -Use -.I PATTERN -as the pattern; useful to protect patterns beginning with -.BR \- . -.TP -.BI \-f " FILE" ", --file=" FILE -Obtain patterns from -.IR FILE , -one per line. -The empty file contains zero patterns, and therfore matches nothing. -.TP -.B \-h, --no-filename -Suppress the prefixing of filenames on output -when multiple files are searched. -.TP -.B \-i, --ignore-case -Ignore case distinctions in both the -.I pattern -and the input files. -.TP -.B \-L, --files-without-match -Suppress normal output; instead print the name -of each input file from which no output would -normally have been printed. The scanning will stop -on the first match. -.TP -.B \-l, --files-with-matches -Suppress normal output; instead print -the name of each input file from which output -would normally have been printed. The scanning will -stop on the first match. -.TP -.B \-n, --line-number -Prefix each line of output with the line number -within its input file. -.TP -.B \-q, --quiet, --silent -Quiet; suppress normal output. The scanning will stop -on the first match. -Also see the -.B \-s -or -.B --no-messages -option below. -.TP -.B \-r, --recursive -Read all files under each directory, recursively; -this is equivalent to the -.B "\-d recurse" -option. -.TP -.B \-s, --no-messages -Suppress error messages about nonexistent or unreadable files. -Portability note: unlike GNU -.BR grep , -BSD -.B grep -does not comply with POSIX.2, because BSD -.B grep -lacks a -.B \-q -option and its -.B \-s -option behaves like GNU -.BR grep 's -.B \-q -option. -Shell scripts intended to be portable to BSD -.B grep -should avoid both -.B \-q -and -.B \-s -and should redirect output to /dev/null instead. -.TP -.B \-a, --text -Do not suppress output lines that contain binary data. -Normally, if the first few bytes of a file indicate that -the file contains binary data, -.B grep -outputs only a message saying that the file matches the pattern. -This option causes -.B grep -to act as if the file is a text file, -even if it would otherwise be treated as binary. -.TP -.B \-v, --revert-match -Invert the sense of matching, to select non-matching lines. -.TP -.B \-w, --word-regexp -Select only those lines containing matches that form whole words. -The test is that the matching substring must either be at the -beginning of the line, or preceded by a non-word constituent -character. Similarly, it must be either at the end of the line -or followed by a non-word constituent character. Word-constituent -characters are letters, digits, and the underscore. -.TP -.B \-x, --line-regexp -Select only those matches that exactly match the whole line. -.TP -.B \-y -Obsolete synonym for -.BR \-i . -.TP -.B \-U, --binary -Treat the file(s) as binary. By default, under MS-DOS and MS-Windows, -.BR grep -guesses the file type by looking at the contents of the first 32KB -read from the file. If -.BR grep -decides the file is a text file, it strips the CR characters from the -original file contents (to make regular expressions with -.B ^ -and -.B $ -work correctly). Specifying -.B \-U -overrules this guesswork, causing all files to be read and passed to the -matching mechanism verbatim; if the file is a text file with CR/LF -pairs at the end of each line, this will cause some regular -expressions to fail. This option is only supported on MS-DOS and -MS-Windows. -.TP -.B \-u, --unix-byte-offsets -Report Unix-style byte offsets. This switch causes -.B grep -to report byte offsets as if the file were Unix-style text file, i.e. with -CR characters stripped off. This will produce results identical to running -.B grep -on a Unix machine. This option has no effect unless -.B \-b -option is also used; it is only supported on MS-DOS and MS-Windows. -.PD -.SH "REGULAR EXPRESSIONS" -.PP -A regular expression is a pattern that describes a set of strings. -Regular expressions are constructed analogously to arithmetic -expressions, by using various operators to combine smaller expressions. -.PP -.B Grep -understands two different versions of regular expression syntax: -``basic'' and ``extended.'' In -.RB "GNU\ " grep , -there is no difference in available functionality using either syntax. -In other implementations, basic regular expressions are less powerful. -The following description applies to extended regular expressions; -differences for basic regular expressions are summarized afterwards. -.PP -The fundamental building blocks are the regular expressions that match -a single character. Most characters, including all letters and digits, -are regular expressions that match themselves. Any metacharacter with -special meaning may be quoted by preceding it with a backslash. -.PP -A list of characters enclosed by -.B [ -and -.B ] -matches any single -character in that list; if the first character of the list -is the caret -.B ^ -then it matches any character -.I not -in the list. -For example, the regular expression -.B [0123456789] -matches any single digit. A range of ASCII characters -may be specified by giving the first and last characters, separated -by a hyphen. -Finally, certain named classes of characters are predefined. -Their names are self explanatory, and they are -.BR [:alnum:] , -.BR [:alpha:] , -.BR [:cntrl:] , -.BR [:digit:] , -.BR [:graph:] , -.BR [:lower:] , -.BR [:print:] , -.BR [:punct:] , -.BR [:space:] , -.BR [:upper:] , -and -.BR [:xdigit:]. -For example, -.B [[:alnum:]] -means -.BR [0-9A-Za-z] , -except the latter form is dependent upon the ASCII character encoding, -whereas the former is portable. -(Note that the brackets in these class names are part of the symbolic -names, and must be included in addition to the brackets delimiting -the bracket list.) Most metacharacters lose their special meaning -inside lists. To include a literal -.B ] -place it first in the list. Similarly, to include a literal -.B ^ -place it anywhere but first. Finally, to include a literal -.B \- -place it last. -.PP -The period -.B . -matches any single character. -The symbol -.B \ew -is a synonym for -.B [[:alnum:]] -and -.B \eW -is a synonym for -.BR [^[:alnum]] . -.PP -The caret -.B ^ -and the dollar sign -.B $ -are metacharacters that respectively match the empty string at the -beginning and end of a line. -The symbols -.B \e< -and -.B \e> -respectively match the empty string at the beginning and end of a word. -The symbol -.B \eb -matches the empty string at the edge of a word, -and -.B \eB -matches the empty string provided it's -.I not -at the edge of a word. -.PP -A regular expression may be followed by one of several repetition operators: -.PD 0 -.TP -.B ? -The preceding item is optional and matched at most once. -.TP -.B * -The preceding item will be matched zero or more times. -.TP -.B + -The preceding item will be matched one or more times. -.TP -.BI { n } -The preceding item is matched exactly -.I n -times. -.TP -.BI { n ,} -The preceding item is matched -.I n -or more times. -.TP -.BI {, m } -The preceding item is optional and is matched at most -.I m -times. -.TP -.BI { n , m } -The preceding item is matched at least -.I n -times, but not more than -.I m -times. -.PD -.PP -Two regular expressions may be concatenated; the resulting -regular expression matches any string formed by concatenating -two substrings that respectively match the concatenated -subexpressions. -.PP -Two regular expressions may be joined by the infix operator -.BR | ; -the resulting regular expression matches any string matching -either subexpression. -.PP -Repetition takes precedence over concatenation, which in turn -takes precedence over alternation. A whole subexpression may be -enclosed in parentheses to override these precedence rules. -.PP -The backreference -.BI \e n\c -\&, where -.I n -is a single digit, matches the substring -previously matched by the -.IR n th -parenthesized subexpression of the regular expression. -.PP -In basic regular expressions the metacharacters -.BR ? , -.BR + , -.BR { , -.BR | , -.BR ( , -and -.BR ) -lose their special meaning; instead use the backslashed -versions -.BR \e? , -.BR \e+ , -.BR \e{ , -.BR \e| , -.BR \e( , -and -.BR \e) . -.PP -In -.B egrep -the metacharacter -.B { -loses its special meaning; instead use -.BR \e{ . -.SH DIAGNOSTICS -.PP -Normally, exit status is 0 if matches were found, -and 1 if no matches were found. (The -.B \-v -option inverts the sense of the exit status.) -Exit status is 2 if there were syntax errors -in the pattern, inaccessible input files, or -other system errors. -.SH BUGS -.PP -Email bug reports to -.BR bug-gnu-utils@gnu.org . -Be sure to include the word ``grep'' somewhere in the ``Subject:'' field. -.PP -Large repetition counts in the -.BI { m , n } -construct may cause grep to use lots of memory. -In addition, -certain other obscure regular expressions require exponential time -and space, and may cause -.B grep -to run out of memory. -.PP -Backreferences are very slow, and may require exponential time. diff --git a/src/regex.c b/src/regex.c index 6c692914..ba01f735 100644 --- a/src/regex.c +++ b/src/regex.c @@ -1735,7 +1735,7 @@ typedef struct # define IS_CHAR_CLASS(string) wctype (string) # endif #else -# define CHAR_CLASS_MAX_LENGTH 256 /* Namely, `xdigit'. */ +# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ # define IS_CHAR_CLASS(string) \ (STREQ (string, "alpha") || STREQ (string, "upper") \ @@ -2210,10 +2210,13 @@ regex_compile (pattern, size, syntax, bufp) for (;;) { PATFETCH (c); - if ((c == ':' && *p == ']') || p == pend - || c1 == CHAR_CLASS_MAX_LENGTH) + if ((c == ':' && *p == ']') || p == pend) break; - str[c1++] = c; + if (c1 < CHAR_CLASS_MAX_LENGTH) + str[c1++] = c; + else + /* This is in any case an invalid class name. */ + str[0] = '\0'; } str[c1] = '\0'; @@ -5594,10 +5597,6 @@ re_exec (s) It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for the return codes and their meanings.) */ -#ifdef __APPLE__ -__private_extern__ -#endif - int regcomp (preg, pattern, cflags) regex_t *preg; @@ -5689,10 +5688,6 @@ weak_alias (__regcomp, regcomp) We return 0 if we find a match and REG_NOMATCH if not. */ -#ifdef __APPLE__ -__private_extern__ -#endif - int regexec (preg, string, nmatch, pmatch, eflags) const regex_t *preg; @@ -5760,9 +5755,6 @@ weak_alias (__regexec, regexec) /* Returns a message corresponding to an error code, ERRCODE, returned from either regcomp or regexec. We don't use PREG here. */ -#ifdef __APPLE__ -__private_extern__ -#endif size_t regerror (errcode, preg, errbuf, errbuf_size) int errcode; @@ -5809,9 +5801,7 @@ weak_alias (__regerror, regerror) /* Free dynamically allocated space used by PREG. */ -#ifdef __APPLE__ -__private_extern__ -#endif + void regfree (preg) regex_t *preg; |