summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am16
-rw-r--r--src/egrep.man1
-rw-r--r--src/fgrep.man1
-rw-r--r--src/grep.1477
-rw-r--r--src/regex.c26
5 files changed, 10 insertions, 511 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 47f54bc7..4924a7bd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -20,19 +20,7 @@ datadir = $(prefix)/@DATADIRNAME@
localedir = $(datadir)/locale
INCLUDES = -I../intl -DLOCALEDIR=\"$(localedir)\"
-man_MANS = grep.1 fgrep.1 egrep.1
-
-EXTRA_DIST = grep.1 egrep.man fgrep.man \
- regex.h \
+EXTRA_DIST = regex.h \
dosbuf.c \
- vms_fab.c vms_fab.h
-
-CLEANFILES = egrep.1 fgrep.1
-
-fgrep.1: fgrep.man
- inst=`echo "grep" | sed '$(transform)'`.1; \
-sed -e "s%@grep@%$$inst%g" $(srcdir)/fgrep.man > $@
+ vms_fab.c vms_fab.h
-egrep.1: egrep.man
- inst=`echo "grep" | sed '$(transform)'`.1; \
-sed -e "s%@grep@%$$inst%g" $(srcdir)/egrep.man > $@
diff --git a/src/egrep.man b/src/egrep.man
deleted file mode 100644
index 877a8a89..00000000
--- a/src/egrep.man
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/@grep@
diff --git a/src/fgrep.man b/src/fgrep.man
deleted file mode 100644
index 877a8a89..00000000
--- a/src/fgrep.man
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/@grep@
diff --git a/src/grep.1 b/src/grep.1
deleted file mode 100644
index 4354b74a..00000000
--- a/src/grep.1
+++ /dev/null
@@ -1,477 +0,0 @@
-.\" grep man page
-.de Id
-.ds Dt \\$4
-..
-.Id $Id: grep.1,v 1.6 1998/11/10 00:29:38 alainm Exp $
-.TH GREP 1 \*(Dt "GNU Project"
-.SH NAME
-grep, egrep, fgrep \- print lines matching a pattern
-.SH SYNOPSIS
-.B grep
-[-[AB] NUM] [-CEFGVabchiLlnqrsvwxyUu] [-e PATTERN | -f FILE]
-[-d ACTION] [--directories=ACTION]
-[--extended-regexp] [--fixed-strings] [--basic-regexp]
-[--regexp=PATTERN] [--file=FILE] [--ignore-case] [--word-regexp]
-[--line-regexp] [--no-messages] [--revert-match]
-[--version] [--help] [--byte-offset] [--line-number]
-[--with-filename] [--no-filename] [--quiet] [--silent] [--text]
-[--files-without-match] [--files-with-matches] [--count]
-[--before-context=NUM] [--after-context=NUM] [--context]
-[--binary] [--unix-byte-offsets] [--recursive]
-.I files...
-.SH DESCRIPTION
-.PP
-.B Grep
-searches the named input
-.I files
-(or standard input if no files are named, or
-the file name
-.B \-
-is given)
-for lines containing a match to the given
-.IR pattern .
-By default,
-.B grep
-prints the matching lines.
-.PP
-There are three major variants of
-.BR grep ,
-controlled by the following options.
-.PD 0
-.TP
-.B \-G, --basic-regexp
-Interpret
-.I pattern
-as a basic regular expression (see below). This is the default.
-.TP
-.B \-E, --extended-regexp
-Interpret
-.I pattern
-as an extended regular expression (see below).
-.TP
-.B \-F, --fixed-strings
-Interpret
-.I pattern
-as a list of fixed strings, separated by newlines,
-any of which is to be matched.
-.LP
-In addition, two variant programs
-.B egrep
-and
-.B fgrep
-are available.
-.B Egrep
-is similar (but not identical) to
-.BR "grep\ \-E" ,
-and is compatible with the historical Unix
-.BR egrep .
-.B Fgrep
-is the same as
-.BR "grep\ \-F" .
-.PD
-.LP
-All variants of
-.B grep
-understand the following options:
-.PD 0
-.TP
-.BI \-A " NUM" ", --after-context=" NUM
-Print
-.I NUM
-lines of trailing context after matching lines.
-.TP
-.BI \-B " NUM" ", --before-context=" NUM
-Print
-.I NUM
-lines of leading context before matching lines.
-.TP
-.BI \-C ,\ --context"[=NUM]"
-Print
-.I NUM
-lines (default 2) of output context.
-.TP
-.BI \- NUM \
-Same as --context=NUM lines of leading and trailing context. However,
-.B grep
-will never print any given line more than once.
-.TP
-.B \-V, --version
-Print the version number of
-.B grep
-to standard error. This version number should
-be included in all bug reports (see below).
-.TP
-.B \-b, --byte-offset
-Print the byte offset within the input file before
-each line of output.
-.TP
-.B \-c, --count
-Suppress normal output; instead print a count of
-matching lines for each input file.
-With the
-.B \-v, --revert-match
-option (see below), count non-matching lines.
-.TP
-.BI \-d " ACTION" ", --directories=" ACTION
-If an input file is a directory, use
-.I ACTION
-to process it. By default,
-.I ACTION
-is
-.BR read ,
-which means that directories are read just as if they were ordinary files.
-If
-.I ACTION
-is
-.BR skip ,
-directories are silently skipped.
-If
-.I ACTION
-is
-.BR recurse ,
-.B grep
-reads all files under each directory, recursively;
-this is equivalent to the
-.B \-r
-option.
-.TP
-.BI \-e " PATTERN" ", --regexp=" PATTERN
-Use
-.I PATTERN
-as the pattern; useful to protect patterns beginning with
-.BR \- .
-.TP
-.BI \-f " FILE" ", --file=" FILE
-Obtain patterns from
-.IR FILE ,
-one per line.
-The empty file contains zero patterns, and therefore matches nothing.
-.TP
-.B \-h, --no-filename
-Suppress the prefixing of filenames on output
-when multiple files are searched.
-.TP
-.B \-i, --ignore-case
-Ignore case distinctions in both the
-.I pattern
-and the input files.
-.TP
-.B \-L, --files-without-match
-Suppress normal output; instead print the name
-of each input file from which no output would
-normally have been printed. The scanning will stop
-on the first match.
-.TP
-.B \-l, --files-with-matches
-Suppress normal output; instead print
-the name of each input file from which output
-would normally have been printed. The scanning will
-stop on the first match.
-.TP
-.B \-n, --line-number
-Prefix each line of output with the line number
-within its input file.
-.TP
-.B \-q, --quiet, --silent
-Quiet; suppress normal output. The scanning will stop
-on the first match.
-Also see the
-.B \-s
-or
-.B --no-messages
-option below.
-.TP
-.B \-r, --recursive
-Read all files under each directory, recursively;
-this is equivalent to the
-.B "\-d recurse"
-option.
-.TP
-.B \-s, --no-messages
-Suppress error messages about nonexistent or unreadable files.
-Portability note: unlike GNU
-.BR grep ,
-BSD
-.B grep
-does not comply with POSIX.2, because BSD
-.B grep
-lacks a
-.B \-q
-option and its
-.B \-s
-option behaves like GNU
-.BR grep 's
-.B \-q
-option.
-Shell scripts intended to be portable to BSD
-.B grep
-should avoid both
-.B \-q
-and
-.B \-s
-and should redirect output to /dev/null instead.
-.TP
-.B \-a, --text
-Do not suppress output lines that contain binary data.
-Normally, if the first few bytes of a file indicate that
-the file contains binary data,
-.B grep
-outputs only a message saying that the file matches the pattern.
-This option causes
-.B grep
-to act as if the file is a text file,
-even if it would otherwise be treated as binary.
-.TP
-.B \-v, --revert-match
-Invert the sense of matching, to select non-matching lines.
-.TP
-.B \-w, --word-regexp
-Select only those lines containing matches that form whole words.
-The test is that the matching substring must either be at the
-beginning of the line, or preceded by a non-word constituent
-character. Similarly, it must be either at the end of the line
-or followed by a non-word constituent character. Word-constituent
-characters are letters, digits, and the underscore.
-.TP
-.B \-x, --line-regexp
-Select only those matches that exactly match the whole line.
-.TP
-.B \-y
-Obsolete synonym for
-.BR \-i .
-.TP
-.B \-U, --binary
-Treat the file(s) as binary. By default, under MS-DOS and MS-Windows,
-.BR grep
-guesses the file type by looking at the contents of the first 32KB
-read from the file. If
-.BR grep
-decides the file is a text file, it strips the CR characters from the
-original file contents (to make regular expressions with
-.B ^
-and
-.B $
-work correctly). Specifying
-.B \-U
-overrules this guesswork, causing all files to be read and passed to the
-matching mechanism verbatim; if the file is a text file with CR/LF
-pairs at the end of each line, this will cause some regular
-expressions to fail. This option is only supported on MS-DOS and
-MS-Windows.
-.TP
-.B \-u, --unix-byte-offsets
-Report Unix-style byte offsets. This switch causes
-.B grep
-to report byte offsets as if the file were a Unix-style text file, i.e. with
-CR characters stripped off. This will produce results identical to running
-.B grep
-on a Unix machine. This option has no effect unless the
-.B \-b
-option is also used; it is only supported on MS-DOS and MS-Windows.
-.PD
-.SH "REGULAR EXPRESSIONS"
-.PP
-A regular expression is a pattern that describes a set of strings.
-Regular expressions are constructed analogously to arithmetic
-expressions, by using various operators to combine smaller expressions.
-.PP
-.B Grep
-understands two different versions of regular expression syntax:
-``basic'' and ``extended.'' In
-.RB "GNU\ " grep ,
-there is no difference in available functionality using either syntax.
-In other implementations, basic regular expressions are less powerful.
-The following description applies to extended regular expressions;
-differences for basic regular expressions are summarized afterwards.
-.PP
-The fundamental building blocks are the regular expressions that match
-a single character. Most characters, including all letters and digits,
-are regular expressions that match themselves. Any metacharacter with
-special meaning may be quoted by preceding it with a backslash.
-.PP
-A list of characters enclosed by
-.B [
-and
-.B ]
-matches any single
-character in that list; if the first character of the list
-is the caret
-.B ^
-then it matches any character
-.I not
-in the list.
-For example, the regular expression
-.B [0123456789]
-matches any single digit. A range of ASCII characters
-may be specified by giving the first and last characters, separated
-by a hyphen.
-Finally, certain named classes of characters are predefined.
-Their names are self-explanatory, and they are
-.BR [:alnum:] ,
-.BR [:alpha:] ,
-.BR [:cntrl:] ,
-.BR [:digit:] ,
-.BR [:graph:] ,
-.BR [:lower:] ,
-.BR [:print:] ,
-.BR [:punct:] ,
-.BR [:space:] ,
-.BR [:upper:] ,
-and
-.BR [:xdigit:] .
-For example,
-.B [[:alnum:]]
-means
-.BR [0-9A-Za-z] ,
-except the latter form is dependent upon the ASCII character encoding,
-whereas the former is portable.
-(Note that the brackets in these class names are part of the symbolic
-names, and must be included in addition to the brackets delimiting
-the bracket list.) Most metacharacters lose their special meaning
-inside lists. To include a literal
-.B ]
-place it first in the list. Similarly, to include a literal
-.B ^
-place it anywhere but first. Finally, to include a literal
-.B \-
-place it last.
-.PP
-The period
-.B .
-matches any single character.
-The symbol
-.B \ew
-is a synonym for
-.B [[:alnum:]]
-and
-.B \eW
-is a synonym for
-.BR [^[:alnum:]] .
-.PP
-The caret
-.B ^
-and the dollar sign
-.B $
-are metacharacters that respectively match the empty string at the
-beginning and end of a line.
-The symbols
-.B \e<
-and
-.B \e>
-respectively match the empty string at the beginning and end of a word.
-The symbol
-.B \eb
-matches the empty string at the edge of a word,
-and
-.B \eB
-matches the empty string provided it's
-.I not
-at the edge of a word.
-.PP
-A regular expression may be followed by one of several repetition operators:
-.PD 0
-.TP
-.B ?
-The preceding item is optional and matched at most once.
-.TP
-.B *
-The preceding item will be matched zero or more times.
-.TP
-.B +
-The preceding item will be matched one or more times.
-.TP
-.BI { n }
-The preceding item is matched exactly
-.I n
-times.
-.TP
-.BI { n ,}
-The preceding item is matched
-.I n
-or more times.
-.TP
-.BI {, m }
-The preceding item is optional and is matched at most
-.I m
-times.
-.TP
-.BI { n , m }
-The preceding item is matched at least
-.I n
-times, but not more than
-.I m
-times.
-.PD
-.PP
-Two regular expressions may be concatenated; the resulting
-regular expression matches any string formed by concatenating
-two substrings that respectively match the concatenated
-subexpressions.
-.PP
-Two regular expressions may be joined by the infix operator
-.BR | ;
-the resulting regular expression matches any string matching
-either subexpression.
-.PP
-Repetition takes precedence over concatenation, which in turn
-takes precedence over alternation. A whole subexpression may be
-enclosed in parentheses to override these precedence rules.
-.PP
-The backreference
-.BI \e n\c
-\&, where
-.I n
-is a single digit, matches the substring
-previously matched by the
-.IR n th
-parenthesized subexpression of the regular expression.
-.PP
-In basic regular expressions the metacharacters
-.BR ? ,
-.BR + ,
-.BR { ,
-.BR | ,
-.BR ( ,
-and
-.BR )
-lose their special meaning; instead use the backslashed
-versions
-.BR \e? ,
-.BR \e+ ,
-.BR \e{ ,
-.BR \e| ,
-.BR \e( ,
-and
-.BR \e) .
-.PP
-In
-.B egrep
-the metacharacter
-.B {
-loses its special meaning; instead use
-.BR \e{ .
-.SH DIAGNOSTICS
-.PP
-Normally, exit status is 0 if matches were found,
-and 1 if no matches were found. (The
-.B \-v
-option inverts the sense of the exit status.)
-Exit status is 2 if there were syntax errors
-in the pattern, inaccessible input files, or
-other system errors.
-.SH BUGS
-.PP
-Email bug reports to
-.BR bug-gnu-utils@gnu.org .
-Be sure to include the word ``grep'' somewhere in the ``Subject:'' field.
-.PP
-Large repetition counts in the
-.BI { m , n }
-construct may cause grep to use lots of memory.
-In addition,
-certain other obscure regular expressions require exponential time
-and space, and may cause
-.B grep
-to run out of memory.
-.PP
-Backreferences are very slow, and may require exponential time.
diff --git a/src/regex.c b/src/regex.c
index 6c692914..ba01f735 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -1735,7 +1735,7 @@ typedef struct
# define IS_CHAR_CLASS(string) wctype (string)
# endif
#else
-# define CHAR_CLASS_MAX_LENGTH 256 /* Namely, `xdigit'. */
+# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
# define IS_CHAR_CLASS(string) \
(STREQ (string, "alpha") || STREQ (string, "upper") \
@@ -2210,10 +2210,13 @@ regex_compile (pattern, size, syntax, bufp)
for (;;)
{
PATFETCH (c);
- if ((c == ':' && *p == ']') || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
+ if ((c == ':' && *p == ']') || p == pend)
break;
- str[c1++] = c;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
}
str[c1] = '\0';
@@ -5594,10 +5597,6 @@ re_exec (s)
It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
the return codes and their meanings.) */
-#ifdef __APPLE__
-__private_extern__
-#endif
-
int
regcomp (preg, pattern, cflags)
regex_t *preg;
@@ -5689,10 +5688,6 @@ weak_alias (__regcomp, regcomp)
We return 0 if we find a match and REG_NOMATCH if not. */
-#ifdef __APPLE__
-__private_extern__
-#endif
-
int
regexec (preg, string, nmatch, pmatch, eflags)
const regex_t *preg;
@@ -5760,9 +5755,6 @@ weak_alias (__regexec, regexec)
/* Returns a message corresponding to an error code, ERRCODE, returned
from either regcomp or regexec. We don't use PREG here. */
-#ifdef __APPLE__
-__private_extern__
-#endif
size_t
regerror (errcode, preg, errbuf, errbuf_size)
int errcode;
@@ -5809,9 +5801,7 @@ weak_alias (__regerror, regerror)
/* Free dynamically allocated space used by PREG. */
-#ifdef __APPLE__
-__private_extern__
-#endif
+
void
regfree (preg)
regex_t *preg;