summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2016-09-02 15:27:12 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2016-09-02 15:29:03 -0700
commit: 3c381d05ed984f756f45d21776670fa74e1687ca (patch)
tree: db98175472f2f4762912421481f6a87346bd22ec
parent: ad468bbe3df027f29ecb236283084fb60b734f68 (diff)
download: grep-3c381d05ed984f756f45d21776670fa74e1687ca.tar.gz
dfa: new option for anchored searches
This follows up on a suggestion by Norihiro Tanaka (Bug#24262).
* src/dfa.c (struct regex_syntax): New member 'anchor'.
(char_context): Use it.
(dfasyntax): Change signature to specify it, along with the old
FOLD and EOL args, as a single DFAOPTS arg. All uses changed.
* src/dfa.h (DFA_ANCHOR, DFA_CASE_FOLD, DFA_EOL_NUL): New constants
for dfasyntax new last arg.
-rw-r--r--  src/dfa.c              13
-rw-r--r--  src/dfa.h              22
-rw-r--r--  src/dfasearch.c         4
-rw-r--r--  tests/dfa-match-aux.c   2
4 files changed, 31 insertions, 10 deletions
diff --git a/src/dfa.c b/src/dfa.c
index 4cbaa75c..ff3721c1 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -335,6 +335,10 @@ struct regex_syntax
/* Flag for case-folding letters into sets. */
bool case_fold;
+ /* True if ^ and $ match only the start and end of data, and do not match
+ end-of-line within data. */
+ bool anchor;
+
/* End-of-line byte in data. */
unsigned char eolbyte;
@@ -754,7 +758,7 @@ unibyte_word_constituent (struct dfa const *dfa, unsigned char c)
static int
char_context (struct dfa const *dfa, unsigned char c)
{
- if (c == dfa->syntax.eolbyte)
+ if (c == dfa->syntax.eolbyte && !dfa->syntax.anchor)
return CTX_NEWLINE;
if (unibyte_word_constituent (dfa, c))
return CTX_LETTER;
@@ -3987,7 +3991,7 @@ dfaalloc (void)
/* Initialize DFA. */
void
dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
- reg_syntax_t bits, bool fold, unsigned char eol)
+ reg_syntax_t bits, int dfaopts)
{
int i;
memset (dfa, 0, offsetof (struct dfa, dfaexec));
@@ -4000,9 +4004,10 @@ dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
dfa->canychar = -1;
dfa->lex.cur_mb_len = 1;
dfa->syntax.syntax_bits_set = true;
+ dfa->syntax.case_fold = (dfaopts & DFA_CASE_FOLD) != 0;
+ dfa->syntax.anchor = (dfaopts & DFA_ANCHOR) != 0;
+ dfa->syntax.eolbyte = dfaopts & DFA_EOL_NUL ? '\0' : '\n';
dfa->syntax.syntax_bits = bits;
- dfa->syntax.case_fold = fold;
- dfa->syntax.eolbyte = eol;
for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
{
diff --git a/src/dfa.h b/src/dfa.h
index 31baf7a1..b8c44cc2 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -46,15 +46,29 @@ struct dfa;
calling dfafree() on it. */
extern struct dfa *dfaalloc (void) _GL_ATTRIBUTE_MALLOC;
+/* DFA options that can be ORed together, for dfasyntax's 4th arg. */
+enum
+ {
+ /* ^ and $ match only the start and end of data, and do not match
+ end-of-line within data. This is always false for grep, but
+ possibly true for other apps. */
+ DFA_ANCHOR = 1 << 0,
+
+ /* Ignore case while matching. */
+ DFA_CASE_FOLD = 1 << 1,
+
+ /* '\0' in data is end-of-line, instead of the traditional '\n'. */
+ DFA_EOL_NUL = 1 << 2
+ };
+
/* Initialize or reinitialize a DFA. This must be called before
any of the routines below. The arguments are:
1. The DFA to operate on.
2. Information about the current locale.
- 3. The syntax bits described earlier in this file.
- 4. The case-folding flag.
- 5. The line terminator. */
+ 3. Syntax bits described in regex.h.
+ 4. Additional DFA options described above. */
extern void dfasyntax (struct dfa *, struct localeinfo const *,
- reg_syntax_t, bool, unsigned char);
+ reg_syntax_t, int);
/* Build and return the struct dfamust from the given struct dfa. */
extern struct dfamust *dfamust (struct dfa const *);
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 0838e1fe..96be58f2 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -123,7 +123,9 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
if (match_icase)
syntax_bits |= RE_ICASE;
re_set_syntax (syntax_bits);
- dfasyntax (dfa, &localeinfo, syntax_bits, match_icase, eolbyte);
+ int dfaopts = ((match_icase ? DFA_CASE_FOLD : 0)
+ | (eolbyte ? 0 : DFA_EOL_NUL));
+ dfasyntax (dfa, &localeinfo, syntax_bits, dfaopts);
/* For GNU regex, pass the patterns separately to detect errors like
"[\nallo\n]\n", where the patterns are "[", "allo" and "]", and
diff --git a/tests/dfa-match-aux.c b/tests/dfa-match-aux.c
index e001b7de..070089c0 100644
--- a/tests/dfa-match-aux.c
+++ b/tests/dfa-match-aux.c
@@ -58,7 +58,7 @@ main (int argc, char **argv)
init_localeinfo (&localeinfo);
dfa = dfaalloc ();
- dfasyntax (dfa, &localeinfo, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n');
+ dfasyntax (dfa, &localeinfo, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0);
dfacomp (argv[1], strlen (argv[1]), dfa, 0);
beg = argv[2];