summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2020-09-07 19:44:21 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2020-09-07 19:49:33 -0700
commit: 9393b977015bf7944cec1d71ad3972c101bdb4b8 (patch)
tree: 75ec43b843346d5972f5eaace2d72ea8e37e11f3 /src
parent: 0ede35a6cd21093560de8bd9843263ba199abf1f (diff)
download: grep-9393b977015bf7944cec1d71ad3972c101bdb4b8.tar.gz
Prefer rawmemchr to memchr when it’s easy
* bootstrap.conf (gnulib_modules): Add rawmemchr.
* src/dfasearch.c (GEAcompile, EGexecute):
* src/grep.c (update_patterns, prpending, prtext):
* src/kwsearch.c (Fcompile, Fexecute):
* src/pcresearch.c (Pcompile, Pexecute):
Simplify (and presumably speed up a little) by using rawmemchr with a sentinel, instead of using memchr.
Diffstat (limited to 'src')
-rw-r--r-- src/dfasearch.c 50
-rw-r--r-- src/grep.c 11
-rw-r--r-- src/kwsearch.c 45
-rw-r--r-- src/pcresearch.c 18
4 files changed, 64 insertions, 60 deletions
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 256cd390..4d3f4b25 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -174,6 +174,10 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
return false;
}
+/* Compile PATTERN, containing SIZE bytes that are followed by '\n'.
+ SYNTAX_BITS specifies whether PATTERN uses style -G, -E, or -A.
+ Return a description of the compiled pattern. */
+
void *
GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
{
@@ -213,15 +217,8 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
do
{
- size_t len;
- char const *sep = memchr (p, '\n', patlim - p);
- if (sep)
- {
- len = sep - p;
- sep++;
- }
- else
- len = patlim - p;
+ char const *sep = rawmemchr (p, '\n');
+ ptrdiff_t len = sep - p;
bool backref = possible_backrefs_in_pattern (p, len, bs_safe);
@@ -247,7 +244,7 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
if (!regex_compile (dc, p, len, dc->pcount, lineno, !backref))
compilation_failed = true;
- p = sep;
+ p = sep + 1;
lineno++;
if (backref)
@@ -256,12 +253,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
prev = p;
}
}
- while (p);
+ while (p <= patlim);
if (compilation_failed)
exit (EXIT_TROUBLE);
- if (prev != NULL)
+ if (prev <= patlim)
{
if (pattern < prev)
{
@@ -383,14 +380,19 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
greater of the latter two values; this temporarily prefers
the DFA to KWset. */
exact_kwset_match = kwsm.index < dc->kwset_exact_matches;
- end = ((exact_kwset_match || !dfafast
- || MAX (16, match - beg) < (match - prev_beg) >> 2)
- ? match
- : MAX (16, match - beg) < (buflim - prev_beg) >> 2
- ? prev_beg + 4 * MAX (16, match - beg)
- : buflim);
- end = memchr (end, eol, buflim - end);
- end = end ? end + 1 : buflim;
+ if (exact_kwset_match || !dfafast
+ || MAX (16, match - beg) < (match - prev_beg) >> 2)
+ {
+ end = rawmemchr (match, eol);
+ end++;
+ }
+ else if (MAX (16, match - beg) < (buflim - prev_beg) >> 2)
+ {
+ end = rawmemchr (prev_beg + 4 * MAX (16, match - beg), eol);
+ end++;
+ }
+ else
+ end = buflim;
if (exact_kwset_match)
{
@@ -425,8 +427,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
beg++;
dfa_beg = beg;
}
- end = memchr (next_beg, eol, buflim - next_beg);
- end = end ? end + 1 : buflim;
+ end = rawmemchr (next_beg, eol);
+ end++;
count = 0;
}
@@ -446,8 +448,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
beg = memrchr (buf, eol, next_beg - buf);
beg++;
}
- end = memchr (next_beg, eol, buflim - next_beg);
- end = end ? end + 1 : buflim;
+ end = rawmemchr (next_beg, eol);
+ end++;
/* Successful, no back-references encountered! */
if (!backref)
diff --git a/src/grep.c b/src/grep.c
index ce2f2919..d058a76c 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -164,7 +164,7 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
ptrdiff_t patsize;
for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
{
- char const *patend = memchr (src, '\n', srclim - src);
+ char const *patend = rawmemchr (src, '\n');
patsize = patend + 1 - src;
memmove (dst, src, patsize);
@@ -1104,8 +1104,7 @@ static void
nlscan (char const *lim)
{
size_t newlines = 0;
- char const *beg;
- for (beg = lastnl; beg < lim; beg++)
+ for (char const *beg = lastnl; beg < lim; beg++)
{
beg = memchr (beg, eolbyte, lim - beg);
if (!beg)
@@ -1353,7 +1352,7 @@ prpending (char const *lim)
lastout = bufbeg;
for (; 0 < pending && lastout < lim; pending--)
{
- char *nl = memchr (lastout, eolbyte, lim - lastout);
+ char *nl = rawmemchr (lastout, eolbyte);
prline (lastout, nl + 1, SEP_CHAR_REJECTED);
}
}
@@ -1394,7 +1393,7 @@ prtext (char *beg, char *lim)
while (p < beg)
{
- char *nl = memchr (p, eol, beg - p);
+ char *nl = rawmemchr (p, eol);
nl++;
prline (p, nl, SEP_CHAR_REJECTED);
p = nl;
@@ -1407,7 +1406,7 @@ prtext (char *beg, char *lim)
/* One or more lines are output. */
for (n = 0; p < lim && n < outleft; n++)
{
- char *nl = memchr (p, eol, lim - p);
+ char *nl = rawmemchr (p, eol);
nl++;
if (!out_quiet)
prline (p, nl, SEP_CHAR_SELECTED);
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 6f6d4d05..70810604 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -43,14 +43,13 @@ struct kwsearch
void *re;
};
-/* Compile the -F style PATTERN, containing SIZE bytes. Return a
- description of the compiled pattern. */
+/* Compile the -F style PATTERN, containing SIZE bytes that are
+ followed by '\n'. Return a description of the compiled pattern. */
void *
Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
{
kwset_t kwset;
- ptrdiff_t total = size;
char *buf = NULL;
size_t bufalloc = 0;
@@ -59,23 +58,12 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
char const *p = pattern;
do
{
- ptrdiff_t len;
- char const *sep = memchr (p, '\n', total);
- if (sep)
- {
- len = sep - p;
- sep++;
- total -= (len + 1);
- }
- else
- {
- len = total;
- total = 0;
- }
+ char const *sep = rawmemchr (p, '\n');
+ ptrdiff_t len = sep - p;
if (match_lines)
{
- if (eolbyte == '\n' && pattern < p && sep)
+ if (eolbyte == '\n' && pattern < p)
p--;
else
{
@@ -94,9 +82,9 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
}
kwsincr (kwset, p, len);
- p = sep;
+ p = sep + 1;
}
- while (p);
+ while (p <= pattern + size);
free (buf);
ptrdiff_t words = kwswords (kwset);
@@ -259,8 +247,14 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
kwsearch->size,
RE_SYNTAX_GREP);
}
- end = memchr (beg + len, eol, (buf + size) - (beg + len));
- end = end ? end + 1 : buf + size;
+ if (beg + len < buf + size)
+ {
+ end = rawmemchr (beg + len, eol);
+ end++;
+ }
+ else
+ end = buf + size;
+
if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
!= (size_t) -1)
goto success_match_words;
@@ -285,8 +279,13 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
return -1;
success:
- end = memchr (beg + len, eol, (buf + size) - (beg + len));
- end = end ? end + 1 : buf + size;
+ if (beg + len < buf + size)
+ {
+ end = rawmemchr (beg + len, eol);
+ end++;
+ }
+ else
+ end = buf + size;
success_match_words:
beg = memrchr (buf, eol, beg - buf);
beg = beg ? beg + 1 : buf;
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 15a6a59b..2fcbf8e6 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -107,6 +107,9 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
}
}
+/* Compile the -P style PATTERN, containing SIZE bytes that are
+ followed by '\n'. Return a description of the compiled pattern. */
+
void *
Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
{
@@ -120,7 +123,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
sizeof xprefix - 1 + sizeof xsuffix - 1);
char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
- char const *patlim = pattern + size;
+ char *patlim = pattern + size;
char *n = re;
char const *p;
char const *pnul;
@@ -134,7 +137,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
}
/* FIXME: Remove this restriction. */
- if (memchr (pattern, '\n', size))
+ if (rawmemchr (pattern, '\n') != patlim)
die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
*n = '\0';
@@ -148,7 +151,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
replace each NUL byte in the pattern with the four characters
"\000", removing a preceding backslash if there are an odd
number of backslashes before the NUL. */
- for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
+ *patlim = '\0';
+ for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1)
{
memcpy (n, p, pnul - p);
n += pnul - p;
@@ -158,10 +162,10 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
strcpy (n, "\\000");
n += 4;
}
-
- memcpy (n, p, patlim - p);
+ memcpy (n, p, patlim - p + 1);
n += patlim - p;
- *n = '\0';
+ *patlim = '\n';
+
if (match_words)
strcpy (n, wsuffix);
if (match_lines)
@@ -219,7 +223,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
PCRE_MULTILINE for performance, the performance wasn't always
better and the correctness issues were too puzzling. See
Bug#22655. */
- line_end = memchr (p, eolbyte, buf + size - p);
+ line_end = rawmemchr (p, eolbyte);
if (INT_MAX < line_end - p)
die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));