diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2020-09-07 19:44:21 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2020-09-07 19:49:33 -0700 |
commit | 9393b977015bf7944cec1d71ad3972c101bdb4b8 (patch) | |
tree | 75ec43b843346d5972f5eaace2d72ea8e37e11f3 /src | |
parent | 0ede35a6cd21093560de8bd9843263ba199abf1f (diff) | |
download | grep-9393b977015bf7944cec1d71ad3972c101bdb4b8.tar.gz |
Prefer rawmemchr to memchr when it’s easy
* bootstrap.conf (gnulib_modules): Add rawmemchr.
* src/dfasearch.c (GEAcompile, EGexecute):
* src/grep.c (update_patterns, prpending, prtext):
* src/kwsearch.c (Fcompile, Fexecute):
* src/pcresearch.c (Pcompile, Pexecute):
Simplify (and presumably speed up a little) by using rawmemchr
with a sentinel, instead of using memchr.
Diffstat (limited to 'src')
-rw-r--r-- | src/dfasearch.c | 50 | ||||
-rw-r--r-- | src/grep.c | 11 | ||||
-rw-r--r-- | src/kwsearch.c | 45 | ||||
-rw-r--r-- | src/pcresearch.c | 18 |
4 files changed, 64 insertions, 60 deletions
diff --git a/src/dfasearch.c b/src/dfasearch.c index 256cd390..4d3f4b25 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -174,6 +174,10 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len, return false; } +/* Compile PATTERN, containing SIZE bytes that are followed by '\n'. + SYNTAX_BITS specifies whether PATTERN uses style -G, -E, or -A. + Return a description of the compiled pattern. */ + void * GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits) { @@ -213,15 +217,8 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits) do { - size_t len; - char const *sep = memchr (p, '\n', patlim - p); - if (sep) - { - len = sep - p; - sep++; - } - else - len = patlim - p; + char const *sep = rawmemchr (p, '\n'); + ptrdiff_t len = sep - p; bool backref = possible_backrefs_in_pattern (p, len, bs_safe); @@ -247,7 +244,7 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits) if (!regex_compile (dc, p, len, dc->pcount, lineno, !backref)) compilation_failed = true; - p = sep; + p = sep + 1; lineno++; if (backref) @@ -256,12 +253,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits) prev = p; } } - while (p); + while (p <= patlim); if (compilation_failed) exit (EXIT_TROUBLE); - if (prev != NULL) + if (prev <= patlim) { if (pattern < prev) { @@ -383,14 +380,19 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size, greater of the latter two values; this temporarily prefers the DFA to KWset. */ exact_kwset_match = kwsm.index < dc->kwset_exact_matches; - end = ((exact_kwset_match || !dfafast - || MAX (16, match - beg) < (match - prev_beg) >> 2) - ? match - : MAX (16, match - beg) < (buflim - prev_beg) >> 2 - ? prev_beg + 4 * MAX (16, match - beg) - : buflim); - end = memchr (end, eol, buflim - end); - end = end ? 
end + 1 : buflim; + if (exact_kwset_match || !dfafast + || MAX (16, match - beg) < (match - prev_beg) >> 2) + { + end = rawmemchr (match, eol); + end++; + } + else if (MAX (16, match - beg) < (buflim - prev_beg) >> 2) + { + end = rawmemchr (prev_beg + 4 * MAX (16, match - beg), eol); + end++; + } + else + end = buflim; if (exact_kwset_match) { @@ -425,8 +427,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size, beg++; dfa_beg = beg; } - end = memchr (next_beg, eol, buflim - next_beg); - end = end ? end + 1 : buflim; + end = rawmemchr (next_beg, eol); + end++; count = 0; } @@ -446,8 +448,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size, beg = memrchr (buf, eol, next_beg - buf); beg++; } - end = memchr (next_beg, eol, buflim - next_beg); - end = end ? end + 1 : buflim; + end = rawmemchr (next_beg, eol); + end++; /* Successful, no back-references encountered! */ if (!backref) diff --git a/src/grep.c b/src/grep.c --- a/src/grep.c +++ b/src/grep.c @@ -164,7 +164,7 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size, ptrdiff_t patsize; for (char const *src = keys + dupfree_size; src < srclim; src += patsize) { - char const *patend = memchr (src, '\n', srclim - src); + char const *patend = rawmemchr (src, '\n'); patsize = patend + 1 - src; memmove (dst, src, patsize); @@ -1104,8 +1104,7 @@ static void nlscan (char const *lim) { size_t newlines = 0; - char const *beg; - for (beg = lastnl; beg < lim; beg++) + for (char const *beg = lastnl; beg < lim; beg++) { beg = memchr (beg, eolbyte, lim - beg); if (!beg) @@ -1353,7 +1352,7 @@ prpending (char const *lim) lastout = bufbeg; for (; 0 < pending && lastout < lim; pending--) { - char *nl = memchr (lastout, eolbyte, lim - lastout); + char *nl = rawmemchr (lastout, eolbyte); prline (lastout, nl + 1, SEP_CHAR_REJECTED); } } @@ -1394,7 +1393,7 @@ prtext (char *beg, char *lim) while (p < beg) { - char *nl = memchr (p, eol, beg - p); + char *nl = rawmemchr (p, eol); nl++; prline (p, nl, SEP_CHAR_REJECTED); p = nl; @@ -1407,7 +1406,7 
@@ prtext (char *beg, char *lim) /* One or more lines are output. */ for (n = 0; p < lim && n < outleft; n++) { - char *nl = memchr (p, eol, lim - p); + char *nl = rawmemchr (p, eol); nl++; if (!out_quiet) prline (p, nl, SEP_CHAR_SELECTED); diff --git a/src/kwsearch.c b/src/kwsearch.c index 6f6d4d05..70810604 100644 --- a/src/kwsearch.c +++ b/src/kwsearch.c @@ -43,14 +43,13 @@ struct kwsearch void *re; }; -/* Compile the -F style PATTERN, containing SIZE bytes. Return a - description of the compiled pattern. */ +/* Compile the -F style PATTERN, containing SIZE bytes that are + followed by '\n'. Return a description of the compiled pattern. */ void * Fcompile (char *pattern, size_t size, reg_syntax_t ignored) { kwset_t kwset; - ptrdiff_t total = size; char *buf = NULL; size_t bufalloc = 0; @@ -59,23 +58,12 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored) char const *p = pattern; do { - ptrdiff_t len; - char const *sep = memchr (p, '\n', total); - if (sep) - { - len = sep - p; - sep++; - total -= (len + 1); - } - else - { - len = total; - total = 0; - } + char const *sep = rawmemchr (p, '\n'); + ptrdiff_t len = sep - p; if (match_lines) { - if (eolbyte == '\n' && pattern < p && sep) + if (eolbyte == '\n' && pattern < p) p--; else { @@ -94,9 +82,9 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored) } kwsincr (kwset, p, len); - p = sep; + p = sep + 1; } - while (p); + while (p <= pattern + size); free (buf); ptrdiff_t words = kwswords (kwset); @@ -259,8 +247,14 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size, kwsearch->size, RE_SYNTAX_GREP); } - end = memchr (beg + len, eol, (buf + size) - (beg + len)); - end = end ? 
end + 1 : buf + size; + if (beg + len < buf + size) + { + end = rawmemchr (beg + len, eol); + end++; + } + else + end = buf + size; + if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL) != (size_t) -1) goto success_match_words; @@ -285,8 +279,13 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size, return -1; success: - end = memchr (beg + len, eol, (buf + size) - (beg + len)); - end = end ? end + 1 : buf + size; + if (beg + len < buf + size) + { + end = rawmemchr (beg + len, eol); + end++; + } + else + end = buf + size; success_match_words: beg = memrchr (buf, eol, beg - buf); beg = beg ? beg + 1 : buf; diff --git a/src/pcresearch.c b/src/pcresearch.c index 15a6a59b..2fcbf8e6 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -107,6 +107,9 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, } } +/* Compile the -P style PATTERN, containing SIZE bytes that are + followed by '\n'. Return a description of the compiled pattern. */ + void * Pcompile (char *pattern, size_t size, reg_syntax_t ignored) { @@ -120,7 +123,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored) sizeof xprefix - 1 + sizeof xsuffix - 1); char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4); int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0); - char const *patlim = pattern + size; + char *patlim = pattern + size; char *n = re; char const *p; char const *pnul; @@ -134,7 +137,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored) } /* FIXME: Remove this restriction. */ - if (memchr (pattern, '\n', size)) + if (rawmemchr (pattern, '\n') != patlim) die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern")); *n = '\0'; @@ -148,7 +151,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored) replace each NUL byte in the pattern with the four characters "\000", removing a preceding backslash if there are an odd number of backslashes before the NUL. 
*/ - for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1) + *patlim = '\0'; + for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1) { memcpy (n, p, pnul - p); n += pnul - p; @@ -158,10 +162,10 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored) strcpy (n, "\\000"); n += 4; } - - memcpy (n, p, patlim - p); + memcpy (n, p, patlim - p + 1); n += patlim - p; - *n = '\0'; + *patlim = '\n'; + if (match_words) strcpy (n, wsuffix); if (match_lines) @@ -219,7 +223,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, PCRE_MULTILINE for performance, the performance wasn't always better and the correctness issues were too puzzling. See Bug#22655. */ - line_end = memchr (p, eolbyte, buf + size - p); + line_end = rawmemchr (p, eolbyte); if (INT_MAX < line_end - p) die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit")); |