diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2021-08-24 00:34:20 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2021-08-24 00:43:28 -0700 |
commit | 643e5573888c49ec7022c2cee04a312b70166d0d (patch) | |
tree | a15b538b1a6c8138c0e51c294fb2bf89e5b67e72 /src | |
parent | 869989fa834c34ca2d5602555111c11f179ec8e4 (diff) | |
download | grep-643e5573888c49ec7022c2cee04a312b70166d0d.tar.gz |
grep: tweak mb_goback performance
* src/searchutils.c (mb_goback): Set *MBCLEN only in
non-UTF-8 encodings, since that’s the only time it’s needed,
and this lets us see more clearly that the UTF-8 clen value
is not useful to the caller.
Diffstat (limited to 'src')
-rw-r--r-- | src/searchutils.c | 16 |
1 file changed, 11 insertions, 5 deletions
```diff
diff --git a/src/searchutils.c b/src/searchutils.c
index 03b4c593..f16dd846 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -93,24 +93,25 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
 {
   const char *p = *mb_start;
   const char *p0 = p;
-  size_t clen;

   if (cur <= p)
     return cur - p;

   if (localeinfo.using_utf8)
     {
+      /* UTF-8 permits scanning backward to the previous character.
+         Start by assuming CUR is at a character boundary. */
       p = cur;
-      clen = 1;

       if ((*cur & 0xc0) == 0x80)
         for (int i = 1; i <= 3; i++)
           if ((cur[-i] & 0xc0) != 0x80)
             {
               mbstate_t mbs = { 0 };
-              clen = mb_clen (cur - i, end - (cur - i), &mbs);
+              size_t clen = mb_clen (cur - i, end - (cur - i), &mbs);
               if (i < clen && clen <= MB_LEN_MAX)
                 {
+                  /* This multibyte character contains *CUR. */
                   p0 = cur - i;
                   p = p0 + clen;
                 }
@@ -119,7 +120,11 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
     }
   else
     {
+      /* In non-UTF-8 encodings, to find character boundaries one must
+         in general scan forward from the start of the buffer. */
       mbstate_t mbs = { 0 };
+      size_t clen;
+
       do
         {
           clen = mb_clen (p, end - p, &mbs);
@@ -135,11 +140,12 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
           p += clen;
         }
       while (p < cur);
+
+      if (mbclen)
+        *mbclen = clen;
     }

   *mb_start = p;
-  if (mbclen)
-    *mbclen = clen;
   return p == cur ? 0 : cur - p0;
 }

```