diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2014-09-14 17:37:58 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-09-16 18:23:49 -0700 |
commit | dfff75a432eec187e4c5f2c6ce99ebdadb1089c8 (patch) | |
tree | f3407af0d4030ecda8303e93bb8fb09a0c2f99f7 /src/searchutils.c | |
parent | 6e319a818ed7b15b452ed2baab2f6a38d42fd1fe (diff) | |
download | grep-dfff75a432eec187e4c5f2c6ce99ebdadb1089c8.tar.gz |
grep: use mbclen cache more effectively
* src/grep.c (buffer_textbin, contains_encoding_error):
Use mb_clen for speed.
(buffer_textbin): Bypass mb_clen in unibyte locales.
(main): Always initialize the cache, since it's sometimes used in
unibyte locales now. Initialize it before contains_encoding_error
might be called.
* src/search.h (SEARCH_INLINE): New macro.
(mbclen_cache): Now extern decl.
(mb_clen): New inline function.
* src/searchutils.c (SEARCH_INLINE, SYSTEM_INLINE): Define.
(mbclen_cache): Now extern.
(build_mbclen_cache): Put 1 into the cache when mbrlen returns 0.
(mb_goback): Use mb_clen for speed, and rely on it returning nonzero.
* src/system.h (SYSTEM_INLINE): New macro.
(to_uchar): Use it.
Diffstat (limited to 'src/searchutils.c')
-rw-r--r-- | src/searchutils.c | 26 |
1 files changed, 14 insertions, 12 deletions
diff --git a/src/searchutils.c b/src/searchutils.c index 18dd584e..9edc7855 100644 --- a/src/searchutils.c +++ b/src/searchutils.c @@ -17,12 +17,16 @@ 02110-1301, USA. */ #include <config.h> -#include <assert.h> + +#define SEARCH_INLINE _GL_EXTERN_INLINE +#define SYSTEM_INLINE _GL_EXTERN_INLINE #include "search.h" +#include <assert.h> + #define NCHAR (UCHAR_MAX + 1) -static size_t mbclen_cache[NCHAR]; +size_t mbclen_cache[NCHAR]; void kwsinit (kwset_t *kwset) @@ -218,7 +222,8 @@ build_mbclen_cache (void) char c = i; unsigned char uc = i; mbstate_t mbs = { 0 }; - mbclen_cache[uc] = mbrlen (&c, 1, &mbs); + size_t len = mbrlen (&c, 1, &mbs); + mbclen_cache[uc] = len ? len : 1; } } @@ -244,20 +249,17 @@ mb_goback (char const **mb_start, char const *cur, char const *end) while (p < cur) { - size_t mbclen = mbclen_cache[to_uchar (*p)]; - - if (mbclen == (size_t) -2) - mbclen = mbrlen (p, end - p, &cur_state); + size_t clen = mb_clen (p, end - p, &cur_state); - if (! (0 < mbclen && mbclen < (size_t) -2)) + if ((size_t) -2 <= clen) { - /* An invalid sequence, or a truncated multibyte character, or - a null wide character. Treat it as a single byte character. */ - mbclen = 1; + /* An invalid sequence, or a truncated multibyte character. + Treat it as a single byte character. */ + clen = 1; memset (&cur_state, 0, sizeof cur_state); } p0 = p; - p += mbclen; + p += clen; } *mb_start = p; |