summary | refs | log | tree | commit | diff
path: root/src/searchutils.c
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2014-09-14 17:37:58 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2014-09-16 18:23:49 -0700
commit: dfff75a432eec187e4c5f2c6ce99ebdadb1089c8 (patch)
tree: f3407af0d4030ecda8303e93bb8fb09a0c2f99f7 /src/searchutils.c
parent: 6e319a818ed7b15b452ed2baab2f6a38d42fd1fe (diff)
download: grep-dfff75a432eec187e4c5f2c6ce99ebdadb1089c8.tar.gz
grep: use mbclen cache more effectively
* src/grep.c (buffer_textbin, contains_encoding_error): Use mb_clen for speed. (buffer_textbin): Bypass mb_clen in unibyte locales. (main): Always initialize the cache, since it's sometimes used in unibyte locales now. Initialize it before contains_encoding_error might be called.
* src/search.h (SEARCH_INLINE): New macro. (mbclen_cache): Now extern decl. (mb_clen): New inline function.
* src/searchutils.c (SEARCH_INLINE, SYSTEM_INLINE): Define. (mbclen_cache): Now extern. (build_mbclen_cache): Put 1 into the cache when mbrlen returns 0. (mb_goback): Use mb_clen for speed, and rely on it returning nonzero.
* src/system.h (SYSTEM_INLINE): New macro. (to_uchar): Use it.
Diffstat (limited to 'src/searchutils.c')
-rw-r--r-- src/searchutils.c 26
1 files changed, 14 insertions, 12 deletions
diff --git a/src/searchutils.c b/src/searchutils.c
index 18dd584e..9edc7855 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -17,12 +17,16 @@
02110-1301, USA. */
#include <config.h>
-#include <assert.h>
+
+#define SEARCH_INLINE _GL_EXTERN_INLINE
+#define SYSTEM_INLINE _GL_EXTERN_INLINE
#include "search.h"
+#include <assert.h>
+
#define NCHAR (UCHAR_MAX + 1)
-static size_t mbclen_cache[NCHAR];
+size_t mbclen_cache[NCHAR];
void
kwsinit (kwset_t *kwset)
@@ -218,7 +222,8 @@ build_mbclen_cache (void)
char c = i;
unsigned char uc = i;
mbstate_t mbs = { 0 };
- mbclen_cache[uc] = mbrlen (&c, 1, &mbs);
+ size_t len = mbrlen (&c, 1, &mbs);
+ mbclen_cache[uc] = len ? len : 1;
}
}
@@ -244,20 +249,17 @@ mb_goback (char const **mb_start, char const *cur, char const *end)
while (p < cur)
{
- size_t mbclen = mbclen_cache[to_uchar (*p)];
-
- if (mbclen == (size_t) -2)
- mbclen = mbrlen (p, end - p, &cur_state);
+ size_t clen = mb_clen (p, end - p, &cur_state);
- if (! (0 < mbclen && mbclen < (size_t) -2))
+ if ((size_t) -2 <= clen)
{
- /* An invalid sequence, or a truncated multibyte character, or
- a null wide character. Treat it as a single byte character. */
- mbclen = 1;
+ /* An invalid sequence, or a truncated multibyte character.
+ Treat it as a single byte character. */
+ clen = 1;
memset (&cur_state, 0, sizeof cur_state);
}
p0 = p;
- p += mbclen;
+ p += clen;
}
*mb_start = p;