diff options
author | Paolo Bonzini <bonzini@gnu.org> | 2010-03-08 12:20:37 +0100 |
---|---|---|
committer | Paolo Bonzini <bonzini@gnu.org> | 2010-03-17 15:33:14 +0100 |
commit | f6b983890c310f0b5c4470f5320c894d81528cb5 (patch) | |
tree | d0abf8bbe7f1606f17559cb9f0dcb1e63476472c /src | |
parent | 8f9106c419d18759f767da351b3b6913f022c8f8 (diff) | |
download | grep-f6b983890c310f0b5c4470f5320c894d81528cb5.tar.gz |
dfa: optimize simple character sets under UTF-8 charsets
When a bracket expression can be handled without involving MBCSET, use only a bitset. Testcase:
yes 'the quick brown fox jumps over the lazy dog' | sed 100000q | \
time grep -c [ABCDEFGHIJKLMNOPQRSTUVWXYZ,]
Before: 51ms (best of three runs); after: 16ms (best of three runs).
* src/dfa.c (parse_bracket_exp): For simple bracket expressions
under UTF-8, use a CSET.
Diffstat (limited to 'src')
-rw-r--r-- | src/dfa.c | 13 |
1 file changed, 12 insertions, 1 deletion
@@ -21,6 +21,7 @@ Modified July, 1988 by Arthur David Olson to assist BMG speedups */
 #include <config.h>
+#include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
 #include <sys/types.h>
@@ -728,7 +729,14 @@ parse_bracket_exp (void)
   while ((wc = wc1, (c = c1) != L']'));
 #ifdef MBS_SUPPORT
-  if (MB_CUR_MAX > 1)
+  if (MB_CUR_MAX > 1
+      && (!using_utf8()
+          || invert
+          || work_mbc->nchars != 0
+          || work_mbc->nch_classes != 0
+          || work_mbc->nranges != 0
+          || work_mbc->nequivs != 0
+          || work_mbc->ncoll_elems != 0))
     {
       static charclass zeroclass;
       work_mbc->invert = invert;
@@ -739,6 +747,9 @@ parse_bracket_exp (void)
   if (invert)
     {
+#ifdef MBS_SUPPORT
+      assert(MB_CUR_MAX == 1);
+#endif
       notset(ccl);
       if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
         clrbit(eolbyte, ccl);