summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Paolo Bonzini <bonzini@gnu.org> 2010-03-08 12:20:37 +0100
committer: Paolo Bonzini <bonzini@gnu.org> 2010-03-17 15:33:14 +0100
commit: f6b983890c310f0b5c4470f5320c894d81528cb5 (patch)
tree: d0abf8bbe7f1606f17559cb9f0dcb1e63476472c /src
parent: 8f9106c419d18759f767da351b3b6913f022c8f8 (diff)
download: grep-f6b983890c310f0b5c4470f5320c894d81528cb5.tar.gz
dfa: optimize simple character sets under UTF-8 charsets
Only use a bitset when possible without involving MBCSET.

Testcase:

  yes 'the quick brown fox jumps over the lazy dog' | sed 100000q | \
    time grep -c [ABCDEFGHIJKLMNOPQRSTUVWXYZ,]

Before: 51ms (best of three runs); after: 16ms (best of three runs).

* src/dfa.c (parse_bracket_exp): For simple bracket expressions under UTF-8, use a CSET.
Diffstat (limited to 'src')
-rw-r--r-- src/dfa.c 13
1 files changed, 12 insertions, 1 deletions
diff --git a/src/dfa.c b/src/dfa.c
index 3b0d8610..919f3490 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -21,6 +21,7 @@
Modified July, 1988 by Arthur David Olson to assist BMG speedups */
#include <config.h>
+#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <sys/types.h>
@@ -728,7 +729,14 @@ parse_bracket_exp (void)
while ((wc = wc1, (c = c1) != L']'));
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (MB_CUR_MAX > 1
+ && (!using_utf8()
+ || invert
+ || work_mbc->nchars != 0
+ || work_mbc->nch_classes != 0
+ || work_mbc->nranges != 0
+ || work_mbc->nequivs != 0
+ || work_mbc->ncoll_elems != 0))
{
static charclass zeroclass;
work_mbc->invert = invert;
@@ -739,6 +747,9 @@ parse_bracket_exp (void)
if (invert)
{
+#ifdef MBS_SUPPORT
+ assert(MB_CUR_MAX == 1);
+#endif
notset(ccl);
if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
clrbit(eolbyte, ccl);