diff options
author | Norihirio Tanaka <noritnk@kcn.ne.jp> | 2010-03-24 16:04:39 +0100 |
---|---|---|
committer | Paolo Bonzini <bonzini@gnu.org> | 2010-03-25 14:18:06 +0100 |
commit | 015240e846c8a6614272d44f351a5faee8b9aa93 (patch) | |
tree | 2df35e6f88b102f0327735fcef1bdaa2777e62e8 | |
parent | a0e3040fd80528163565779a85a95b80469b7dfa (diff) | |
download | grep-015240e846c8a6614272d44f351a5faee8b9aa93.tar.gz |
dfa/grep: fix compilation without MBS_SUPPORT
* src/dfa.c (cur_mb_len): Initialize to 1 and always make it available.
(setbit_case_fold): Do not use wint_t in prototype if !MBS_SUPPORT.
(parse_bracket_exp): Fix compilation with !MBS_SUPPORT.
* src/kwsearch.c (Fcompile): Do not use mbtolower and MB_CUR_MAX
if !MBS_SUPPORT.
* src/searchutils.c (kwsinit): Do not refer to MB_CUR_MAX if !MBS_SUPPORT.
* tests/char-class-multibyte: Skip if UTF-8 matching does not work.
* tests/fmbtest.sh: Likewise.
-rw-r--r-- | src/dfa.c | 24 | ||||
-rw-r--r-- | src/kwsearch.c | 2 | ||||
-rw-r--r-- | src/searchutils.c | 6 | ||||
-rw-r--r-- | tests/char-class-multibyte | 4 | ||||
-rwxr-xr-x | tests/fmbtest.sh | 8 |
5 files changed, 37 insertions, 7 deletions
@@ -243,7 +243,13 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
    For MB_CUR_MAX > 1, one or both of the two cases may not be set,
    so the resulting charset may only be used as an optimization. */
 static void
-setbit_case_fold (wint_t b, charclass c)
+setbit_case_fold (
+#ifdef MBS_SUPPORT
+                  wint_t b,
+#else
+                  unsigned int b,
+#endif
+                  charclass c)
 {
   if (case_fold)
     {
@@ -309,11 +315,11 @@ static int parens; /* Count of outstanding left parens. */
 static int minrep, maxrep; /* Repeat counts for {m,n}. */
 static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */
 
+static int cur_mb_len = 1; /* Length of the multibyte representation of
+                              wctok. */
 #ifdef MBS_SUPPORT
 /* These variables are used only if (MB_CUR_MAX > 1). */
 static mbstate_t mbs; /* Mbstate for mbrlen(). */
-static int cur_mb_len; /* Length of the multibyte representation of
-                          wctok. */
 static wchar_t wctok; /* Wide character representation of the current
                          multibyte character. */
 static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec().
@@ -691,9 +697,9 @@ parse_bracket_exp (void)
               continue;
             }
 
-          setbit_case_fold (wc, ccl);
 #ifdef MBS_SUPPORT
           /* Build normal characters. */
+          setbit_case_fold (wc, ccl);
           if (MB_CUR_MAX > 1)
             {
               if (case_fold && iswalpha(wc))
@@ -719,10 +725,16 @@ parse_bracket_exp (void)
                                           work_mbc->nchars + 1);
                   work_mbc->chars[work_mbc->nchars++] = wc;
                 }
-#endif
             }
+#else
+          setbit_case_fold (c, ccl);
+#endif
         }
-      while ((wc = wc1, (c = c1) != L']'));
+      while ((
+#ifdef MBS_SUPPORT
+               wc = wc1,
+#endif
+               (c = c1) != ']'));
 
 #ifdef MBS_SUPPORT
   if (MB_CUR_MAX > 1
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 2f8e7b73..fa801e65 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -37,9 +37,11 @@ Fcompile (char const *pattern, size_t size)
 
   kwsinit (&kwset);
   psize = size;
+#ifdef MBS_SUPPORT
   if (match_icase && MB_CUR_MAX > 1)
     pat = mbtolower (pattern, &psize);
   else
+#endif
     pat = pattern;
 
   beg = pat;
diff --git a/src/searchutils.c b/src/searchutils.c
index 08cf4200..e30355da 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -27,7 +27,11 @@ kwsinit (kwset_t *kwset)
   static char trans[NCHAR];
   int i;
 
-  if (match_icase && MB_CUR_MAX == 1)
+  if (match_icase
+#ifdef MBS_SUPPORT
+      && MB_CUR_MAX == 1
+#endif
+     )
     {
       for (i = 0; i < NCHAR; ++i)
         trans[i] = TOLOWER (i);
diff --git a/tests/char-class-multibyte b/tests/char-class-multibyte
index fccf13d7..d7ed3992 100644
--- a/tests/char-class-multibyte
+++ b/tests/char-class-multibyte
@@ -3,6 +3,10 @@
 : ${srcdir=.}
 . "$srcdir/init.sh"; path_prepend_ ../src
 
+if printf '\xc3\n' | LC_ALL=en_US.UTF-8 grep -q '[é]'; then
+  skip_ UTF-8 matching seems not to work
+fi
+
 printf 'é\n' > exp1 || framework_failure_
 
 fail=0
diff --git a/tests/fmbtest.sh b/tests/fmbtest.sh
index 1b3a111f..93ce2ae5 100755
--- a/tests/fmbtest.sh
+++ b/tests/fmbtest.sh
@@ -11,6 +11,14 @@
 LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \
   || exit 77
 
+# If matching is done in single-byte mode, skip this test too
+printf 'é\n' | LC_ALL=cz_CZ.UTF-8 ${GREP} -Eq '^[é]{2}$'
+case $? in
+  0) exit 77;;
+  1) ;;
+  *) exit 1;;
+esac
+
 failures=0
 
 cat > csinput <<EOF