summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNorihirio Tanaka <noritnk@kcn.ne.jp>2010-03-24 16:04:39 +0100
committerPaolo Bonzini <bonzini@gnu.org>2010-03-25 14:18:06 +0100
commit015240e846c8a6614272d44f351a5faee8b9aa93 (patch)
tree2df35e6f88b102f0327735fcef1bdaa2777e62e8
parenta0e3040fd80528163565779a85a95b80469b7dfa (diff)
downloadgrep-015240e846c8a6614272d44f351a5faee8b9aa93.tar.gz
dfa/grep: fix compilation without MBS_SUPPORT
* src/dfa.c (cur_mb_len): Initialize to 1 and always make it available. (setbit_case_fold): Do not use wint_t in prototype if !MBS_SUPPORT. (parse_bracket_exp): Fix compilation with !MBS_SUPPORT. * src/kwsearch.c (Fcompile): Do not use mbtolower and MB_CUR_MAX if !MBS_SUPPORT. * src/searchutils.c (kwsinit): Do not refer to MB_CUR_MAX if !MBS_SUPPORT. * tests/char-class-multibyte: Skip if UTF-8 matching does not work. * tests/fmbtest.sh: Likewise.
-rw-r--r--src/dfa.c24
-rw-r--r--src/kwsearch.c2
-rw-r--r--src/searchutils.c6
-rw-r--r--tests/char-class-multibyte4
-rwxr-xr-xtests/fmbtest.sh8
5 files changed, 37 insertions, 7 deletions
diff --git a/src/dfa.c b/src/dfa.c
index 951c3b74..c2ef18c7 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -243,7 +243,13 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
For MB_CUR_MAX > 1, one or both of the two cases may not be set,
so the resulting charset may only be used as an optimization. */
static void
-setbit_case_fold (wint_t b, charclass c)
+setbit_case_fold (
+#ifdef MBS_SUPPORT
+ wint_t b,
+#else
+ unsigned int b,
+#endif
+ charclass c)
{
if (case_fold)
{
@@ -309,11 +315,11 @@ static int parens; /* Count of outstanding left parens. */
static int minrep, maxrep; /* Repeat counts for {m,n}. */
static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */
+static int cur_mb_len = 1; /* Length of the multibyte representation of
+ wctok. */
#ifdef MBS_SUPPORT
/* These variables are used only if (MB_CUR_MAX > 1). */
static mbstate_t mbs; /* Mbstate for mbrlen(). */
-static int cur_mb_len; /* Length of the multibyte representation of
- wctok. */
static wchar_t wctok; /* Wide character representation of the current
multibyte character. */
static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec().
@@ -691,9 +697,9 @@ parse_bracket_exp (void)
continue;
}
- setbit_case_fold (wc, ccl);
#ifdef MBS_SUPPORT
/* Build normal characters. */
+ setbit_case_fold (wc, ccl);
if (MB_CUR_MAX > 1)
{
if (case_fold && iswalpha(wc))
@@ -719,10 +725,16 @@ parse_bracket_exp (void)
work_mbc->nchars + 1);
work_mbc->chars[work_mbc->nchars++] = wc;
}
-#endif
}
+#else
+ setbit_case_fold (c, ccl);
+#endif
}
- while ((wc = wc1, (c = c1) != L']'));
+ while ((
+#ifdef MBS_SUPPORT
+ wc = wc1,
+#endif
+ (c = c1) != ']'));
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 2f8e7b73..fa801e65 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -37,9 +37,11 @@ Fcompile (char const *pattern, size_t size)
kwsinit (&kwset);
psize = size;
+#ifdef MBS_SUPPORT
if (match_icase && MB_CUR_MAX > 1)
pat = mbtolower (pattern, &psize);
else
+#endif
pat = pattern;
beg = pat;
diff --git a/src/searchutils.c b/src/searchutils.c
index 08cf4200..e30355da 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -27,7 +27,11 @@ kwsinit (kwset_t *kwset)
static char trans[NCHAR];
int i;
- if (match_icase && MB_CUR_MAX == 1)
+ if (match_icase
+#ifdef MBS_SUPPORT
+ && MB_CUR_MAX == 1
+#endif
+ )
{
for (i = 0; i < NCHAR; ++i)
trans[i] = TOLOWER (i);
diff --git a/tests/char-class-multibyte b/tests/char-class-multibyte
index fccf13d7..d7ed3992 100644
--- a/tests/char-class-multibyte
+++ b/tests/char-class-multibyte
@@ -3,6 +3,10 @@
: ${srcdir=.}
. "$srcdir/init.sh"; path_prepend_ ../src
+if printf '\303\n' | LC_ALL=en_US.UTF-8 grep -q '[é]'; then # octal escape: POSIX printf does not define \x
+ skip_ UTF-8 matching seems not to work # a lone 0xC3 byte matched [é], so grep presumably compared bytes, not characters
+fi
+
printf 'é\n' > exp1 || framework_failure_
fail=0
diff --git a/tests/fmbtest.sh b/tests/fmbtest.sh
index 1b3a111f..93ce2ae5 100755
--- a/tests/fmbtest.sh
+++ b/tests/fmbtest.sh
@@ -11,6 +11,14 @@
LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \
|| exit 77
+# If matching is done in single-byte mode, skip this test too
+printf 'é\n' | LC_ALL=cz_CZ.UTF-8 ${GREP} -Eq '^[é]{2}$'
+case $? in
+ 0) exit 77;;
+ 1) ;;
+ *) exit 1;;
+esac
+
failures=0
cat > csinput <<EOF