summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paolo Bonzini <bonzini@gnu.org> 2010-03-23 16:07:56 +0100
committer: Paolo Bonzini <bonzini@gnu.org> 2010-03-23 16:12:00 +0100
commit: 3220317a428d63a4303ffee0fb45becf835cf1fd (patch)
tree: 70ffe675e50d2f6e5b7eb0f449f359c2191612d5
parent: 90cc2ba27bd3f368b945f89a60be4220b73baac7 (diff)
download: grep-3220317a428d63a4303ffee0fb45becf835cf1fd.tar.gz
dfa: fix sigsegv on multibyte character classes
Reported by Jaroslav Škarvada <jskarvad@redhat.com>. This is unfortunate. grep needs an automatic testcase generator.

* NEWS: Document bug.
* THANKS: Mention reporter.
* src/dfa.c (setbit_case_fold): Change type of first argument for self-documentation.
(parse_bracket_exp): Fix call.
* tests/Makefile.am: Add new testcase.
* tests/char-class-multibyte: New testcase.
-rw-r--r-- NEWS 4
-rw-r--r-- THANKS 1
-rw-r--r-- src/dfa.c 4
-rw-r--r-- tests/Makefile.am 1
-rw-r--r-- tests/char-class-multibyte 23
5 files changed, 31 insertions, 2 deletions
diff --git a/NEWS b/NEWS
index 437a93ac..9980df94 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,10 @@ GNU grep NEWS -*- outline -*-
* Noteworthy changes in release ?.? (????-??-??) [?]
+** Bug fixes
+
+ Character classes could cause a segmentation fault if they included a
+ multibyte character. This is a regression from 2.5.4.
* Noteworthy changes in release 2.6 (2010-03-23) [stable]
diff --git a/THANKS b/THANKS
index 6812ddee..d1d1ad49 100644
--- a/THANKS
+++ b/THANKS
@@ -33,6 +33,7 @@ Harald Hanche-Olsen <hanche@math.ntnu.no>
Hans-Bernhard Broeker <broeker@physik.rwth-aachen.de>
Heikki Korpela <heko@iki.fi>
Isamu Hasegawa <isamu@yamato.ibm.com>
+Jaroslav Škarvada <jskarvad@redhat.com>
Jeff Bailey <jbailey@nisa.net>
Jim Hand <jhand@austx.tandem.com>
Jim Meyering <meyering@redhat.com>
diff --git a/src/dfa.c b/src/dfa.c
index cb45193d..a0d9410a 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -243,7 +243,7 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
For MB_CUR_MAX > 1, one or both of the two cases may not be set,
so the resulting charset may only be used as an optimization. */
static void
-setbit_case_fold (unsigned int b, charclass c)
+setbit_case_fold (wint_t b, charclass c)
{
if (case_fold)
{
@@ -691,7 +691,7 @@ parse_bracket_exp (void)
continue;
}
- setbit_case_fold (c, ccl);
+ setbit_case_fold (wc, ccl);
#ifdef MBS_SUPPORT
/* Build normal characters. */
if (MB_CUR_MAX > 1)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 67763b2f..02db64c4 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -22,6 +22,7 @@ TESTS = \
case-fold-char-class \
case-fold-char-range \
case-fold-char-type \
+ char-class-multibyte \
dfaexec-multibyte \
empty.sh \
ere.sh \
diff --git a/tests/char-class-multibyte b/tests/char-class-multibyte
new file mode 100644
index 00000000..d77c6de0
--- /dev/null
+++ b/tests/char-class-multibyte
@@ -0,0 +1,23 @@
+#!/bin/sh
+# This would segfault for grep-2.6
+: ${srcdir=.}
+. "$srcdir/init.sh"; path_prepend_ ../src
+
+printf 'É\n' > exp1 || framework_failure
+fail=0
+
+for LOC in en_US.UTF-8 $LOCALE_FR_UTF8; do
+ out=out1-$LOC
+ printf 'á\nç\nÉ\n' | LC_ALL=$LOC grep '[é]' > $out || fail=1
+ compare $out exp1 || fail=1
+done
+
+printf 'é\n' > exp2 || framework_failure
+
+for LOC in en_US.UTF-8 $LOCALE_FR_UTF8; do
+ out=out2-$LOC
+ printf 'á\nç\né\n' | LC_ALL=$LOC grep '[É]' > $out || fail=1
+ compare $out exp2 || fail=1
+done
+
+Exit $fail