summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Norihiro Tanaka <noritnk@kcn.ne.jp> 2014-09-29 08:53:56 +0900
committer: Jim Meyering <meyering@fb.com> 2014-10-05 09:46:47 -0700
commit: 00f3b29027df6af21db4b3989e25c67d982cc03d (patch)
tree: 68c7d1f5c0f48c46cd2d0f81535c077f30c88e38
parent: d43ec98e01b2135650bb677cab0c3dde53b0c399 (diff)
download: grep-00f3b29027df6af21db4b3989e25c67d982cc03d.tar.gz
dfa: check end of input buffer after transition in non-UTF8 multibyte locale
* src/dfa.c (dfaexec_main): Check for end of input buffer after each
transition in a non-UTF8 multibyte locale.
* tests/mb-non-UTF8-overrun: New test.
* tests/Makefile.am (TESTS): Add it.
* src/grep.c (main): With this fix, we no longer need the fourth byte
of "eolbytes".
-rw-r--r--  src/dfa.c                  30
-rw-r--r--  src/grep.c                  4
-rw-r--r--  tests/Makefile.am           1
-rwxr-xr-x  tests/mb-non-UTF8-overrun  30
4 files changed, 63 insertions, 2 deletions
diff --git a/src/dfa.c b/src/dfa.c
index 4f45ffff..7cbe247c 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3351,6 +3351,21 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
/* Can match with a multibyte character (and multi character
collating element). Transition table might be updated. */
s = transit_state (d, s, &p, (unsigned char *) end);
+
+ if (p[-1] == eol)
+ {
+ if ((char *) p > end)
+ {
+ p = NULL;
+ goto done;
+ }
+
+ nlcount++;
+
+ if (!allow_nl)
+ s = 0;
+ }
+
mbp = p;
trans = d->trans;
}
@@ -3399,6 +3414,21 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
/* Can match with a multibyte character (and multicharacter
collating element). Transition table might be updated. */
s = transit_state (d, s, &p, (unsigned char *) end);
+
+ if (p[-1] == eol)
+ {
+ if ((char *) p > end)
+ {
+ p = NULL;
+ goto done;
+ }
+
+ nlcount++;
+
+ if (!allow_nl)
+ s = 0;
+ }
+
mbp = p;
trans = d->trans;
}
diff --git a/src/grep.c b/src/grep.c
index 9dcf2982..dfc0e515 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2513,8 +2513,8 @@ main (int argc, char **argv)
compile (keys, keycc);
free (keys);
- /* We need one byte prior and at least two after. */
- char eolbytes[4] = { 0, eolbyte, 0, 0 };
+ /* We need one byte prior and one after. */
+ char eolbytes[3] = { 0, eolbyte, 0 };
size_t match_size;
skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0)
== out_invert);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index d47978f7..4b9a931e 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -75,6 +75,7 @@ TESTS = \
long-line-vs-2GiB-read \
max-count-overread \
max-count-vs-context \
+ mb-non-UTF8-overrun \
mb-non-UTF8-performance \
multibyte-white-space \
multiple-begin-or-end-line \
diff --git a/tests/mb-non-UTF8-overrun b/tests/mb-non-UTF8-overrun
new file mode 100755
index 00000000..1d4d59ed
--- /dev/null
+++ b/tests/mb-non-UTF8-overrun
@@ -0,0 +1,30 @@
+#!/bin/sh
+# grep would sometimes read beyond end of input, when using a non-UTF8
+# multibyte locale.
+
+# Copyright 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_JP_EUC_locale_
+
+fail=0
+
+# This would fail when running an ASAN-enabled binary, or when run via
+# valgrind, accessing one byte beyond the end of an input buffer.
+grep -z . < /dev/null
+test $? = 1 || fail=1
+
+Exit $fail