From 2d7d3a2d3edddf763ca79ec014c8459d80c85176 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Thu, 29 May 2014 08:03:03 +0900 Subject: dfa: fix bug with regex containing multiple begin/end-line constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit grep -E 'a(b$|c$)' would mistakenly match "aa". * src/dfa.c (dfamust): When resetting 'is' in OR, also reset 'begline' and 'endline' of 'must'. * NEWS (Bug fixes): Mention it. This bug was introduced via commit v2.18-85-g2c94326. Reported by Péter Radics in . --- NEWS | 6 ++++++ src/dfa.c | 15 +++++++++++---- tests/Makefile.am | 1 + tests/multiple-begin-or-end-line | 28 ++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 4 deletions(-) create mode 100755 tests/multiple-begin-or-end-line diff --git a/NEWS b/NEWS index 10b2d19f..0caad227 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,12 @@ GNU grep NEWS -*- outline -*- * Noteworthy changes in release ?.? (????-??-??) [?] +** Bug fixes + + A command like echo aa|grep -E 'a(b$|c$)' would mistakenly + report the input as a matched line. + [bug introduced in grep-2.19] + ** Changes in behavior grep --exclude-dir='FOO/' now excludes the directory FOO. diff --git a/src/dfa.c b/src/dfa.c index 636f2d44..0f27f85e 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -3962,10 +3962,17 @@ dfamust (struct dfa *d) size_t j, ln, rn, n; /* Guaranteed to be. Unlikely, but ... 
*/ - if (!STREQ (lmp->is, rmp->is)) - lmp->is[0] = '\0'; - lmp->begline &= rmp->begline; - lmp->endline &= rmp->endline; + if (STREQ (lmp->is, rmp->is)) + { + lmp->begline &= rmp->begline; + lmp->endline &= rmp->endline; + } + else + { + lmp->is[0] = '\0'; + lmp->begline = false; + lmp->endline = false; + } /* Left side--easy */ i = 0; while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i]) diff --git a/tests/Makefile.am b/tests/Makefile.am index 82a6d2dd..a38f074a 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -76,6 +76,7 @@ TESTS = \ max-count-vs-context \ mb-non-UTF8-performance \ multibyte-white-space \ + multiple-begin-or-end-line \ null-byte \ empty-line-mb \ unibyte-bracket-expr \ diff --git a/tests/multiple-begin-or-end-line b/tests/multiple-begin-or-end-line new file mode 100755 index 00000000..448f88bc --- /dev/null +++ b/tests/multiple-begin-or-end-line @@ -0,0 +1,28 @@ +#!/bin/sh +# Test a pattern of multiple begin or end line constraints. +# This would mistakenly print a line when using grep-2.19. + +# Copyright 2014 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src + +echo aa | grep 'a\(b$\|c$\)' >out && fail=1 +compare /dev/null out || fail=1 + +echo aa | grep '\(^b\|^c\)a' >out && fail=1 +compare /dev/null out || fail=1 + +Exit $fail -- cgit v1.2.1