diff options
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | cfg.mk | 1 | ||||
-rw-r--r-- | src/dfa.c | 127 | ||||
-rw-r--r-- | tests/bre.tests | 6 | ||||
-rw-r--r-- | tests/ere.tests | 2 | ||||
-rwxr-xr-x | tests/repetition-overflow | 4 |
6 files changed, 59 insertions, 84 deletions
@@ -12,6 +12,9 @@ GNU grep NEWS -*- outline -*- name too long", and it can run much faster when dealing with large directory hierarchies. + grep -E 'a{1000000000}' now reports an overflow error rather than + silently acting like grep -E 'a\{1000000000}'. + ** New features The -R option now has a long-option alias --dereference-recursive. @@ -88,3 +88,4 @@ exclude_file_name_regexp--sc_prohibit_xalloc_without_use = ^src/kwset\.c$$ exclude_file_name_regexp--sc_prohibit_tab_based_indentation = \ (Makefile|\.(am|mk)$$|^gl/lib/.*\.c\.diff$$) exclude_file_name_regexp--sc_space_tab = ^gl/lib/.*\.c\.diff$$ +exclude_file_name_regexp--sc_error_message_uppercase = ^src/dfa\.c$$ @@ -861,6 +861,10 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */ #endif /* MBS_SUPPORT */ +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + typedef int predicate (int); /* The following list maps the names of the Posix named character classes @@ -1328,90 +1332,53 @@ lex (void) if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) goto normal_char; - if (syntax_bits & RE_NO_BK_BRACES) - { - /* Scan ahead for a valid interval; if it's not valid, - treat it as a literal '{'. */ - int lo = -1, hi = -1; - char const *p = lexptr; - char const *lim = p + lexleft; - for (; p != lim && ISASCIIDIGIT (*p); p++) - { - if (lo < 0) - lo = *p - '0'; - else - { - lo = lo * 10 + *p - '0'; - if (RE_DUP_MAX < lo) - goto normal_char; - } - } - if (p != lim && *p == ',') - while (++p != lim && ISASCIIDIGIT (*p)) - { - if (hi < 0) - hi = *p - '0'; - else - { - hi = hi * 10 + *p - '0'; - if (RE_DUP_MAX < hi) - goto normal_char; - } - } - else - hi = lo; - if (p == lim || *p != '}' || lo < 0 || (0 <= hi && hi < lo)) - goto normal_char; - } - - minrep = 0; /* Cases: {M} - exact count {M,} - minimum count, maximum is infinity + {,N} - 0 through N + {,} - 0 to infinity (same as '*') {M,N} - M through N */ - FETCH (c, _("unfinished repeat count")); - if (ISASCIIDIGIT (c)) - { - minrep = c - '0'; - for (;;) - { - FETCH (c, _("unfinished repeat count")); - if (!ISASCIIDIGIT (c)) - break; - minrep = 10 * minrep + c - '0'; - } - } - else - dfaerror (_("malformed repeat count")); - if (c == ',') - { - FETCH (c, _("unfinished repeat count")); - if (!ISASCIIDIGIT (c)) - maxrep = -1; - else - { - maxrep = c - '0'; - for (;;) - { - FETCH (c, _("unfinished repeat count")); - if (!ISASCIIDIGIT (c)) - break; - maxrep = 10 * maxrep + c - '0'; - } - if (0 <= maxrep && maxrep < minrep) - dfaerror (_("malformed repeat count")); - } - } - else - maxrep = minrep; - if (!(syntax_bits & RE_NO_BK_BRACES)) - { - if (c != '\\') - dfaerror (_("malformed repeat count")); - FETCH (c, _("unfinished repeat count")); - } - if (c != '}') - dfaerror (_("malformed repeat count")); + { + char const *p = lexptr; + char const *lim = p + lexleft; + minrep = maxrep = -1; + for (; p != lim && ISASCIIDIGIT (*p); p++) + { + if (minrep < 0) + minrep = *p - '0'; + else + minrep = MIN (RE_DUP_MAX + 1, minrep * 10 + *p - '0'); + } + if (p != lim) + { + if (*p != ',') + maxrep = minrep; + else + { + if (minrep < 0) + minrep = 0; + while (++p != lim && ISASCIIDIGIT (*p)) + { + if (maxrep < 0) + maxrep = *p - '0'; + else + maxrep = MIN (RE_DUP_MAX + 1, maxrep * 10 + *p - '0'); + } + } + } + if (! ((! backslash || (p != lim && *p++ == '\\')) + && p != lim && *p++ == '}' + && 0 <= minrep && (maxrep < 0 || minrep <= maxrep))) + { + if (syntax_bits & RE_INVALID_INTERVAL_ORD) + goto normal_char; + dfaerror (_("Invalid content of \\{\\}")); + } + if (RE_DUP_MAX < maxrep) + dfaerror (_("Regular expression too big")); + lexptr = p; + lexleft = lim - p; + } laststart = 0; return lasttok = REPMN; diff --git a/tests/bre.tests b/tests/bre.tests index 60ff1b54..9d01a3c3 100644 --- a/tests/bre.tests +++ b/tests/bre.tests @@ -42,8 +42,9 @@ 2@a\{1@EBRACE 2@a\{1a@EBRACE 2@a\{1a\}@BADBR -2@a\{,2\}@BADBR -2@a\{,\}@BADBR +0@a\{,2\}@a\{,2\} +0@a\{,\}@a\{,\} +2@a\{\}@BADBR 2@a\{1,x\}@BADBR 2@a\{1,x@EBRACE 2@a\{32768\}@BADBR @@ -60,3 +61,4 @@ 2@a\{1\}*@BADRPT@TO CORRECT 1@a\(b\)?c\1d@acd 0@-\{0,1\}[0-9]*$@-5 +2@b\{1000000000\}@ESIZE diff --git a/tests/ere.tests b/tests/ere.tests index 08b3dbae..e0aad2a8 100644 --- a/tests/ere.tests +++ b/tests/ere.tests @@ -76,6 +76,7 @@ 0@a{1a}@a{1a} 0@a{,2}@a{,2} 0@a{,}@a{,} +2@a{}@BADBR 0@a{1,*}@a{1,,,} 2@a{1,x@EBRACE@TO CORRECT 2@a{300}@BADBR@TO CORRECT @@ -213,3 +214,4 @@ 0@abcdefghijklmnopqrstuv@abcdefghijklmnopqrstuv 0@CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a@CC11 0@a?b@ab +2@b{1000000000}@ESIZE diff --git a/tests/repetition-overflow b/tests/repetition-overflow index c92de237..66a44a6b 100755 --- a/tests/repetition-overflow +++ b/tests/repetition-overflow @@ -11,9 +11,9 @@ fail=0 # range of "unsigned int" would silently wrap around. Hence, 2^32+1 # would be treated just like "1", and both of these would mistakenly match. -echo abc | grep -E "b{$xp1}" > out 2>&1; test $? = 1 || fail=1 +echo abc | grep -E "b{$xp1}" > out 2> /dev/null; test $? = 2 || fail=1 compare out /dev/null || fail=1 -echo abbc | grep -E "b{1,$xp2}" > out 2>&1; test $? = 1 || fail=1 +echo abbc | grep -E "b{1,$xp2}" > out 2> /dev/null; test $? = 2 || fail=1 compare out /dev/null || fail=1 Exit $fail |