diff options
author | timshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4> | 2015-06-05 04:58:26 +0000 |
---|---|---|
committer | timshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4> | 2015-06-05 04:58:26 +0000 |
commit | d4f8a401a2331040916b6ac0c33ef6513bad3c0b (patch) | |
tree | 67e4e44451c12ff97cfb3a2670d26df7886854b9 /libstdc++-v3 | |
parent | 78e4a05e9585b38481d002c8e714b0261a5db231 (diff) | |
download | gcc-d4f8a401a2331040916b6ac0c33ef6513bad3c0b.tar.gz |
PR libstdc++/66359
Backport from mainline
2014-11-13 Tim Shen <timshen@google.com>
PR libstdc++/63775
* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
_BracketMatcher<>::_M_make_range): Throw regex_error on invalid range
like [z-a]. Change _M_expression_term interface.
* include/bits/regex_compiler.tcc (
_Compiler<>::_M_insert_bracket_matcher,
_Compiler<>::_M_expression_term): Rewrite bracket expression parsing.
* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
Add testcases and move file out of extended.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@224144 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libstdc++-v3')
-rw-r--r-- | libstdc++-v3/ChangeLog | 16 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.h | 5 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.tcc | 71 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc (renamed from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc) | 53 |
4 files changed, 125 insertions, 20 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index ed9cd165494..a607facab66 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,19 @@ +2015-06-05 Tim Shen <timshen@google.com> + + PR libstdc++/66359 + Backport from mainline + 2014-11-13 Tim Shen <timshen@google.com> + + PR libstdc++/63775 + * include/bits/regex_compiler.h (_Compiler<>::_M_expression_term, + _BracketMatcher<>::_M_make_range): Throw regex_erorr on invalid range + like [z-a]. Change _M_expression_term interface. + * include/bits/regex_compiler.tcc ( + _Compiler<>::_M_insert_bracket_matcher, + _Compiler<>::_M_expression_term): Rewrite bracket expression parsing. + * testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc: + Add testcases and move file out of extended. + 2015-06-04 Renlin Li <renlin.li@arm.com> Backported from mainline diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index af76f55054a..bcbe1179987 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -118,7 +118,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<bool __icase, bool __collate> void - _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& + _M_expression_term(pair<bool, _CharT>& __last_char, + _BracketMatcher<_TraitsT, __icase, __collate>& __matcher); int @@ -390,6 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_make_range(_CharT __l, _CharT __r) { + if (__l > __r) + __throw_regex_error(regex_constants::error_range); _M_range_set.push_back(make_pair(_M_translator._M_transform(__l), _M_translator._M_transform(__r))); #ifdef _GLIBCXX_DEBUG diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index ffe01705fe6..8551e0d29a7 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -410,18 +410,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_insert_bracket_matcher(bool 
__neg) { _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits); + pair<bool, _CharT> __last_char; // Optional<_CharT> + __last_char.first = false; + if (!(_M_flags & regex_constants::ECMAScript)) + if (_M_try_char()) + { + __matcher._M_add_char(_M_value[0]); + __last_char.first = true; + __last_char.second = _M_value[0]; + } while (!_M_match_token(_ScannerT::_S_token_bracket_end)) - _M_expression_term(__matcher); + _M_expression_term(__last_char, __matcher); __matcher._M_ready(); - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher(std::move(__matcher)))); + _M_stack.push(_StateSeqT( + _M_nfa, + _M_nfa._M_insert_matcher(std::move(__matcher)))); } template<typename _TraitsT> template<bool __icase, bool __collate> void _Compiler<_TraitsT>:: - _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher) + _M_expression_term(pair<bool, _CharT>& __last_char, + _BracketMatcher<_TraitsT, __icase, __collate>& __matcher) + { if (_M_match_token(_ScannerT::_S_token_collsymbol)) __matcher._M_add_collating_element(_M_value); @@ -429,27 +441,50 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __matcher._M_add_equivalence_class(_M_value); else if (_M_match_token(_ScannerT::_S_token_char_class_name)) __matcher._M_add_character_class(_M_value, false); - else if (_M_try_char()) // [a + // POSIX doesn't permit '-' as a start-range char (say [a-z--0]), + // except when the '-' is the first character in the bracket expression + // ([--0]). ECMAScript treats all '-' after a range as a normal character. + // Also see above, where _M_expression_term gets called. + // + // As a result, POSIX rejects [-----], but ECMAScript doesn't. + // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax. + // Clang (3.5) always uses ECMAScript style even in its POSIX syntax. 
+ // + // It turns out that no one reads BNFs ;) + else if (_M_try_char()) { - auto __ch = _M_value[0]; - if (_M_try_char()) + if (!__last_char.first) + { + if (_M_value[0] == '-' + && !(_M_flags & regex_constants::ECMAScript)) + __throw_regex_error(regex_constants::error_range); + __matcher._M_add_char(_M_value[0]); + __last_char.first = true; + __last_char.second = _M_value[0]; + } + else { - if (_M_value[0] == '-') // [a- + if (_M_value[0] == '-') { - if (_M_try_char()) // [a-z] + if (_M_try_char()) + { + __matcher._M_make_range(__last_char.second , _M_value[0]); + __last_char.first = false; + } + else { - __matcher._M_make_range(__ch, _M_value[0]); - return; + if (_M_scanner._M_get_token() + != _ScannerT::_S_token_bracket_end) + __throw_regex_error(regex_constants::error_range); + __matcher._M_add_char(_M_value[0]); } - // If the dash is the last character in the bracket - // expression, it is not special. - if (_M_scanner._M_get_token() - != _ScannerT::_S_token_bracket_end) - __throw_regex_error(regex_constants::error_range); } - __matcher._M_add_char(_M_value[0]); + else + { + __matcher._M_add_char(_M_value[0]); + __last_char.second = _M_value[0]; + } } - __matcher._M_add_char(__ch); } else if (_M_match_token(_ScannerT::_S_token_quoted_class)) __matcher._M_add_character_class(_M_value, diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc index ca2a5f556fc..f7653c6dc9d 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc @@ -3,7 +3,7 @@ // // 2013-08-01 Tim Shen <timshen91@gmail.com> // -// Copyright (C) 2013-2014 Free Software Foundation, Inc. +// Copyright (C) 2013-2015 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. 
This library is free // software; you can redistribute it and/or modify it under the @@ -67,9 +67,60 @@ test01() } } +void +test02() +{ + bool test __attribute__((unused)) = true; + + try + { + std::regex re("[-----]", std::regex::extended); + VERIFY(false); + } + catch (const std::regex_error& e) + { + VERIFY(e.code() == std::regex_constants::error_range); + } + std::regex re("[-----]", std::regex::ECMAScript); +} + +void +test03() +{ + bool test __attribute__((unused)) = true; + + try + { + std::regex re("[z-a]", std::regex::extended); + VERIFY(false); + } + catch (const std::regex_error& e) + { + VERIFY(e.code() == std::regex_constants::error_range); + } +} + +void +test04() +{ + bool test __attribute__((unused)) = true; + + std::regex re("[-0-9a-z]"); + VERIFY(regex_match_debug("-", re)); + VERIFY(regex_match_debug("1", re)); + VERIFY(regex_match_debug("w", re)); + re.assign("[-0-9a-z]", regex_constants::basic); + VERIFY(regex_match_debug("-", re)); + VERIFY(regex_match_debug("1", re)); + VERIFY(regex_match_debug("w", re)); +} + int main() { test01(); + test02(); + test03(); + test04(); return 0; } |