summaryrefslogtreecommitdiff
path: root/libstdc++-v3
diff options
context:
space:
mode:
authortimshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4>2014-07-01 02:10:31 +0000
committertimshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4>2014-07-01 02:10:31 +0000
commit26157a7e9d1dcbe399f8de5314c01fd12ad80a0a (patch)
tree88aab4a08a668b5a2e116189f2dd62d4878258ff /libstdc++-v3
parent295f1677d75c28a4fda6bbc6652f7237c08276c0 (diff)
downloadgcc-26157a7e9d1dcbe399f8de5314c01fd12ad80a0a.tar.gz
PR libstdc++/61424
* include/bits/regex.tcc (__regex_algo_impl<>): Use DFS for ECMAScript, not just regex containing back-references. * include/bits/regex_compiler.tcc (_Compiler<>::_M_disjunction): exchange _M_next and _M_alt for alternative operator, making matching from left to right. * include/bits/regex_executor.h (_State_info<>::_M_get_sol_pos): Add position tracking fom DFS. * include/bits/regex_executor.tcc (_Executor<>::_M_main_dispatch, _Executor<>::_M_dfs): Likewise. * include/bits/regex_scanner.h: Remove unused enum entry. * testsuite/28_regex/algorithms/regex_search/61424.cc: New testcase from PR. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@212184 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libstdc++-v3')
-rw-r--r--libstdc++-v3/ChangeLog16
-rw-r--r--libstdc++-v3/include/bits/regex.tcc1
-rw-r--r--libstdc++-v3/include/bits/regex_compiler.tcc7
-rw-r--r--libstdc++-v3/include/bits/regex_executor.h7
-rw-r--r--libstdc++-v3/include/bits/regex_executor.tcc47
-rw-r--r--libstdc++-v3/include/bits/regex_scanner.h1
-rw-r--r--libstdc++-v3/testsuite/28_regex/algorithms/regex_search/61424.cc52
7 files changed, 123 insertions, 8 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 1d6cf7a5bd8..75e4841626c 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,19 @@
+2014-07-01 Tim Shen <timshen@google.com>
+
+ PR libstdc++/61424
+ * include/bits/regex.tcc (__regex_algo_impl<>): Use DFS for ECMAScript,
+ not just regex containing back-references.
+ * include/bits/regex_compiler.tcc (_Compiler<>::_M_disjunction):
+ exchange _M_next and _M_alt for alternative operator,
+ making matching from left to right.
+ * include/bits/regex_executor.h (_State_info<>::_M_get_sol_pos):
+ Add position tracking fom DFS.
+ * include/bits/regex_executor.tcc (_Executor<>::_M_main_dispatch,
+ _Executor<>::_M_dfs): Likewise.
+ * include/bits/regex_scanner.h: Remove unused enum entry.
+ * testsuite/28_regex/algorithms/regex_search/61424.cc: New
+ testcase from PR.
+
2014-06-30 Jason Merrill <jason@redhat.com>
* libsupc++/cxxabi.h (class __pbase_type_info): __pointer_catch
diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc
index a81f51750dc..3322379a5b8 100644
--- a/libstdc++-v3/include/bits/regex.tcc
+++ b/libstdc++-v3/include/bits/regex.tcc
@@ -71,6 +71,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach.
bool __ret;
if (!__re._M_automaton->_M_has_backref
+ && !(__re._M_flags & regex_constants::ECMAScript)
#ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA
&& __policy == _RegexExecutorPolicy::_S_alternate
#endif
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index 0df10cc1a8b..f15f7dd0f7b 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -103,9 +103,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __end = _M_nfa._M_insert_dummy();
__alt1._M_append(__end);
__alt2._M_append(__end);
+ // __alt2 is state._M_next, __alt1 is state._M_alt. The executor
+ // executes _M_alt before _M_next, as well as executing left
+ // alternative before right one.
_M_stack.push(_StateSeqT(_M_nfa,
- _M_nfa._M_insert_alt(__alt1._M_start,
- __alt2._M_start, false),
+ _M_nfa._M_insert_alt(__alt2._M_start,
+ __alt1._M_start, false),
__end));
}
}
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index 1991c0007a1..40d3443d985 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -173,6 +173,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void _M_queue(_StateIdT __i, const _ResultsVec& __res)
{ _M_match_queue.emplace_back(__i, __res); }
+ // Dummy implementations for BFS mode.
+ _BiIter* _M_get_sol_pos() { return nullptr; }
+
// Saves states that need to be considered for the next character.
vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
// Indicates which states are already visited.
@@ -192,11 +195,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool _M_visited(_StateIdT) const { return false; }
void _M_queue(_StateIdT, const _ResultsVec&) { }
+ _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
+
// To record current solution.
_StateIdT _M_start;
+ _BiIter _M_sol_pos;
};
-
public:
_ResultsVec _M_cur_results;
_BiIter _M_current;
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index aefa8f47137..38b8ff27013 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -82,6 +82,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_main_dispatch(_Match_mode __match_mode, __dfs)
{
_M_has_sol = false;
+ *_M_states._M_get_sol_pos() = _BiIter();
_M_cur_results = _M_results;
_M_dfs(__match_mode, _M_states._M_start);
return _M_has_sol;
@@ -338,7 +339,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
&& (_M_flags & regex_constants::match_not_null))
_M_has_sol = false;
if (_M_has_sol)
- _M_results = _M_cur_results;
+ {
+ if (_M_nfa._M_flags & regex_constants::ECMAScript)
+ _M_results = _M_cur_results;
+ else // POSIX
+ {
+ _GLIBCXX_DEBUG_ASSERT(_M_states._M_get_sol_pos());
+ // Here's POSIX's logic: match the longest one. However
+ // we never know which one (lhs or rhs of "|") is longer
+ // unless we try both of them and compare the results.
+ // The member variable _M_sol_pos records the end
+ // position of the last successful match. It's better
+ // to be larger, because POSIX regex is always greedy.
+ // TODO: This could be slow.
+ if (*_M_states._M_get_sol_pos() == _BiIter()
+ || std::distance(_M_begin,
+ *_M_states._M_get_sol_pos())
+ < std::distance(_M_begin, _M_current))
+ {
+ *_M_states._M_get_sol_pos() = _M_current;
+ _M_results = _M_cur_results;
+ }
+ }
+ }
}
else
{
@@ -354,9 +377,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
break;
case _S_opcode_alternative:
- _M_dfs(__match_mode, __state._M_alt);
- if (!__dfs_mode || !_M_has_sol)
- _M_dfs(__match_mode, __state._M_next);
+ if (_M_nfa._M_flags & regex_constants::ECMAScript)
+ {
+ // TODO: Let DFS support ECMAScript's alternative operation.
+ _GLIBCXX_DEBUG_ASSERT(!__dfs_mode);
+ _M_dfs(__match_mode, __state._M_alt);
+ // Pick lhs if it matches. Only try rhs if it doesn't.
+ if (!_M_has_sol)
+ _M_dfs(__match_mode, __state._M_next);
+ }
+ else
+ {
+ // Try both and compare the result.
+ // See "case _S_opcode_accept:" handling above.
+ _M_dfs(__match_mode, __state._M_alt);
+ auto __has_sol = _M_has_sol;
+ _M_has_sol = false;
+ _M_dfs(__match_mode, __state._M_next);
+ _M_has_sol |= __has_sol;
+ }
break;
default:
_GLIBCXX_DEBUG_ASSERT(false);
diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h
index 6627db97ce0..5552226bdfb 100644
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -67,7 +67,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_or,
_S_token_closure0,
_S_token_closure1,
- _S_token_ungreedy,
_S_token_line_begin,
_S_token_line_end,
_S_token_word_bound, // neg if _M_value[0] == 'n'
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/61424.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/61424.cc
new file mode 100644
index 00000000000..bdccb4a454e
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/61424.cc
@@ -0,0 +1,52 @@
+// { dg-options "-std=gnu++11" }
+
+// Copyright (C) 2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// PR libstdc++/61424
+
+#include <regex>
+#include <testsuite_hooks.h>
+#include <testsuite_regex.h>
+
+using namespace std;
+using namespace __gnu_test;
+
+int main()
+{
+ regex_constants::syntax_option_type grammar[] = {
+ regex_constants::ECMAScript, regex_constants::extended,
+ regex_constants::awk, regex_constants::egrep
+ };
+
+ string sol[] = {
+ "tour",
+ "tournament",
+ "tournament",
+ "tournament",
+ };
+ int i = 0;
+ for (auto g : grammar)
+ {
+ regex re("tour|tournament|tourn", g);
+ const char str[] = "tournament";
+ cmatch m;
+ VERIFY(regex_search_debug(str, m, re));
+ VERIFY(sol[i] == m[0]);
+ i++;
+ }
+}