summaryrefslogtreecommitdiff
path: root/data
diff options
context:
space:
mode:
author Adrian Vogelsgesang <avogelsgesang@tableau.com> 2020-02-19 20:23:11 +0100
committer Akim Demaille <akim.demaille@gmail.com> 2020-02-27 18:13:44 +0100
commit 879530cb950b3a3fd6bd9d45fe5f12db2babf1db (patch)
tree 055b66585cd239bd2b959d00657935ce38a50c16 /data
parent 72acecb30ca4b9c6e4ea27fc01a928272a9a974a (diff)
download bison-879530cb950b3a3fd6bd9d45fe5f12db2babf1db.tar.gz
c++: add parser::context for syntax error handling
* data/skeletons/lalr1.cc: here
Diffstat (limited to 'data')
-rw-r--r-- data/skeletons/lalr1.cc 236
1 files changed, 135 insertions, 101 deletions
diff --git a/data/skeletons/lalr1.cc b/data/skeletons/lalr1.cc
index 0bc1beaf..5928f72c 100644
--- a/data/skeletons/lalr1.cc
+++ b/data/skeletons/lalr1.cc
@@ -235,7 +235,28 @@ m4_define([b4_shared_declarations],
void error (const syntax_error& err);
]b4_token_constructor_define[
+]b4_parse_error_bmatch([custom\|detailed\|verbose], [[
+ class context { // What a syntax error report needs: the parser and its lookahead.
+ public:
+ context (const ]b4_parser_class[& yyparser, const symbol_type& yyla)
+ : yyparser (yyparser)
+ , yyla (yyla)
+ {}
+]b4_locations_if([[
+ const location_type& get_location () const { return yyla.location; }
+]])[
+ /* Put in YYARG at most YYARGN of the expected tokens, and return the
+ number of tokens stored in YYARG. If YYARG is null, return the
+ number of expected tokens (guaranteed to be less than YYNTOKENS). */
+ int yyexpected_tokens (int yyarg[], int yyargn) const;
+ int yysyntax_error_arguments (int yyarg[], int yyargn) const; // Unexpected token first, then the expected ones.
+
+ private:
+ const ]b4_parser_class[& yyparser;
+ const symbol_type& yyla; // Borrowed, not copied: the context must not outlive the lookahead.
+ };
+]])[
private:
/// This class is not copyable.
]b4_parser_class[ (const ]b4_parser_class[&);
@@ -254,13 +275,11 @@ m4_define([b4_shared_declarations],
/// Stored state numbers (used for stacks).
typedef ]b4_int_type(0, m4_eval(b4_states_number - 1))[ state_type;
-
+]b4_parse_error_bmatch([detailed\|verbose], [[
/// Generate an error message.
- /// \param yystate the state where the error occurred.
- /// \param yyla the lookahead token.
- virtual std::string yysyntax_error_ (state_type yystate,
- const symbol_type& yyla) const;
-
+ /// \param yyctx the context in which the error occurred.
+ virtual std::string yysyntax_error_ (const context& yyctx) const;
+]])[
/// Compute post-reduction state.
/// \param yystate the current state
/// \param yysym the nonterminal to push on the stack
@@ -1050,9 +1069,13 @@ b4_dollar_popdef])[]dnl
// If not already recovering from an error, report this error.
if (!yyerrstatus_)
{
- ++yynerrs_;
- error (]b4_join(b4_locations_if([yyla.location]),
- [[yysyntax_error_ (yystack_[0].state, yyla)]])[);
+ ++yynerrs_;]b4_parse_error_case(
+ [simple], [[
+ std::string msg = YY_("syntax error");]],
+ [[
+ context yyctx (*this, yyla);
+ std::string msg = yysyntax_error_ (yyctx);]])[
+ error (]b4_join(b4_locations_if([yyla.location]), [[YY_MOVE (msg)]])[);
}
]b4_locations_if([[
@@ -1194,7 +1217,100 @@ b4_dollar_popdef])[]dnl
{
error (]b4_join(b4_locations_if([yyexc.location]),
[[yyexc.what ()]])[);
- }]b4_lac_if([[
+ }]b4_parse_error_bmatch([custom\|detailed\|verbose], [[
+
+ int
+ ]b4_parser_class[::context::yyexpected_tokens (int yyarg[], int yyargn) const
+ {
+ // Actual number of expected tokens: stored in YYARG when it is non-null, only counted otherwise.
+ int yycount = 0;
+]b4_lac_if([[
+#if ]b4_api_PREFIX[DEBUG
+ // Execute LAC once. We don't care if it is successful, we
+ // only do it for the sake of debugging output.
+ if (!yyparser.yy_lac_established_)
+ yyparser.yy_lac_check_ (yyla.type_get ());
+#endif
+
+ for (int yyx = 0; yyx < yyntokens_; ++yyx) // Keep each token, other than yy_error_token_ and yy_undef_token_, for which yy_lac_check_ returns true.
+ if (yyx != yy_error_token_ && yyx != yy_undef_token_ && yyparser.yy_lac_check_ (yyx))
+ {
+ if (!yyarg)
+ ++yycount;
+ else if (yycount == yyargn)
+ return 0; // One more expected token than YYARG can hold: report none.
+ else
+ yyarg[yycount++] = yyx;
+ }
+]], [[
+ int yyn = yypact_[yyparser.yystack_[0].state];
+ if (!yy_pact_value_is_default_ (yyn))
+ {
+ /* Start YYX at -YYN if negative to avoid negative indexes in
+ YYCHECK. In other words, skip the first -YYN actions for
+ this state because they are default actions. */
+ int yyxbegin = yyn < 0 ? -yyn : 0;
+ // Stay within bounds of both yycheck and yytname.
+ int yychecklim = yylast_ - yyn + 1;
+ int yyxend = yychecklim < yyntokens_ ? yychecklim : yyntokens_;
+ for (int yyx = yyxbegin; yyx < yyxend; ++yyx)
+ if (yycheck_[yyx + yyn] == yyx && yyx != yy_error_token_
+ && !yy_table_value_is_error_ (yytable_[yyx + yyn]))
+ {
+ if (!yyarg)
+ ++yycount;
+ else if (yycount == yyargn)
+ return 0; // One more expected token than YYARG can hold: report none.
+ else
+ yyarg[yycount++] = yyx;
+ }
+ }
+]])[
+ return yycount; // Tokens stored in YYARG, or merely counted when YYARG is null.
+ }
+
+ int
+ ]b4_parser_class[::context::yysyntax_error_arguments (int yyarg[], int yyargn) const
+ { // Put the unexpected token, then the expected ones, in YYARG; return how many (0 without lookahead).
+ /* There are many possibilities here to consider:
+ - If this state is a consistent state with a default action, then
+ the only way this function was invoked is if the default action
+ is an error action. In that case, don't check for expected
+ tokens because there are none.
+ - The only way there can be no lookahead present (in yyla) is
+ if this state is a consistent state with a default action.
+ Thus, detecting the absence of a lookahead is sufficient to
+ determine that there is no unexpected or expected token to
+ report. In that case, just report a simple "syntax error".
+ - Don't assume there isn't a lookahead just because this state is
+ a consistent state with a default action. There might have
+ been a previous inconsistent state, consistent state with a
+ non-default action, or user semantic action that manipulated
+ yyla. (However, yyla is currently not documented for users.)]b4_lac_if([[
+ In the first two cases, it might appear that the current syntax
+ error should have been detected in the previous state when
+ yy_lac_check was invoked. However, at that time, there might
+ have been a different syntax error that discarded a different
+ initial context during error recovery, leaving behind the
+ current lookahead.]], [[
+ - Of course, the expected token list depends on states to have
+ correct lookahead information, and it depends on the parser not
+ to perform extra reductions after fetching a lookahead from the
+ scanner and before detecting a syntax error. Thus, state merging
+ (from LALR or IELR) and default reductions corrupt the expected
+ token list. However, the list is correct for canonical LR with
+ one exception: it will still contain any token that will not be
+ accepted due to an error action in a later state.]])[
+ */
+
+ if (yyla.empty ())
+ return 0;
+ // YYARG may be null, as for yyexpected_tokens: then only count, never store.
+ if (yyarg)
+ yyarg[0] = yyla.type_get ();
+ int yyn = yyexpected_tokens (yyarg ? yyarg + 1 : yyarg, yyargn - 1);
+ return yyn + 1;
+ }]])b4_lac_if([[
bool
]b4_parser_class[::yy_lac_check_ (int yytoken) const
@@ -1333,99 +1449,17 @@ b4_dollar_popdef])[]dnl
<< evt << '\n';
yy_lac_established_ = false;
}
- }]])[
+ }]])b4_parse_error_bmatch([detailed\|verbose], [[
// Generate an error message.
std::string
-]b4_parser_class[::yysyntax_error_ (]b4_parse_error_case([simple],
- [state_type, const symbol_type&],
- [state_type yystate, const symbol_type& yyla])[) const
- {]b4_parse_error_case(
- [simple], [[
- return YY_("syntax error");]],
- [[
- // Number of reported tokens (one for the "unexpected", one per
- // "expected").
- std::ptrdiff_t yycount = 0;
+ ]b4_parser_class[::yysyntax_error_ (const context& yyctx) const
+ {
// Its maximum.
enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
// Arguments of yyformat.
- char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
-
- /* There are many possibilities here to consider:
- - If this state is a consistent state with a default action, then
- the only way this function was invoked is if the default action
- is an error action. In that case, don't check for expected
- tokens because there are none.
- - The only way there can be no lookahead present (in yyla) is
- if this state is a consistent state with a default action.
- Thus, detecting the absence of a lookahead is sufficient to
- determine that there is no unexpected or expected token to
- report. In that case, just report a simple "syntax error".
- - Don't assume there isn't a lookahead just because this state is
- a consistent state with a default action. There might have
- been a previous inconsistent state, consistent state with a
- non-default action, or user semantic action that manipulated
- yyla. (However, yyla is currently not documented for users.)]b4_lac_if([[
- In the first two cases, it might appear that the current syntax
- error should have been detected in the previous state when
- yy_lac_check was invoked. However, at that time, there might
- have been a different syntax error that discarded a different
- initial context during error recovery, leaving behind the
- current lookahead.]], [[
- - Of course, the expected token list depends on states to have
- correct lookahead information, and it depends on the parser not
- to perform extra reductions after fetching a lookahead from the
- scanner and before detecting a syntax error. Thus, state merging
- (from LALR or IELR) and default reductions corrupt the expected
- token list. However, the list is correct for canonical LR with
- one exception: it will still contain any token that will not be
- accepted due to an error action in a later state.]])[
- */
- if (!yyla.empty ())
- {
- symbol_number_type yytoken = yyla.type_get ();
- yyarg[yycount++] = ]b4_parse_error_case(
- [verbose], [[yytname_[yytoken]]],
- [[yysymbol_name (yytoken)]])[;]b4_lac_if([[
-
-#if ]b4_api_PREFIX[DEBUG
- // Execute LAC once. We don't care if it is successful, we
- // only do it for the sake of debugging output.
- if (!yy_lac_established_)
- yy_lac_check_ (yytoken);
-#endif]])[
-
- int yyn = yypact_[+yystate];
- if (!yy_pact_value_is_default_ (yyn))
- {]b4_lac_if([[
- for (int yyx = 0; yyx < yyntokens_; ++yyx)
- if (yyx != yy_error_token_ && yyx != yy_undef_token_
- && yy_lac_check_ (yyx))
- {]], [[
- /* Start YYX at -YYN if negative to avoid negative indexes in
- YYCHECK. In other words, skip the first -YYN actions for
- this state because they are default actions. */
- int yyxbegin = yyn < 0 ? -yyn : 0;
- // Stay within bounds of both yycheck and yytname.
- int yychecklim = yylast_ - yyn + 1;
- int yyxend = yychecklim < yyntokens_ ? yychecklim : yyntokens_;
- for (int yyx = yyxbegin; yyx < yyxend; ++yyx)
- if (yycheck_[yyx + yyn] == yyx && yyx != yy_error_token_
- && !yy_table_value_is_error_ (yytable_[yyx + yyn]))
- {]])[
- if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
- {
- yycount = 1;
- break;
- }
- else
- yyarg[yycount++] = ]b4_parse_error_case(
- [verbose], [[yytname_[yyx]]],
- [[yysymbol_name (yyx)]])[;
- }
- }
- }
+ int yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
+ int yycount = yyctx.yysyntax_error_arguments (yyarg, YYERROR_VERBOSE_ARGS_MAXIMUM);
char const* yyformat = YY_NULLPTR;
switch (yycount)
@@ -1451,14 +1485,14 @@ b4_dollar_popdef])[]dnl
if (yyp[0] == '%' && yyp[1] == 's' && yyi < yycount)
{
yyres += ]b4_parse_error_case([verbose],
- [[yytnamerr_ (yyarg[yyi++])]],
- [[yyarg[yyi++]]])[;
+ [[yytnamerr_ (yytname_[yyarg[yyi++]])]],
+ [[yysymbol_name (yyarg[yyi++])]])[;
++yyp;
}
else
yyres += *yyp;
- return yyres;]])[
- }
+ return yyres;
+ }]])[
const ]b4_int_type(b4_pact_ninf, b4_pact_ninf) b4_parser_class::yypact_ninf_ = b4_pact_ninf[;