diff options
author | Akim Demaille <akim.demaille@gmail.com> | 2020-04-13 14:18:51 +0200 |
---|---|---|
committer | Akim Demaille <akim.demaille@gmail.com> | 2020-04-13 16:54:14 +0200 |
commit | 42ab6c1e44d3c1665f9f81c41010b85291c94d90 (patch) | |
tree | c594e7d35e91e4d5b3febb527484281f5e74afd4 | |
parent | dc1035bada3fcc793c388b33869b7298e0643860 (diff) | |
download | bison-42ab6c1e44d3c1665f9f81c41010b85291c94d90.tar.gz |
doc: c++: document parser::context
* doc/bison.texi (C++ Parser Context): New.
* data/skeletons/lalr1.cc (parser::yysymbol_name): Rename as...
(parser::symbol_name): this.
(A Complete C++ Example): Promote LAC, now that we have it.
Promote parse.error detailed over verbose.
* examples/c++/calc++/calc++.test, tests/local.at: Adjust.
-rw-r--r-- | data/skeletons/lalr1.cc | 6 | ||||
-rw-r--r-- | doc/bison.texi | 136 | ||||
-rwxr-xr-x | examples/c++/calc++/calc++.test | 9 | ||||
-rw-r--r-- | tests/local.at | 4 |
4 files changed, 135 insertions, 20 deletions
diff --git a/data/skeletons/lalr1.cc b/data/skeletons/lalr1.cc index 5b6dbd3b..dd9d97e4 100644 --- a/data/skeletons/lalr1.cc +++ b/data/skeletons/lalr1.cc @@ -308,7 +308,7 @@ m4_define([b4_shared_declarations], ]b4_parse_error_bmatch([custom\|detailed], [[ /// The user-facing name of the symbol whose (internal) number is /// YYSYMBOL. No bounds checking. - static const char *yysymbol_name (symbol_kind_type yysymbol); + static const char *symbol_name (symbol_kind_type yysymbol); ]])[ // Tables. @@ -586,7 +586,7 @@ m4_if(b4_prefix, [yy], [], /* The user-facing name of the symbol whose (internal) number is YYSYMBOL. No bounds checking. */ const char * - ]b4_parser_class[::yysymbol_name (symbol_kind_type yysymbol) + ]b4_parser_class[::symbol_name (symbol_kind_type yysymbol) { static const char *const yy_sname[] = { @@ -1503,7 +1503,7 @@ b4_dollar_popdef])[]dnl { yyres += ]b4_parse_error_case([verbose], [[yytnamerr_ (yytname_[yyarg[yyi++]])]], - [[yysymbol_name (yyarg[yyi++])]])[; + [[symbol_name (yyarg[yyi++])]])[; ++yyp; } else diff --git a/doc/bison.texi b/doc/bison.texi index 2d6cc327..c7084032 100644 --- a/doc/bison.texi +++ b/doc/bison.texi @@ -325,8 +325,8 @@ The Lexical Analyzer Function @code{yylex} Error Reporting -* Error Reporting Function:: You must supply a function @code{yyerror}. -* Syntax Error Reporting Function:: You can supply a function @code{yyreport_syntax_error}. +* Error Reporting Function:: You must supply a @code{yyerror} function. +* Syntax Error Reporting Function:: You can supply a @code{yyreport_syntax_error} function. Parser Internationalization @@ -408,6 +408,7 @@ C++ Parsers * C++ Parser Interface:: Instantiating and running the parser * C++ Semantic Values:: %union vs. C++ * C++ Location Values:: The position and location classes +* C++ Parser Context:: You can supply a @code{report_syntax_error} function. 
* C++ Scanner Interface:: Exchanges between yylex and parse * A Complete C++ Example:: Demonstrating their use @@ -7352,8 +7353,8 @@ such as syntax error, or memory exhaustion. How this message is delivered to the user must be specified by the developer. @menu -* Error Reporting Function:: You must supply a function @code{yyerror}. -* Syntax Error Reporting Function:: You can supply a function @code{yyreport_syntax_error}. +* Error Reporting Function:: You must supply a @code{yyerror} function. +* Syntax Error Reporting Function:: You can supply a @code{yyreport_syntax_error} function. @end menu @node Error Reporting Function @@ -7465,8 +7466,8 @@ An opaque type that captures the circumstances of the syntax error. @end deffn @deffn {Type} yysymbol_kind_t -An enum that includes all the symbols, tokens and nonterminals, of the -grammar. Its enumerators are forged from the token and symbol names: +An enum that includes all the grammar symbols, tokens and nonterminals. Its +enumerators are forged from the symbol names: @example enum yysymbol_kind_t @@ -7491,7 +7492,6 @@ typedef enum yysymbol_kind_t yysymbol_kind_t; @deftypefun {yysymbol_kind_t} yypcontext_token (@code{const yypcontext_t *}@var{ctx}) The ``unexpected'' token: the symbol kind of the lookahead token that caused the syntax error. Return @code{YYSYMBOL_YYEMPTY} if there is no lookahead. -Can never return @code{YYSYMBOL_YYERROR}, or @code{YYSYMBOL_YYUNDEF}. @end deftypefun @deftypefun {YYLTYPE *} yypcontext_location (@code{const yypcontext_t *}@var{ctx}) @@ -7528,7 +7528,7 @@ yyreport_syntax_error (const yypcontext_t *ctx) fprintf (stderr, ": syntax error"); // Report the tokens expected at this point. 
@{ - enum @{ TOKENMAX = 10 @}; + enum @{ TOKENMAX = 5 @}; yysymbol_kind_t expected[TOKENMAX]; int n = yypcontext_expected_tokens (ctx, expected, TOKENMAX); if (n < 0) @@ -11280,6 +11280,7 @@ The Bison parser in C++ is an object, an instance of the class * C++ Parser Interface:: Instantiating and running the parser * C++ Semantic Values:: %union vs. C++ * C++ Location Values:: The position and location classes +* C++ Parser Context:: You can supply a @code{report_syntax_error} function. * C++ Scanner Interface:: Exchanges between yylex and parse * A Complete C++ Example:: Demonstrating their use @end menu @@ -11973,6 +11974,112 @@ files, reused by other parsers as follows: @end example +@node C++ Parser Context +@subsection C++ Parser Context + +When @samp{%define parse.error custom} is used (@pxref{Syntax Error +Reporting Function}), the user must define the following function. + +@deftypemethod {parser} {void} report_syntax_error (@code{const context_type&}@var{ctx}) @code{const} +Report a syntax error to the user. Whether it uses @code{yyerror} is up to +the user. +@end deftypemethod + +Use the following types and functions to build the error message. + +@defcv {Type} {parser} {context} +A type that captures the circumstances of the syntax error. +@end defcv + +@defcv {Type} {parser} {symbol_kind_type} +An enum that includes all the grammar symbols, tokens and nonterminals. Its +enumerators are forged from the symbol names: + +@example +struct symbol_kind +@{ + enum symbol_kind_type + @{ + S_YYEMPTY = -2, // No symbol. + S_YYEOF = 0, // "end of file" + S_YYERROR = 1, // error + S_YYUNDEF = 2, // "invalid token" + S_PLUS = 3, // "+" + S_MINUS = 4, // "-" + [...] 
+ S_VAR = 14, // "variable" + S_NEG = 15, // NEG + S_YYACCEPT = 16, // $accept + S_exp = 17, // exp + S_input = 18 // input + @}; +@}; +typedef symbol_kind::symbol_kind_type symbol_kind_type; +@end example +@end defcv + +@deftypemethod {context} {const symbol_type&} lookahead () @code{const} +The ``unexpected'' token: the lookahead that caused the syntax error. +@end deftypemethod + +@deftypemethod {context} {symbol_kind_type} token () @code{const} +The symbol kind of the lookahead token that caused the syntax error. Return +@code{symbol_kind::S_YYEMPTY} if there is no lookahead. +@end deftypemethod + +@deftypemethod {context} {const location&} location () @code{const} +The location of the syntax error (that of the lookahead). +@end deftypemethod + +@deftypemethod {context} int expected_tokens (@code{symbol_kind_type} @var{argv}@code{[]}, @code{int} @var{argc}) @code{const} +Fill @var{argv} with the expected tokens, which never include +@code{symbol_kind::S_YYEMPTY}, @code{symbol_kind::S_YYERROR}, or +@code{symbol_kind::S_YYUNDEF}. + +Never put more than @var{argc} elements into @var{argv}, and on success +return the effective number of tokens stored in @var{argv}. Return 0 if +there are more than @var{argc} expected tokens, yet fill @var{argv} up to +@var{argc}. + +If @var{argv} is null, return the size needed to store all the possible +values, which is always less than @code{YYNTOKENS}. +@end deftypemethod + +@deftypemethod {parser} {const char *} symbol_name (@code{symbol_kind_type} @var{symbol}) @code{const} +The name of the symbol whose kind is @var{symbol}, possibly translated. +@end deftypemethod + +A custom syntax error function looks as follows. + +@example +void +yy::parser::report_syntax_error (const context& ctx) const +@{ + int res = 0; + std::cerr << ctx.location () << ": syntax error"; + // Report the tokens expected at this point. 
+ @{ + enum @{ TOKENMAX = 5 @}; + symbol_kind_type expected[TOKENMAX]; + int n = ctx.expected_tokens (expected, TOKENMAX); + for (int i = 0; i < n; ++i) + std::cerr << (i == 0 ? ": expected " : " or ") + << symbol_name (expected[i]); + @} + // Report the unexpected token. + @{ + symbol_kind_type lookahead = ctx.token (); + if (lookahead != symbol_kind::S_YYEMPTY) + std::cerr << " before " << symbol_name (lookahead); + @} + std::cerr << '\n'; +@} +@end example + +You still must provide a @code{yyerror} function, used for instance to +report memory exhaustion. + + @node C++ Scanner Interface @subsection C++ Scanner Interface @c - prefix for yylex. @@ -12332,7 +12439,7 @@ designed the grammar for. @comment file: calc++/parser.yy @example -%skeleton "lalr1.cc" /* -*- C++ -*- */ +%skeleton "lalr1.cc" // -*- C++ -*- %require "@value{VERSION}" %defines @end example @@ -12403,14 +12510,15 @@ Then we request location tracking. @end example @noindent -Use the following two directives to enable parser tracing and verbose error -messages. However, verbose error messages can contain incorrect information -(@pxref{LAC}). +Use the following two directives to enable parser tracing and detailed error +messages. However, detailed error messages can contain incorrect +information if lookahead correction is not enabled (@pxref{LAC}). 
@comment file: calc++/parser.yy @example %define parse.trace -%define parse.error verbose +%define parse.error detailed +%define parse.lac full @end example @noindent @@ -12720,7 +12828,7 @@ driver::scan_begin () yyin = stdin; else if (!(yyin = fopen (file.c_str (), "r"))) @{ - std::cerr << "cannot open " << file << ": " << strerror(errno) << '\n'; + std::cerr << "cannot open " << file << ": " << strerror (errno) << '\n'; exit (EXIT_FAILURE); @} @} diff --git a/examples/c++/calc++/calc++.test b/examples/c++/calc++/calc++.test index d0df490f..e00bb466 100755 --- a/examples/c++/calc++/calc++.test +++ b/examples/c++/calc++/calc++.test @@ -45,10 +45,17 @@ run 0 9 cat >input <<EOF +1 + +EOF +run 1 'err: -:2.1: syntax error, unexpected end of file, expecting ( or identifier or number' + + +# LAC finds many more tokens. +cat >input <<EOF a := 1 d := a + b * c EOF -run 1 'err: -:3.1: syntax error, unexpected end of file, expecting ( or identifier or number' +run 1 'err: -:3.1: syntax error, unexpected end of file' cat >input <<EOF diff --git a/tests/local.at b/tests/local.at index 2cda26bc..7d63142c 100644 --- a/tests/local.at +++ b/tests/local.at @@ -757,7 +757,7 @@ void { symbol_kind_type la = ctx.token (); if (la != symbol_kind::S_YYEMPTY) - fprintf (stderr, " on token [%s]", yysymbol_name (la)); + std::cerr << " on token [" << symbol_name (la) << ']'; } { enum { TOKENMAX = 10 }; @@ -767,7 +767,7 @@ void { std::cerr << " (expected:"; for (int i = 0; i < n; ++i) - std::cerr << " [" << yysymbol_name (expected[i]) << ']'; + std::cerr << " [" << symbol_name (expected[i]) << ']'; std::cerr << ')'; } } |