diff options
-rw-r--r-- | TODO | 5 | ||||
-rwxr-xr-x | build-aux/update-test | 2 | ||||
-rw-r--r-- | data/skeletons/lalr1.java | 3 | ||||
-rw-r--r-- | examples/c/lexcalc/lexcalc.test | 4 | ||||
-rw-r--r-- | examples/c/lexcalc/parse.y | 3 | ||||
-rw-r--r-- | examples/c/lexcalc/scan.l | 2 | ||||
-rw-r--r-- | src/output.c | 15 | ||||
-rw-r--r-- | tests/actions.at | 12 | ||||
-rw-r--r-- | tests/c++.at | 2 | ||||
-rw-r--r-- | tests/conflicts.at | 30 | ||||
-rw-r--r-- | tests/glr-regression.at | 4 | ||||
-rw-r--r-- | tests/regression.at | 17 | ||||
-rw-r--r-- | tests/skeletons.at | 4 |
13 files changed, 53 insertions, 50 deletions
@@ -6,6 +6,7 @@ - i18n in Java - symbol.type_get should be kind_get, and it's not documented. - token code instead of token user number etc. +- YYERRCODE and "end of file" and translation ** User token number, internal symbol number, external token number, etc. There is some confusion over these terms, which is even a problem for @@ -43,10 +44,6 @@ I feel it's ugly to use the GNU style to declare functions in the doc. It generates tons of white space in the page, and may contribute to bad page breaks. -** improve syntax errors (UTF-8, internationalization) -While at it, we should stop using "$end" by default, in favor of "end of -file", or "end of input", whatever. See how lalr1.java does that. - ** consistency token vs terminal, variable vs non terminal. diff --git a/build-aux/update-test b/build-aux/update-test index 47059b38..c3602c5a 100755 --- a/build-aux/update-test +++ b/build-aux/update-test @@ -102,7 +102,7 @@ def update(at_file, logfile): def process(logfile): log = contents(logfile) # Look for the file to update. - m = re.search(r'^\d+\. ([-\w]+\.at):\d+: ', log, re.MULTILINE) + m = re.search(r'^\d+\. ([\-\+\w]+\.at):\d+: ', log, re.MULTILINE) if not m: trace("no diff found:", logfile) return diff --git a/data/skeletons/lalr1.java b/data/skeletons/lalr1.java index f3fc9066..e5b0f729 100644 --- a/data/skeletons/lalr1.java +++ b/data/skeletons/lalr1.java @@ -1080,9 +1080,6 @@ b4_dollar_popdef[]dnl return yyr.toString (); } } - else if (yystr.equals ("$end")) - return "end of input"; - return yystr; } diff --git a/examples/c/lexcalc/lexcalc.test b/examples/c/lexcalc/lexcalc.test index c9eb62f1..48a51575 100644 --- a/examples/c/lexcalc/lexcalc.test +++ b/examples/c/lexcalc/lexcalc.test @@ -29,9 +29,9 @@ run -noerr 0 9 -p cat >input <<EOF (1+2) * EOF -run 1 'err: 1.8-2.0: syntax error, unexpected end-of-line, expecting ( or number' +run 1 'err: 1.8-2.0: syntax error, unexpected end of line, expecting ( or number' cat >input <<EOF 1 / (2 - 2) EOF -run 1 'err: 1.1-11: error: division by zero" +run 1 'err: 1.1-11: error: division by zero' diff --git a/examples/c/lexcalc/parse.y b/examples/c/lexcalc/parse.y index 62201a16..e8a560fe 100644 --- a/examples/c/lexcalc/parse.y +++ b/examples/c/lexcalc/parse.y @@ -49,8 +49,7 @@ SLASH "/" LPAREN "(" RPAREN ")" - EOL "end-of-line" - EOF 0 "end-of-file" + EOL "end of line" ; %token <int> NUM "number" diff --git a/examples/c/lexcalc/scan.l b/examples/c/lexcalc/scan.l index 8be67cae..d66a23cf 100644 --- a/examples/c/lexcalc/scan.l +++ b/examples/c/lexcalc/scan.l @@ -54,6 +54,6 @@ . yyerror (yylloc, nerrs, "syntax error, invalid character"); continue; -<<EOF>> return TOK_EOF; +<<EOF>> return TOK_YYEOF; %% /* Epilogue (C code). */ diff --git a/src/output.c b/src/output.c index 0f6ee5b5..8689c6dd 100644 --- a/src/output.c +++ b/src/output.c @@ -192,6 +192,9 @@ xescape_trigraphs (const char *src) static void prepare_symbol_names (char const *muscle_name) { + const bool eof_is_user_defined + = !endtoken->alias || STRNEQ (endtoken->alias->tag, "$end"); + /* We assume that the table will be output starting at column 2. */ const bool quote = STREQ (muscle_name, "tname"); bool has_translations = false; @@ -201,10 +204,16 @@ prepare_symbol_names (char const *muscle_name) set_quoting_flags (qo, QA_SPLIT_TRIGRAPHS); for (int i = 0; i < nsyms; i++) { + /* Use "end of file" rather than "$end". But keep "$end" in the + reports, it's shorter and more consistent. */ + const char *tag + = !eof_is_user_defined && symbols[i]->content == endtoken->content + ? "\"end of file\"" + : symbols[i]->tag; char *cp - = symbols[i]->tag[0] == '"' && !quote - ? xescape_trigraphs (symbols[i]->tag) - : quotearg_alloc (symbols[i]->tag, -1, qo); + = tag[0] == '"' && !quote + ? xescape_trigraphs (tag) + : quotearg_alloc (tag, -1, qo); /* Width of the next token, including the two quotes, the comma and the space. */ int width diff --git a/tests/actions.at b/tests/actions.at index 223d00d2..4a167690 100644 --- a/tests/actions.at +++ b/tests/actions.at @@ -1146,7 +1146,7 @@ Entering state 6 Stack now 0 1 3 5 6 Reading a token Now at end of input. -1.5: syntax error, unexpected $end, expecting 'e' +1.5: syntax error, unexpected end of file, expecting 'e' Error: popping token 'd' (1.4: <> printer for 'd' @ 4) Stack now 0 1 3 5 Error: popping token 'c' (1.3: 'b'/'c' printer for 'c' @ 3) @@ -1155,7 +1155,7 @@ Error: popping token 'b' (1.2: 'b'/'c' printer for 'b' @ 2) Stack now 0 1 Error: popping token 'a' (1.1: <> printer for 'a' @ 1) Stack now 0 -Cleanup: discarding lookahead token $end (1.5: ) +Cleanup: discarding lookahead token "end of file" (1.5: ) Stack now 0 ]]) @@ -1275,7 +1275,7 @@ Entering state 8 Stack now 0 1 3 5 6 7 8 Reading a token Now at end of input. -syntax error, unexpected $end, expecting 'g' +syntax error, unexpected end of file, expecting 'g' Error: popping token 'f' (<*>/<field2>/e printer) Stack now 0 1 3 5 6 7 Error: popping token 'e' (<*>/<field2>/e printer) @@ -1288,7 +1288,7 @@ Error: popping token 'b' (<field1> printer) Stack now 0 1 Error: popping token 'a' (<*>/<field2>/e printer) Stack now 0 -Cleanup: discarding lookahead token $end () +Cleanup: discarding lookahead token "end of file" () Stack now 0 ]]) @@ -1511,7 +1511,7 @@ Entering state 3 Stack now 0 1 3 Reading a token Now at end of input. -Cleanup: discarding lookahead token $end () +Cleanup: discarding lookahead token "end of file" () Stack now 0 1 3 Cleanup: popping token error () Cleanup: popping token 'a' ('a') @@ -1685,7 +1685,7 @@ DESTROY 2 Stack now 0 2 Error: popping nterm $@1 (: ) Stack now 0 -Cleanup: discarding lookahead token $end (: ) +Cleanup: discarding lookahead token "end of file" (: ) Stack now 0 ]]) diff --git a/tests/c++.at b/tests/c++.at index 848a977d..5facc63a 100644 --- a/tests/c++.at +++ b/tests/c++.at @@ -1330,7 +1330,7 @@ AT_PARSER_CHECK([[input aaaae]], [[2]], [[]], ]]) AT_PARSER_CHECK([[input aaaaE]], [[2]], [[]], -[[exception caught: syntax error, unexpected $end, expecting 'a' +[[exception caught: syntax error, unexpected end of file, expecting 'a' ]]) AT_PARSER_CHECK([[input aaaaT]], [[1]]) diff --git a/tests/conflicts.at b/tests/conflicts.at index 6300ee96..b6d92c5a 100644 --- a/tests/conflicts.at +++ b/tests/conflicts.at @@ -346,7 +346,7 @@ m4_pushdef([AT_NONASSOC_AND_EOF_CHECK], [AT_BISON_CHECK([$1[ -o input.c input.y]]) AT_COMPILE([input]) -m4_pushdef([AT_EXPECTING], [m4_if($2, [correct], [[, expecting $end]])]) +m4_pushdef([AT_EXPECTING], [m4_if($2, [correct], [[, expecting end of file]])]) AT_PARSER_CHECK([input '0<0']) AT_PARSER_CHECK([input '0<0<0'], [1], [], @@ -509,50 +509,50 @@ m4_pushdef([AT_PREVIOUS_STATE_INPUT], [[a]]) AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[$end]], [[none]]) + [[end of file]], [[none]]) AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr %glr-parser]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[$end]], [[none]]) + [[end of file]], [[none]]) AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr %language "c++"]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[$end]], [[none]]) + [[end of file]], [[none]]) AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr %language "java"]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[end of input]], [[none]]) + [[end of file]], [[none]]) # Even canonical LR doesn't foresee the error for 'a'! AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr %define lr.default-reduction consistent]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[$end]], [[ab]]) + [[end of file]], [[ab]]) AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr %define lr.default-reduction accepting]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[$end]], [[ab]]) + [[end of file]], [[ab]]) AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[$end]], [[ab]]) + [[end of file]], [[ab]]) # Only LAC gets it right. In C. AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr %define parse.lac full]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[$end]], [[b]]) + [[end of file]], [[b]]) AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr %define parse.lac full]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[$end]], [[b]]) + [[end of file]], [[b]]) # Only LAC gets it right. In C++. AT_CONSISTENT_ERRORS_CHECK([[%language "c++" @@ -560,13 +560,13 @@ AT_CONSISTENT_ERRORS_CHECK([[%language "c++" %define parse.lac full]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[$end]], [[b]]) + [[end of file]], [[b]]) AT_CONSISTENT_ERRORS_CHECK([[%language "c++" %define lr.type ielr %define parse.lac full]], [AT_PREVIOUS_STATE_GRAMMAR], [AT_PREVIOUS_STATE_INPUT], - [[$end]], [[b]]) + [[end of file]], [[b]]) m4_popdef([AT_PREVIOUS_STATE_GRAMMAR]) m4_popdef([AT_PREVIOUS_STATE_INPUT]) @@ -638,11 +638,11 @@ AT_CONSISTENT_ERRORS_CHECK([[%define lr.default-reduction consistent]], AT_CONSISTENT_ERRORS_CHECK([[%define lr.default-reduction accepting]], [AT_USER_ACTION_GRAMMAR], [AT_USER_ACTION_INPUT], - [[$end]], [[a]]) + [[end of file]], [[a]]) AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]], [AT_USER_ACTION_GRAMMAR], [AT_USER_ACTION_INPUT], - [[$end]], [[a]]) + [[end of file]], [[a]]) AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full]], [AT_USER_ACTION_GRAMMAR], @@ -652,7 +652,7 @@ AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full %define lr.default-reduction accepting]], [AT_USER_ACTION_GRAMMAR], [AT_USER_ACTION_INPUT], - [[$end]], [[none]]) + [[end of file]], [[none]]) m4_popdef([AT_USER_ACTION_GRAMMAR]) m4_popdef([AT_USER_ACTION_INPUT]) diff --git a/tests/glr-regression.at b/tests/glr-regression.at index 78baee89..8257088b 100644 --- a/tests/glr-regression.at +++ b/tests/glr-regression.at @@ -1742,7 +1742,7 @@ Stack 1 Entering state 2 Now at end of input. Removing dead stacks. Rename stack 1 -> 0. -On stack 0, shifting token $end () +On stack 0, shifting token "end of file" () Stack 0 now in state #5 Ambiguity detected. Option 1, @@ -1760,7 +1760,7 @@ Option 2, d <empty> syntax is ambiguous -Cleanup: popping token $end () +Cleanup: popping token "end of file" () Cleanup: popping unresolved nterm start () Cleanup: popping nterm d () Cleanup: popping token 'c' () diff --git a/tests/regression.at b/tests/regression.at index cd79b507..52c88e98 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -701,8 +701,9 @@ static const yytype_int8 yyrline[] = }; static const char *const yytname[] = { - "$end", "error", "$undefined", "\"if\"", "\"const\"", "\"then\"", - "\"else\"", "$accept", "statement", "struct_stat", "if", "else", YY_NULLPTR + "\"end of file\"", "error", "$undefined", "\"if\"", "\"const\"", + "\"then\"", "\"else\"", "$accept", "statement", "struct_stat", "if", + "else", YY_NULLPTR }; static const yytype_int16 yytoknum[] = { @@ -967,7 +968,7 @@ Entering state 1 Stack now 0 1 Reading a token Next token is token 'a' (PRINTER) -syntax error, unexpected 'a', expecting $end +syntax error, unexpected 'a', expecting end of file Error: popping nterm start () Stack now 0 Cleanup: discarding lookahead token 'a' (PRINTER) @@ -1177,7 +1178,7 @@ AT_BISON_CHECK([[-o input.c input.y]]) AT_COMPILE([[input]]) AT_PARSER_CHECK([[input]], [[1]], [], [[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B -syntax error, unexpected $end, expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B +syntax error, unexpected end of file, expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B ]]) AT_CLEANUP @@ -1445,10 +1446,10 @@ Entering state 0 Stack now 0 Reading a token Now at end of input. -LAC: initial context established for $end -LAC: checking lookahead $end: R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded) +LAC: initial context established for "end of file" +LAC: checking lookahead "end of file": R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded) memory exhausted -Cleanup: discarding lookahead token $end () +Cleanup: discarding lookahead token "end of file" () Stack now 0 ]]) @@ -1464,7 +1465,7 @@ Next token is token $undefined () LAC: initial context established for $undefined LAC: checking lookahead $undefined: Always Err Constructing syntax error message -LAC: checking lookahead $end: R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded) +LAC: checking lookahead "end of file": R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max size exceeded) syntax error memory exhausted Cleanup: discarding lookahead token $undefined () diff --git a/tests/skeletons.at b/tests/skeletons.at index 013e29d0..744e57b5 100644 --- a/tests/skeletons.at +++ b/tests/skeletons.at @@ -120,13 +120,13 @@ AT_GRAM]) AT_BISON_CHECK([[--skeleton=yacc.c -o input-cmd-line.c input-cmd-line.y]]) AT_COMPILE([[input-cmd-line]]) AT_PARSER_CHECK([[input-cmd-line]], [[1]], [], -[[syntax error, unexpected 'a', expecting $end +[[syntax error, unexpected 'a', expecting end of file ]]) AT_BISON_CHECK([[-o input-gram.c input-gram.y]]) AT_COMPILE([[input-gram]]) AT_PARSER_CHECK([[input-gram]], [[1]], [], -[[syntax error, unexpected 'a', expecting $end +[[syntax error, unexpected 'a', expecting end of file ]]) m4_popdef([AT_GRAM]) |