diff options
-rw-r--r-- | NEWS | 4 | ||||
-rw-r--r-- | THANKS | 1 | ||||
-rw-r--r-- | data/README | 71 | ||||
-rw-r--r-- | data/skeletons/bison.m4 | 17 | ||||
-rw-r--r-- | src/output.c | 9 | ||||
-rw-r--r-- | src/reduce.c | 9 | ||||
-rw-r--r-- | src/reduce.h | 5 | ||||
-rw-r--r-- | src/scan-code.l | 6 | ||||
-rw-r--r-- | tests/reduce.at | 82 |
9 files changed, 173 insertions, 31 deletions
@@ -2,6 +2,10 @@ GNU Bison NEWS * Noteworthy changes in release ?.? (????-??-??) [?] +** Bug fixes + + Bison 3.3 failed to generate parsers for grammars with unused non terminal + symbols. * Noteworthy changes in release 3.3.1 (2019-01-27) [stable] @@ -18,6 +18,7 @@ Antonio Silva Correia amsilvacorreia@hotmail.com Arnold Robbins arnold@skeeve.com Art Haas ahaas@neosoft.com Askar Safin safinaskar@mail.ru +Balázs Scheidler balazs.scheidler@oneidentity.com Baron Schwartz baron@sequent.org Ben Pfaff blp@cs.stanford.edu Benoit Perrot benoit.perrot@epita.fr diff --git a/data/README b/data/README index 1a2e71cc..ed217365 100644 --- a/data/README +++ b/data/README @@ -75,48 +75,75 @@ skeletons. ## Symbols +### `b4_symbol(NUM, FIELD)` In order to unify the handling of the various aspects of symbols (tag, type name, whether terminal, etc.), bison.exe defines one macro per (token, field), where field can `has_id`, `id`, etc.: see -src/output.c:prepare_symbols_definitions(). +`prepare_symbols_definitions()` in `src/output.c`. -The various FIELDS are: +The macro `b4_symbol(NUM, FIELD)` gives access to the following FIELDS: + +- `has_id`: 0 or 1. -- has_id: 0 or 1. Whether the symbol has an id. -- id: string - If has_id, the id. Guaranteed to be usable as a C identifier. - Prefixed by api.token.prefix if defined. -- tag: string. + +- `id`: string + If has_id, the id (prefixed by api.token.prefix if defined), otherwise + defined as empty. Guaranteed to be usable as a C identifier. + +- `tag`: string. A representation of the symbol. Can be 'foo', 'foo.id', '"foo"' etc. -- user_number: integer + +- `user_number`: integer The external number as used by yylex. Can be ASCII code when a character, some number chosen by bison, or some user number in the case of %token FOO <NUM>. Corresponds to yychar in yacc.c. -- is_token: 0 or 1 + +- `is_token`: 0 or 1 Whether this is a terminal symbol. -- number: integer + +- `number`: integer The internal number (computed from the external number by yytranslate). Corresponds to yytoken in yacc.c. This is the same number that serves as key in b4_symbol(NUM, FIELD). -- has_type: 0, 1 + + In bison, symbols are first assigned increasing numbers in order of + appearance (but tokens first, then nterms). After grammar reduction, + unused nterms are then renumbered to appear last (i.e., first tokens, then + used nterms and finally unused nterms). This final number NUM is the one + contained in this field, and it is the one used as key in `b4_symbol(NUM, + FIELD)`. + + The code of the rule actions, however, is emitted before we know what + symbols are unused, so they use the original numbers. To avoid confusion, + they actually use "orig NUM" instead of just "NUM". bison also emits + definitions for `b4_symbol(orig NUM, number)` that map from original + numbers to the new ones. `b4_symbol` actually resolves `orig NUM` in the + other case, i.e., `b4_symbol(orig 42, tag)` would return the tag of the + symbols whose original number was 42. + +- `has_type`: 0, 1 Whether has a semantic value. -- type_tag: string + +- `type_tag`: string When api.value.type=union, the generated name for the union member. yytype_INT etc. for symbols that has_id, otherwise yytype_1 etc. -- type + +- `type` If it has a semantic value, its type tag, or, if variant are used, its type. In the case of api.value.type=union, type is the real type (e.g. int). -- has_printer: 0, 1 -- printer: string -- printer_file: string -- printer_line: integer + +- `has_printer`: 0, 1 +- `printer`: string +- `printer_file`: string +- `printer_line`: integer If the symbol has a printer, everything about it. -- has_destructor, destructor, destructor_file, destructor_line + +- `has_destructor`, `destructor`, `destructor_file`, `destructor_line` Likewise. -### b4_symbol_value(VAL, [SYMBOL-NUM], [TYPE-TAG]) +### `b4_symbol_value(VAL, [SYMBOL-NUM], [TYPE-TAG])` Expansion of $$, $1, $<TYPE-TAG>3, etc. The semantic value from a given VAL. @@ -127,14 +154,14 @@ The semantic value from a given VAL. The result can be used safely, it is put in parens to avoid nasty precedence issues. -### b4_lhs_value(SYMBOL-NUM, [TYPE]) +### `b4_lhs_value(SYMBOL-NUM, [TYPE])` Expansion of `$$` or `$<TYPE>$`, for symbol `SYMBOL-NUM`. -### b4_rhs_data(RULE-LENGTH, POS) +### `b4_rhs_data(RULE-LENGTH, POS)` The data corresponding to the symbol `#POS`, where the current rule has `RULE-LENGTH` symbols on RHS. -### b4_rhs_value(RULE-LENGTH, POS, SYMBOL-NUM, [TYPE]) +### `b4_rhs_value(RULE-LENGTH, POS, SYMBOL-NUM, [TYPE])` Expansion of `$<TYPE>POS`, where the current rule has `RULE-LENGTH` symbols on RHS. diff --git a/data/skeletons/bison.m4 b/data/skeletons/bison.m4 index 8a33a582..e3591875 100644 --- a/data/skeletons/bison.m4 +++ b/data/skeletons/bison.m4 @@ -389,17 +389,28 @@ m4_define([b4_glr_cc_if], # # The following macros provide access to symbol related values. +# __b4_symbol(NUM, FIELD) +# ----------------------- +# Recover a FIELD about symbol #NUM. Thanks to m4_indir, fails if +# undefined. +m4_define([__b4_symbol], +[m4_indir([b4_symbol($1, $2)])]) + + # _b4_symbol(NUM, FIELD) # ---------------------- -# Recover a FIELD about symbol #NUM. Thanks to m4_indir, fails if +# Recover a FIELD about symbol #NUM (or "orig NUM"). Fails if # undefined. m4_define([_b4_symbol], -[m4_indir([b4_symbol($1, $2)])]) +[m4_ifdef([b4_symbol($1, number)], + [__b4_symbol(m4_indir([b4_symbol($1, number)]), $2)], + [__b4_symbol([$1], [$2])])]) + # b4_symbol(NUM, FIELD) # --------------------- -# Recover a FIELD about symbol #NUM. Thanks to m4_indir, fails if +# Recover a FIELD about symbol #NUM (or "orig NUM"). Fails if # undefined. If FIELD = id, prepend the token prefix. m4_define([b4_symbol], [m4_case([$2], diff --git a/src/output.c b/src/output.c index da600ae5..6e4fc595 100644 --- a/src/output.c +++ b/src/output.c @@ -38,6 +38,7 @@ #include "muscle-tab.h" #include "output.h" #include "reader.h" +#include "reduce.h" #include "scan-code.h" /* max_left_semantic_context */ #include "scan-skel.h" #include "symtab.h" @@ -414,6 +415,14 @@ merger_output (FILE *out) static void prepare_symbol_definitions (void) { + /* Map "orig NUM" to new numbers. See data/README. */ + for (symbol_number i = ntokens; i < nsyms + nuseless_nonterminals; ++i) + { + obstack_printf (&format_obstack, "symbol(orig %d, number)", i); + const char *key = obstack_finish0 (&format_obstack); + MUSCLE_INSERT_INT (key, nterm_map ? nterm_map[i - ntokens] : i); + } + for (int i = 0; i < nsyms; ++i) { symbol *sym = symbols[i]; diff --git a/src/reduce.c b/src/reduce.c index e3a5e039..b1815531 100644 --- a/src/reduce.c +++ b/src/reduce.c @@ -259,13 +259,14 @@ reduce_grammar_tables (void) | Remove useless nonterminals. | `------------------------------*/ +symbol_number *nterm_map = NULL; + static void nonterminals_reduce (void) { + nterm_map = xnmalloc (nvars, sizeof *nterm_map); /* Map the nonterminals to their new index: useful first, useless afterwards. Kept for later report. */ - - symbol_number *nterm_map = xnmalloc (nvars, sizeof *nterm_map); { symbol_number n = ntokens; for (symbol_number i = ntokens; i < nsyms; ++i) @@ -306,8 +307,6 @@ nonterminals_reduce (void) nsyms -= nuseless_nonterminals; nvars -= nuseless_nonterminals; - - free (nterm_map); } @@ -433,4 +432,6 @@ reduce_free (void) bitset_free (V); bitset_free (V1); bitset_free (P); + free (nterm_map); + nterm_map = NULL; } diff --git a/src/reduce.h b/src/reduce.h index c3866fc3..a6b4946b 100644 --- a/src/reduce.h +++ b/src/reduce.h @@ -32,6 +32,11 @@ bool reduce_nonterminal_useless_in_grammar (const sym_content *sym); void reduce_free (void); +/** Map initial nterm numbers to the new ones. Built by + * reduce_grammar. Size nvars. */ +extern symbol_number *nterm_map; + extern unsigned nuseless_nonterminals; extern unsigned nuseless_productions; + #endif /* !REDUCE_H_ */ diff --git a/src/scan-code.l b/src/scan-code.l index 9141a9a5..d9e2a4b9 100644 --- a/src/scan-code.l +++ b/src/scan-code.l @@ -648,7 +648,7 @@ handle_action_dollar (symbol_list *rule, char *text, location dollar_loc) untyped_var_seen = true; } - obstack_printf (&obstack_for_string, "]b4_lhs_value(%d, ", + obstack_printf (&obstack_for_string, "]b4_lhs_value(orig %d, ", sym->content.sym->content->number); obstack_quote (&obstack_for_string, type_name); obstack_sgrow (&obstack_for_string, ")["); @@ -677,7 +677,9 @@ handle_action_dollar (symbol_list *rule, char *text, location dollar_loc) "]b4_rhs_value(%d, %d, ", effective_rule_length, n); if (sym) - obstack_printf (&obstack_for_string, "%d, ", sym->content.sym->content->number); + obstack_printf (&obstack_for_string, "%s%d, ", + sym->content.sym->content->class == nterm_sym ? "orig " : "", + sym->content.sym->content->number); else obstack_sgrow (&obstack_for_string, "[], "); diff --git a/tests/reduce.at b/tests/reduce.at index 805b89d2..a47485ad 100644 --- a/tests/reduce.at +++ b/tests/reduce.at @@ -199,6 +199,88 @@ AT_CLEANUP +## --------------- ## +## Useless Parts. ## +## --------------- ## + +AT_SETUP([Useless Parts]) + +# We used to emit code that used symbol numbers before the useless +# symbol elimination, hence before the renumbering of the useful +# symbols. As a result, the evaluation of the skeleton failed because +# it used non existing symbol numbers. Which is the happy scenario: +# we could use numbers of other existing symbols... +# http://lists.gnu.org/archive/html/bug-bison/2019-01/msg00044.html + +AT_BISON_OPTION_PUSHDEFS +AT_DATA([[input.y]], +[[%code { + ]AT_YYERROR_DECLARE_EXTERN[ + ]AT_YYLEX_DECLARE_EXTERN[ +} +%union { void* ptr; } +%type <ptr> used1 +%type <ptr> used2 + +%% +start + : used1 + ; + +used1 + : used2 { $$ = $1; } + ; + +unused + : used2 + ; + +used2 + : { $$ = YY_NULLPTR; } + ; +]]) + +AT_BISON_CHECK([[-fcaret -rall -o input.c input.y]], 0, [], +[[input.y: warning: 1 nonterminal useless in grammar [-Wother] +input.y: warning: 1 rule useless in grammar [-Wother] +input.y:18.1-6: warning: nonterminal useless in grammar: unused [-Wother] + unused + ^~~~~~ +]]) + + +AT_CHECK([[sed -n '/^State 0/q;/^$/!p' input.output]], 0, +[[Nonterminals useless in grammar + unused +Rules useless in grammar + 4 unused: used2 +Grammar + 0 $accept: start $end + 1 start: used1 + 2 used1: used2 + 3 used2: %empty +Terminals, with rules where they appear +$end (0) 0 +error (256) +Nonterminals, with rules where they appear +$accept (3) + on left: 0 +start (4) + on left: 1, on right: 0 +used1 <ptr> (5) + on left: 2, on right: 1 +used2 <ptr> (6) + on left: 3, on right: 2 +]]) + +# Make sure the generated parser is correct. +AT_COMPILE([input.o]) + +AT_BISON_OPTION_POPDEFS +AT_CLEANUP + + + ## ------------------- ## ## Reduced Automaton. ## ## ------------------- ## |