summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAkim Demaille <akim.demaille@gmail.com>2021-01-23 18:40:15 +0100
committerAkim Demaille <akim.demaille@gmail.com>2021-01-24 08:28:45 +0100
commitc22902e360e0fbbe9fd5657dcf107e03166da309 (patch)
treeb96259a7bfe693653508929cad4b073133886fd1 /src
parent2c294c132528ede23d8ae4959783a67e9ff05ac5 (diff)
downloadbison-c22902e360e0fbbe9fd5657dcf107e03166da309.tar.gz
tables: fix handling for useless tokens
In some rare conditions, the generated parser can be wrong when there are useless tokens. Reported by Balázs Scheidler. https://github.com/akimd/bison/issues/72 Balázs managed to prove that the bug was introduced in commit af1c6f973a60a51c609903713ff8f7fce0887025 Author: Theophile Ranquet <ranquet@lrde.epita.fr> Date: Tue Nov 13 10:38:49 2012 +0000 tables: use bitsets for a performance boost Suggested by Yuri at <http://lists.gnu.org/archive/html/bison-patches/2012-01/msg00000.html>. The improvement is marginal for most grammars, but notable for large grammars (e.g., PosgreSQL's postgre.y), and very large for the sample.y grammar submitted by Yuri in http://lists.gnu.org/archive/html/bison-patches/2012-01/msg00012.html. Measured with --trace=time -fsyntax-only. parser action tables postgre.y sample.y Before 0,129 (44%) 37,095 (99%) After 0,117 (42%) 5,046 (93%) * src/tables.c (pos): Replace this set of integer coded as an unsorted array of integers with... (pos_set): this bitset. which was implemented long ago, but that I installed only recently (March 2019), first published in v3.3.90. That patch introduces a bitset to represent a set of integers. It managed negative integers by using a (fixed) base (the smallest integer to represent). It avoided negative accesses into the bitset by ignoring integers smaller than the base, under the asumption that these cases correspond to useless tokens that are ignored anyway. While it turns out to be true for all the test cases in the test suite (!), Balázs' use case demonstrates that it is not always the case. So we need to be able to accept negative integers that are smaller than the current base. "Amusingly" enough, the aforementioned patch was visibly unsure about itself: /* Store PLACE into POS_SET. PLACE might not belong to the set of possible values for instance with useless tokens. It would be more satisfying to eliminate the need for this 'if'. */ This commit needs several improvements in the future: - support from bitset for bit assignment and shifts - amortized resizing of pos_set - test cases * src/tables.c (pos_set_base, pos_set_dump, pos_set_set, pos_set_test): New. Use them instead of using bitset_set and bitset_test directly.
Diffstat (limited to 'src')
-rw-r--r--src/tables.c83
1 files changed, 73 insertions, 10 deletions
diff --git a/src/tables.c b/src/tables.c
index 0c98c327..b04a496e 100644
--- a/src/tables.c
+++ b/src/tables.c
@@ -111,9 +111,13 @@ base_number *base = NULL;
computation equals to BASE_MINIMUM, later mapped to BASE_NINF to
keep parser tables small. */
base_number base_ninf = 0;
+
/* Bitset representing an integer set in the range
- -nstates..table_size (as an upper bound) */
+ POS_SET_OFFSET..(POS_SET_OFFSET + SIZE). POS_SET_OFFSET is
+ nonpositive. */
static bitset pos_set = NULL;
+/* The integer denoted by bitno 0 in pos_set. */
+static int pos_set_base = 0;
static int *conflrow;
int *conflict_table;
@@ -138,6 +142,71 @@ int high;
state_number *yydefgoto;
rule_number *yydefact;
+
+/*----------.
+| pos_set. |
+`----------*/
+
+#if 0
+static void
+pos_set_dump (void)
+{
+ fprintf (stderr, "pos_set (%ld, %d) =", bitset_size (pos_set), pos_set_base);
+ bitset_iterator biter;
+ int i;
+ BITSET_FOR_EACH (biter, pos_set, i, 0)
+ fprintf (stderr, " %d", i + pos_set_base);
+ putc ('\n', stderr);
+}
+#endif
+
+
+/* The size and base of POS_SET are not known, we need to be able to
+ move the base farther "on the left", and grow "on the right".
+
+ It would be nice to be able to predict the base accurately, but it
+ seems difficult (-nstates seems to work most of the time, except
+ when there are useless tokens).
+
+ FIXME: The current approach is correct, but with poor performances.
+ Bitsets need to support 'assign' and 'shift'. And instead of
+ extending POS_SET just for the out-of-range new values, we need
+ something like doubling the size.
+ */
+
+static void
+pos_set_set (int pos)
+{
+ int bitno = pos - pos_set_base;
+ if (bitno < 0)
+ {
+ const int delta = pos_set_base - pos;
+ const int old_size = bitset_size (pos_set);
+ const int new_size = old_size + delta;
+ bitset_resize (pos_set, new_size);
+ // Shift all the bits by DELTA.
+ // FIXME: add bitset_assign, and bitset_shift?
+ for (int i = new_size - 1; delta <= i ; --i)
+ if (bitset_test (pos_set, i))
+ bitset_set (pos_set, i + delta);
+ else
+ bitset_reset (pos_set, i + delta);
+ pos_set_base = pos;
+ bitno = 0;
+ }
+ else if (bitset_size (pos_set) <= bitno)
+ bitset_resize (pos_set, bitno + 1);
+ bitset_set (pos_set, bitno);
+}
+
+static bool
+pos_set_test (int pos)
+{
+ const int bitno = pos - pos_set_base;
+ return bitset_test (pos_set, bitno);
+}
+
+
/*-------------------------------------------------------------------.
| If TABLE, CONFLICT_TABLE, and CHECK are too small to be addressed |
| at DESIRED, grow them. TABLE[DESIRED] can be used, so the desired |
@@ -168,8 +237,6 @@ table_grow (int desired)
check = xnrealloc (check, table_size, sizeof *check);
for (int i = old_size; i < table_size; ++i)
check[i] = -1;
-
- bitset_resize (pos_set, table_size + nstates);
}
@@ -672,7 +739,7 @@ pack_vector (vector_number vector)
ok = false;
}
- if (ok && bitset_test (pos_set, nstates + res))
+ if (ok && pos_set_test (res))
ok = false;
}
@@ -732,6 +799,7 @@ pack_table (void)
{
base = xnmalloc (nvectors, sizeof *base);
pos_set = bitset_create (table_size + nstates, BITSET_FRUGAL);
+ pos_set_base = -nstates;
table = xcalloc (table_size, sizeof *table);
conflict_table = xcalloc (table_size, sizeof *conflict_table);
check = xnmalloc (table_size, sizeof *check);
@@ -757,12 +825,7 @@ pack_table (void)
/* Action of I were already coded for S. */
place = base[s];
- /* Store PLACE into POS_SET. PLACE might not belong to the set
- of possible values for instance with useless tokens. It
- would be more satisfying to eliminate the need for this
- 'if'. */
- if (0 <= nstates + place)
- bitset_set (pos_set, nstates + place);
+ pos_set_set (place);
base[order[i]] = place;
}