summaryrefslogtreecommitdiff
path: root/src/symtab.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/symtab.c')
-rw-r--r--src/symtab.c401
1 files changed, 401 insertions, 0 deletions
diff --git a/src/symtab.c b/src/symtab.c
new file mode 100644
index 0000000..fe37bee
--- /dev/null
+++ b/src/symtab.c
@@ -0,0 +1,401 @@
+/* GNU m4 -- A simple macro processor
+
+ Copyright (C) 1989-1994, 2003, 2006-2013 Free Software Foundation,
+ Inc.
+
+ This file is part of GNU M4.
+
+ GNU M4 is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ GNU M4 is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* This file handles all the low level work around the symbol table. The
+ symbol table is a simple chained hash table. Each symbol is described
+ by a struct symbol, which is placed in the hash table based upon the
+ symbol name. Symbols that hash to the same entry in the table are
+ kept on a list, sorted by name. As a special case, to facilitate the
+ "pushdef" and "popdef" builtins, a symbol can be several times in the
+ symbol table, one for each definition. Since the name is the same,
+ all the entries for the symbol will be on the same list, and will
+ also, because the list is sorted, be adjacent. All the entries for a
+ name are simply ordered on the list by age. The current definition
+ will then always be the first found. */
+
+#include "m4.h"
+#include <limits.h>
+
+#ifdef DEBUG_SYM
+/* When evaluating hash table performance, this profiling code shows
+ how many collisions were encountered. */
+
+struct profile
+{
+ int entry; /* Number of times lookup_symbol called with this mode. */
+ int comparisons; /* Number of times strcmp was called. */
+ int misses; /* Number of times strcmp did not return 0. */
+ long long bytes; /* Number of bytes compared. */
+};
+
+static struct profile profiles[5];
+static symbol_lookup current_mode;
+
+/* On exit, show a profile of symbol table performance. */
+static void
+show_profile (void)
+{
+ int i;
+ for (i = 0; i < 5; i++)
+ {
+ xfprintf(stderr, "m4: lookup mode %d called %d times, %d compares, "
+ "%d misses, %lld bytes\n",
+ i, profiles[i].entry, profiles[i].comparisons,
+ profiles[i].misses, profiles[i].bytes);
+ }
+}
+
+/* Like strcmp (S1, S2), but also track profiling statistics. */
+static int
+profile_strcmp (const char *s1, const char *s2)
+{
+ int i = 1;
+ int result;
+ while (*s1 && *s1 == *s2)
+ {
+ s1++;
+ s2++;
+ i++;
+ }
+ result = (unsigned char) *s1 - (unsigned char) *s2;
+ profiles[current_mode].comparisons++;
+ if (result != 0)
+ profiles[current_mode].misses++;
+ profiles[current_mode].bytes += i;
+ return result;
+}
+
+# define strcmp profile_strcmp
+#endif /* DEBUG_SYM */
+
+
+/*------------------------------------------------------------------.
+| Initialise the symbol table, by allocating the necessary storage, |
+| and zeroing all the entries. |
+`------------------------------------------------------------------*/
+
+/* Pointer to symbol table. */
+symbol **symtab;
+
+void
+symtab_init (void)
+{
+ size_t i;
+ symbol **s;
+
+ s = symtab = (symbol **) xnmalloc (hash_table_size, sizeof (symbol *));
+
+ for (i = 0; i < hash_table_size; i++)
+ s[i] = NULL;
+
+#ifdef DEBUG_SYM
+ {
+ int e = atexit(show_profile);
+ if (e != 0)
+ M4ERROR ((warning_status, 0,
+ "INTERNAL ERROR: unable to show symtab profile"));
+ }
+#endif /* DEBUG_SYM */
+}
+
+/*--------------------------------------------------.
+| Return a hashvalue for a string, from GNU-emacs. |
+`--------------------------------------------------*/
+
+static size_t M4_GNUC_PURE
+hash (const char *s)
+{
+ register size_t val = 0;
+
+ register const char *ptr = s;
+ register char ch;
+
+ while ((ch = *ptr++) != '\0')
+ val = (val << 7) + (val >> (sizeof (val) * CHAR_BIT - 7)) + ch;
+ return val;
+}
+
+/*--------------------------------------------.
+| Free all storage associated with a symbol. |
+`--------------------------------------------*/
+
+void
+free_symbol (symbol *sym)
+{
+ if (SYMBOL_PENDING_EXPANSIONS (sym) > 0)
+ SYMBOL_DELETED (sym) = true;
+ else
+ {
+ free (SYMBOL_NAME (sym));
+ if (SYMBOL_TYPE (sym) == TOKEN_TEXT)
+ free (SYMBOL_TEXT (sym));
+ free (sym);
+ }
+}
+
+/*-------------------------------------------------------------------.
+| Search in, and manipulation of the symbol table, are all done by |
+| lookup_symbol (). It basically hashes NAME to a list in the |
+| symbol table, and searches this list for the first occurrence of a |
+| symbol with the name. |
+| |
+| The MODE parameter determines what lookup_symbol () will do. It |
+| can either just do a lookup, do a lookup and insert if not |
+| present, do an insertion even if the name is already in the list, |
+| delete the first occurrence of the name on the list, or delete all |
+| occurrences of the name on the list. |
+`-------------------------------------------------------------------*/
+
+symbol *
+lookup_symbol (const char *name, symbol_lookup mode)
+{
+ size_t h;
+ int cmp = 1;
+ symbol *sym, *prev;
+ symbol **spp;
+
+#if DEBUG_SYM
+ current_mode = mode;
+ profiles[mode].entry++;
+#endif /* DEBUG_SYM */
+
+ h = hash (name);
+ sym = symtab[h % hash_table_size];
+
+ for (prev = NULL; sym != NULL; prev = sym, sym = sym->next)
+ {
+ cmp = strcmp (SYMBOL_NAME (sym), name);
+ if (cmp >= 0)
+ break;
+ }
+
+ /* If just searching, return status of search. */
+
+ if (mode == SYMBOL_LOOKUP)
+ return cmp == 0 ? sym : NULL;
+
+ /* Symbol not found. */
+
+ spp = (prev != NULL) ? &prev->next : &symtab[h % hash_table_size];
+
+ switch (mode)
+ {
+
+ case SYMBOL_INSERT:
+
+ /* If the name was found in the table, check whether it is still in
+ use by a pending expansion. If so, replace the table element with
+ a new one; if not, just return the symbol. If not found, just
+ insert the name, and return the new symbol. */
+
+ if (cmp == 0 && sym != NULL)
+ {
+ if (SYMBOL_PENDING_EXPANSIONS (sym) > 0)
+ {
+ symbol *old = sym;
+ SYMBOL_DELETED (old) = true;
+
+ sym = (symbol *) xmalloc (sizeof (symbol));
+ SYMBOL_TYPE (sym) = TOKEN_VOID;
+ SYMBOL_TRACED (sym) = SYMBOL_TRACED (old);
+ SYMBOL_NAME (sym) = xstrdup (name);
+ SYMBOL_SHADOWED (sym) = false;
+ SYMBOL_MACRO_ARGS (sym) = false;
+ SYMBOL_BLIND_NO_ARGS (sym) = false;
+ SYMBOL_DELETED (sym) = false;
+ SYMBOL_PENDING_EXPANSIONS (sym) = 0;
+
+ SYMBOL_NEXT (sym) = SYMBOL_NEXT (old);
+ SYMBOL_NEXT (old) = NULL;
+ (*spp) = sym;
+ }
+ return sym;
+ }
+ /* Fall through. */
+
+ case SYMBOL_PUSHDEF:
+
+ /* Insert a name in the symbol table. If there is already a symbol
+ with the name, insert this in front of it, and mark the old
+ symbol as "shadowed". */
+
+ sym = (symbol *) xmalloc (sizeof (symbol));
+ SYMBOL_TYPE (sym) = TOKEN_VOID;
+ SYMBOL_TRACED (sym) = false;
+ SYMBOL_NAME (sym) = xstrdup (name);
+ SYMBOL_SHADOWED (sym) = false;
+ SYMBOL_MACRO_ARGS (sym) = false;
+ SYMBOL_BLIND_NO_ARGS (sym) = false;
+ SYMBOL_DELETED (sym) = false;
+ SYMBOL_PENDING_EXPANSIONS (sym) = 0;
+
+ SYMBOL_NEXT (sym) = *spp;
+ (*spp) = sym;
+
+ if (mode == SYMBOL_PUSHDEF && cmp == 0)
+ {
+ SYMBOL_SHADOWED (SYMBOL_NEXT (sym)) = true;
+ SYMBOL_TRACED (sym) = SYMBOL_TRACED (SYMBOL_NEXT (sym));
+ }
+ return sym;
+
+ case SYMBOL_DELETE:
+ case SYMBOL_POPDEF:
+
+ /* Delete occurrences of symbols with NAME. SYMBOL_DELETE kills
+ all definitions, SYMBOL_POPDEF kills only the first.
+ However, if the last instance of a symbol is marked for
+ tracing, reinsert a placeholder in the table. And if the
+ definition is still in use, let the caller free the memory
+ after it is done with the symbol. */
+
+ if (cmp != 0 || sym == NULL)
+ return NULL;
+ {
+ bool traced = false;
+ if (SYMBOL_NEXT (sym) != NULL
+ && SYMBOL_SHADOWED (SYMBOL_NEXT (sym))
+ && mode == SYMBOL_POPDEF)
+ {
+ SYMBOL_SHADOWED (SYMBOL_NEXT (sym)) = false;
+ SYMBOL_TRACED (SYMBOL_NEXT (sym)) = SYMBOL_TRACED (sym);
+ }
+ else
+ traced = SYMBOL_TRACED (sym);
+ do
+ {
+ *spp = SYMBOL_NEXT (sym);
+ free_symbol (sym);
+ sym = *spp;
+ }
+ while (*spp != NULL && SYMBOL_SHADOWED (*spp)
+ && mode == SYMBOL_DELETE);
+ if (traced)
+ {
+ sym = (symbol *) xmalloc (sizeof (symbol));
+ SYMBOL_TYPE (sym) = TOKEN_VOID;
+ SYMBOL_TRACED (sym) = true;
+ SYMBOL_NAME (sym) = xstrdup (name);
+ SYMBOL_SHADOWED (sym) = false;
+ SYMBOL_MACRO_ARGS (sym) = false;
+ SYMBOL_BLIND_NO_ARGS (sym) = false;
+ SYMBOL_DELETED (sym) = false;
+ SYMBOL_PENDING_EXPANSIONS (sym) = 0;
+
+ SYMBOL_NEXT (sym) = *spp;
+ (*spp) = sym;
+ }
+ }
+ return NULL;
+
+ case SYMBOL_LOOKUP:
+ default:
+ M4ERROR ((warning_status, 0,
+ "INTERNAL ERROR: invalid mode to symbol_lookup ()"));
+ abort ();
+ }
+}
+
+/*-----------------------------------------------------------------.
+| The following function is used for the cases where we want to do |
+| something to each and every symbol in the table. The function |
+| hack_all_symbols () traverses the symbol table, and calls a |
+| specified function FUNC for each symbol in the table. FUNC is |
+| called with a pointer to the symbol, and the DATA argument. |
+| |
+| FUNC may safely call lookup_symbol with mode SYMBOL_POPDEF or |
+| SYMBOL_LOOKUP, but any other mode can break the iteration. |
+`-----------------------------------------------------------------*/
+
+void
+hack_all_symbols (hack_symbol *func, void *data)
+{
+ size_t h;
+ symbol *sym;
+ symbol *next;
+
+ for (h = 0; h < hash_table_size; h++)
+ {
+ /* We allow func to call SYMBOL_POPDEF, which can invalidate
+ sym, so we must grab the next element to traverse before
+ calling func. */
+ for (sym = symtab[h]; sym != NULL; sym = next)
+ {
+ next = SYMBOL_NEXT (sym);
+ func (sym, data);
+ }
+ }
+}
+
+#ifdef DEBUG_SYM
+
+static void symtab_print_list (int i);
+
+static void M4_GNUC_UNUSED
+symtab_debug (void)
+{
+ token_data td;
+ const char *text;
+ symbol *s;
+ int delete;
+ static int i;
+
+ while (next_token (&td, NULL) == TOKEN_WORD)
+ {
+ text = TOKEN_DATA_TEXT (&td);
+ if (*text == '_')
+ {
+ delete = 1;
+ text++;
+ }
+ else
+ delete = 0;
+
+ s = lookup_symbol (text, SYMBOL_LOOKUP);
+
+ if (s == NULL)
+ xprintf ("Name `%s' is unknown\n", text);
+
+ lookup_symbol (text, delete ? SYMBOL_DELETE : SYMBOL_INSERT);
+ }
+ symtab_print_list (i++);
+}
+
+static void
+symtab_print_list (int i)
+{
+ symbol *sym;
+ size_t h;
+
+ xprintf ("Symbol dump #%d:\n", i);
+ for (h = 0; h < hash_table_size; h++)
+ for (sym = symtab[h]; sym != NULL; sym = sym->next)
+ xprintf ("\tname %s, bucket %lu, addr %p, next %p, "
+ "flags%s%s%s, pending %d\n",
+ SYMBOL_NAME (sym),
+ (unsigned long int) h, sym, SYMBOL_NEXT (sym),
+ SYMBOL_TRACED (sym) ? " traced" : "",
+ SYMBOL_SHADOWED (sym) ? " shadowed" : "",
+ SYMBOL_DELETED (sym) ? " deleted" : "",
+ SYMBOL_PENDING_EXPANSIONS (sym));
+}
+
+#endif /* DEBUG_SYM */