summaryrefslogtreecommitdiff
path: root/gcc/stringpool.c
diff options
context:
space:
mode:
authorzack <zack@138bc75d-0d04-0410-961f-82ee72b054a4>2000-11-17 06:05:31 +0000
committerzack <zack@138bc75d-0d04-0410-961f-82ee72b054a4>2000-11-17 06:05:31 +0000
commit44acf429b688309e6afa61e4877719f75acadcca (patch)
tree925528b7b20608d2eac0acf5e939480048b49271 /gcc/stringpool.c
parent0959b35a42b8c715164772b3ae94b2b18d10ea18 (diff)
downloadgcc-44acf429b688309e6afa61e4877719f75acadcca.tar.gz
* stringpool.c: New file.
* ggc-common.c (ggc_mark_string_ptr, ggc_add_string_root): Delete. (ggc_alloc_string): Now in stringpool.o. * ggc-page.c, ggc-simple.c: Do not define or allocate empty_string. * ggc.h: Delete prototype of ggc_add_string_root. #define ggc_add_string_root and ggc_mark_string to nothing. Prototype init_stringpool and stringpool_statistics. (ggc_alloc_string): Returns a const char *. * tree.c (hash_table, do_identifier_warnings): Delete. (init_obstacks): Don't initialize the identifier hash table. (get_identifier, maybe_get_identifier, start_identifier_warnings, set_identifier_size): Now in stringpool.c. * tree.h (struct tree_string): Constify pointer field. (approx_sqrt): Prototype. * Makefile.in (stringpool.o): Add rule, mention in OBJS. * toplev.c (approx_sqrt): New function. (compile_file): Call stringpool_statistics if mem_report is on. (main): Call init_stringpool. * builtins.c (c_strlen), c-decl.c (finish_decl), c-lex.c (process_directive), c-typeck.c (constructor_asmspec, struct initializer_stack, start_init), except.c (create_rethrow_ref), stmt.c (digit_strings), toplev.c (decode_f_option), tree.c (built_in_filename), varasm,c (in_named_name, assemble_static_space, struct constant_descriptor, struct deferred_string, struct pool_constant, force_const_mem), i386.c (pic_label_name, global_offset_table_name), rs6000.c (rs6000_emit_prologue, rs6000_emit_epilogue) : Constify a char *. * c-common.c (combine_strings): Combine strings in scratch buffer, then pass to build_string. * optabs.c (init_libfuncs), profile.c (init_edge_profiler, output_func_start_profiler), stmt.c (init_stmt), alpha.c (alpha_need_linkage), arm.c (arm_encode_call_attribute), i386.c (load_pic_register), ia64.c (ia64_encode_section_info), rs6000.c (rs6000_encode_section_info): Create string in scratch buffer, then pass to ggc_alloc_string. * stmt.c (expand_asm_operands): If we must adjust the constraint strings, do so by creating a new one, not by modifying the old one in place. Constify some char *s. * config/pa/pa.c (hppa_encode_label): Drop unnecessary second argument. Create string in scratch buffer, then pass to ggc_alloc_string. * config/pa/pa-protos.h: Update prototype. * config/pa/elf.h, config/pa/pa.h, config/pa/som.h: hppa_encode_label takes only one argument. * c-parse.in (if_prefix): Find the filename and line number at $-2 and $-1 respectively. * diagnostic.c (error_recursion): Add missing newline, use fputs, translate string. cp: * lex.c (struct impl_files, internal_filename): Constify a char *. java: * jcf-parse.c (get_constant), parse.y (do_merge_string_cste): Create string in scratch buffer, then pass to build_string. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@37514 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/stringpool.c')
-rw-r--r--gcc/stringpool.c405
1 files changed, 405 insertions, 0 deletions
diff --git a/gcc/stringpool.c b/gcc/stringpool.c
new file mode 100644
index 00000000000..32381e01ebb
--- /dev/null
+++ b/gcc/stringpool.c
@@ -0,0 +1,405 @@
+/* String pool for GCC.
+ Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+GNU CC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA. */
+
+/* String pool allocator. All strings allocated by ggc_alloc_string are
+ uniquified and stored in an obstack which is never shrunk. You can
+ associate a tree with a string if you wish; this is used to implement
+ get_identifier.
+
+ We have our own private hash table implementation which is similar
+ to the one in cpphash.c (actually, it's a further refinement of
+ that code). libiberty's hashtab.c is not used because it requires
+ 100% average space overhead per string, which is unacceptable.
+ Also, this algorithm is faster. */
+
+#include "config.h"
+#include "system.h"
+#include "ggc.h"
+#include "tree.h"
+#include "obstack.h"
+#include "flags.h"
+#include "toplev.h"
+
+/* The "" allocated string. */
+const char empty_string[] = "";
+
+static struct obstack string_stack;
+
+/* This is the hash entry associated with each string. It lives in
+ the hash table; only the string lives in the obstack. Note that
+ the string is not necessarily NUL terminated. */
+
+struct str_header
+{
+ const char *ptr;
+ tree data; /* for get_identifier */
+ unsigned int len;
+};
+
+/* This is the hash table structure. There's only one. */
+struct str_hash
+{
+ struct str_header *entries;
+ size_t nslots; /* total slots in the entries array */
+ size_t nelements; /* number of live elements */
+
+ /* table usage statistics */
+ unsigned int searches;
+ unsigned int collisions;
+};
+#define INITIAL_HASHSIZE (16*1024)
+
+static struct str_hash string_hash = { 0, INITIAL_HASHSIZE, 0, 0, 0 };
+
+enum insert_option { INSERT, NO_INSERT };
+
+static struct str_header *alloc_string PARAMS ((const char *, size_t,
+ enum insert_option));
+static inline unsigned int calc_hash PARAMS ((const unsigned char *, size_t));
+static void mark_string_hash PARAMS ((void *));
+static struct str_header *expand_string_table PARAMS ((struct str_header *));
+
+/* Convenience macro for iterating over the hash table. E is set to
+ each live entry in turn. */
+#define FORALL_STRINGS(E) \
+for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
+ if (E->ptr != NULL)
+ /* block here */
+
+/* Likewise, but tests ->data instead of ->ptr (for cases where we only
+ care about entries with ->data set) */
+#define FORALL_IDS(E) \
+for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \
+ if (E->data != NULL)
+
+/* 0 while creating built-in identifiers. */
+static int do_identifier_warnings;
+
+/* Initialize the string pool. */
+void
+init_stringpool ()
+{
+ gcc_obstack_init (&string_stack);
+ ggc_add_root (&string_hash, 1, sizeof string_hash, mark_string_hash);
+
+ /* Strings need no alignment. */
+ obstack_alignment_mask (&string_stack) = 0;
+
+ string_hash.entries = (struct str_header *)
+ xcalloc (string_hash.nslots, sizeof (struct str_header));
+}
+
+/* Enable warnings on similar identifiers (if requested).
+ Done after the built-in identifiers are created. */
+void
+start_identifier_warnings ()
+{
+ do_identifier_warnings = 1;
+}
+
+/* Record the size of an identifier node for the language in use.
+ SIZE is the total size in bytes.
+ This is called by the language-specific files. This must be
+ called before allocating any identifiers. */
+void
+set_identifier_size (size)
+ int size;
+{
+ tree_code_length[(int) IDENTIFIER_NODE]
+ = (size - sizeof (struct tree_common)) / sizeof (tree);
+}
+
+/* Calculate the hash of the string STR, which is of length LEN. */
+static inline unsigned int
+calc_hash (str, len)
+ const unsigned char *str;
+ size_t len;
+{
+ size_t n = len;
+ unsigned int r = 0;
+#define HASHSTEP(r, c) ((r) * 67 + (c - 113));
+
+ while (n--)
+ r = HASHSTEP (r, *str++);
+
+ return r + len;
+#undef HASHSTEP
+}
+
+/* Internal primitive: returns the header structure for the string of
+ length LENGTH, containing CONTENTS. If that string already exists
+ in the table, returns the existing entry. If the string hasn't
+ been seen before and the last argument is INSERT, inserts and returns
+ a new entry. Otherwise returns NULL. */
+static struct str_header *
+alloc_string (contents, length, insert)
+ const char *contents;
+ size_t length;
+ enum insert_option insert;
+{
+ unsigned int hash = calc_hash ((const unsigned char *)contents, length);
+ unsigned int hash2;
+ unsigned int index;
+ size_t sizemask;
+ struct str_header *entry;
+ struct str_header *entries = string_hash.entries;
+
+ sizemask = string_hash.nslots - 1;
+ index = hash & sizemask;
+
+ /* hash2 must be odd, so we're guaranteed to visit every possible
+ location in the table during rehashing. */
+ hash2 = ((hash * 17) & sizemask) | 1;
+ string_hash.searches++;
+
+ for (;;)
+ {
+ entry = entries + index;
+
+ if (entry->ptr == NULL)
+ break;
+
+ if (entry->len == length
+ && !memcmp (entry->ptr, contents, length))
+ return entry;
+
+ index = (index + hash2) & sizemask;
+ string_hash.collisions++;
+ }
+
+ if (insert == NO_INSERT)
+ return NULL;
+
+ obstack_grow0 (&string_stack, contents, length);
+ entry->ptr = (const char *) obstack_finish (&string_stack);
+ entry->len = length;
+ entry->data = NULL;
+
+ if (++string_hash.nelements * 4 < string_hash.nslots * 3)
+ return entry;
+
+ /* Must expand the string table. */
+ return expand_string_table (entry);
+}
+
+/* Subroutine of alloc_string which doubles the size of the hash table
+ and rehashes all the strings into the new table. Returns the entry
+ in the new table corresponding to ENTRY. */
+static struct str_header *
+expand_string_table (entry)
+ struct str_header *entry;
+{
+ struct str_header *nentries;
+ struct str_header *e, *nentry = NULL;
+ size_t size, sizemask;
+
+ size = string_hash.nslots * 2;
+ nentries = (struct str_header *) xcalloc (size, sizeof (struct str_header));
+ sizemask = size - 1;
+
+ FORALL_STRINGS (e)
+ {
+ unsigned int index, hash, hash2;
+
+ hash = calc_hash ((const unsigned char *) e->ptr, e->len);
+ hash2 = ((hash * 17) & sizemask) | 1;
+ index = hash & sizemask;
+
+ for (;;)
+ {
+ if (nentries[index].ptr == NULL)
+ {
+ nentries[index].ptr = e->ptr;
+ nentries[index].len = e->len;
+ nentries[index].data = e->data;
+ if (e == entry)
+ nentry = nentries + index;
+ break;
+ }
+
+ index = (index + hash2) & sizemask;
+ }
+ }
+
+ free (string_hash.entries);
+ string_hash.entries = nentries;
+ string_hash.nslots = size;
+ return nentry;
+}
+
+/* Allocate and return a string constant of length LENGTH, containing
+ CONTENTS. If LENGTH is -1, CONTENTS is assumed to be a
+ nul-terminated string, and the length is calculated using strlen.
+ If the same string constant has been allocated before, that copy is
+ returned this time too. */
+
+const char *
+ggc_alloc_string (contents, length)
+ const char *contents;
+ int length;
+{
+ struct str_header *str;
+
+ if (length == -1)
+ length = strlen (contents);
+
+ if (length == 0)
+ return empty_string;
+
+ str = alloc_string (contents, length, INSERT);
+ return str->ptr;
+}
+
+/* Return an IDENTIFIER_NODE whose name is TEXT (a null-terminated string).
+ If an identifier with that name has previously been referred to,
+ the same node is returned this time. */
+tree
+get_identifier (text)
+ const char *text;
+{
+ tree idp;
+ struct str_header *str;
+ size_t length = strlen (text);
+
+ str = alloc_string (text, length, INSERT);
+ idp = str->data;
+ if (idp == NULL)
+ {
+ if (TREE_CODE_LENGTH (IDENTIFIER_NODE) < 0)
+ abort (); /* set_identifier_size hasn't been called. */
+
+ /* If this identifier is longer than the clash-warning length,
+ do a brute force search of the entire table for clashes. */
+ if (warn_id_clash && do_identifier_warnings && length >= (size_t) id_clash_len)
+ {
+ struct str_header *e;
+ FORALL_IDS (e)
+ {
+ if (e->len >= (size_t)id_clash_len
+ && !strncmp (e->ptr, text, id_clash_len))
+ {
+ warning ("\"%s\" and \"%s\" identical in first %d characters",
+ text, e->ptr, id_clash_len);
+ break;
+ }
+ }
+ }
+
+ idp = make_node (IDENTIFIER_NODE);
+ IDENTIFIER_LENGTH (idp) = length;
+ IDENTIFIER_POINTER (idp) = str->ptr;
+#ifdef GATHER_STATISTICS
+ id_string_size += length;
+#endif
+ str->data = idp;
+ }
+ return idp;
+}
+
+/* If an identifier with the name TEXT (a null-terminated string) has
+ previously been referred to, return that node; otherwise return
+ NULL_TREE. */
+
+tree
+maybe_get_identifier (text)
+ const char *text;
+{
+ struct str_header *str;
+ size_t length = strlen (text);
+
+ str = alloc_string (text, length, NO_INSERT);
+ if (str)
+ return str->data; /* N.B. str->data might be null here, if the
+ string has been used but not as an identifier. */
+ return NULL_TREE;
+}
+
+/* Report some basic statistics about the string pool. */
+
+void
+stringpool_statistics ()
+{
+ size_t nelts, overhead, headers;
+ size_t total_bytes, longest, sum_of_squares;
+ double exp_len, exp_len2, exp2_len;
+ struct str_header *e;
+#define SCALE(x) ((unsigned long) ((x) < 1024*10 \
+ ? (x) \
+ : ((x) < 1024*1024*10 \
+ ? (x) / 1024 \
+ : (x) / (1024*1024))))
+#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
+
+ total_bytes = longest = sum_of_squares = 0;
+ FORALL_STRINGS (e)
+ {
+ size_t n = e->len;
+
+ total_bytes += n;
+ sum_of_squares += n*n;
+ if (n > longest)
+ longest = n;
+ }
+
+ nelts = string_hash.nelements;
+ overhead = obstack_memory_used (&string_stack) - total_bytes;
+ headers = string_hash.nslots * sizeof (struct str_header);
+
+ fprintf (stderr,
+"\nString pool\n\
+entries\t\t%lu\n\
+slots\t\t%lu\n\
+bytes\t\t%lu%c (%lu%c overhead)\n\
+table size\t%lu%c\n",
+ (unsigned long) nelts, (unsigned long) string_hash.nslots,
+ SCALE (total_bytes), LABEL (total_bytes),
+ SCALE (overhead), LABEL (overhead),
+ SCALE (headers), LABEL (headers));
+
+ exp_len = (double)total_bytes / (double)nelts;
+ exp2_len = exp_len * exp_len;
+ exp_len2 = (double)sum_of_squares / (double)nelts;
+
+ fprintf (stderr,
+"coll/search\t%.4f\n\
+ins/search\t%.4f\n\
+avg. entry\t%.2f bytes (+/- %.2f)\n\
+longest entry\t%lu\n",
+ (double) string_hash.collisions / (double) string_hash.searches,
+ (double) nelts / (double) string_hash.searches,
+ exp_len, approx_sqrt (exp_len2 - exp2_len),
+ (unsigned long) longest);
+#undef SCALE
+#undef LABEL
+}
+
+/* Mark the string hash for GC. */
+
+static void
+mark_string_hash (arg)
+ void *arg ATTRIBUTE_UNUSED;
+{
+ struct str_header *h;
+
+ FORALL_IDS (h)
+ {
+ ggc_mark_tree (h->data);
+ }
+}