diff options
author | zack <zack@138bc75d-0d04-0410-961f-82ee72b054a4> | 2000-11-17 06:05:31 +0000 |
---|---|---|
committer | zack <zack@138bc75d-0d04-0410-961f-82ee72b054a4> | 2000-11-17 06:05:31 +0000 |
commit | 44acf429b688309e6afa61e4877719f75acadcca (patch) | |
tree | 925528b7b20608d2eac0acf5e939480048b49271 /gcc/stringpool.c | |
parent | 0959b35a42b8c715164772b3ae94b2b18d10ea18 (diff) | |
download | gcc-44acf429b688309e6afa61e4877719f75acadcca.tar.gz |
* stringpool.c: New file.
* ggc-common.c (ggc_mark_string_ptr, ggc_add_string_root): Delete.
(ggc_alloc_string): Now in stringpool.o.
* ggc-page.c, ggc-simple.c: Do not define or allocate empty_string.
* ggc.h: Delete prototype of ggc_add_string_root. #define
ggc_add_string_root and ggc_mark_string to nothing. Prototype
init_stringpool and stringpool_statistics.
(ggc_alloc_string): Returns a const char *.
* tree.c (hash_table, do_identifier_warnings): Delete.
(init_obstacks): Don't initialize the identifier hash table.
(get_identifier, maybe_get_identifier, start_identifier_warnings,
set_identifier_size): Now in stringpool.c.
* tree.h (struct tree_string): Constify pointer field.
(approx_sqrt): Prototype.
* Makefile.in (stringpool.o): Add rule, mention in OBJS.
* toplev.c (approx_sqrt): New function.
(compile_file): Call stringpool_statistics if mem_report is on.
(main): Call init_stringpool.
* builtins.c (c_strlen), c-decl.c (finish_decl), c-lex.c
(process_directive), c-typeck.c (constructor_asmspec, struct
initializer_stack, start_init), except.c (create_rethrow_ref),
stmt.c (digit_strings), toplev.c (decode_f_option), tree.c
(built_in_filename), varasm,c (in_named_name,
assemble_static_space, struct constant_descriptor, struct
deferred_string, struct pool_constant, force_const_mem),
i386.c (pic_label_name, global_offset_table_name), rs6000.c
(rs6000_emit_prologue, rs6000_emit_epilogue) : Constify a char *.
* c-common.c (combine_strings): Combine strings in scratch
buffer, then pass to build_string.
* optabs.c (init_libfuncs), profile.c (init_edge_profiler,
output_func_start_profiler), stmt.c (init_stmt), alpha.c
(alpha_need_linkage), arm.c (arm_encode_call_attribute),
i386.c (load_pic_register), ia64.c (ia64_encode_section_info),
rs6000.c (rs6000_encode_section_info): Create string in
scratch buffer, then pass to ggc_alloc_string.
* stmt.c (expand_asm_operands): If we must adjust the
constraint strings, do so by creating a new one, not by
modifying the old one in place. Constify some char *s.
* config/pa/pa.c (hppa_encode_label): Drop unnecessary second
argument. Create string in scratch buffer, then pass to
ggc_alloc_string.
* config/pa/pa-protos.h: Update prototype.
* config/pa/elf.h, config/pa/pa.h, config/pa/som.h:
hppa_encode_label takes only one argument.
* c-parse.in (if_prefix): Find the filename and line number at
$-2 and $-1 respectively.
* diagnostic.c (error_recursion): Add missing newline, use
fputs, translate string.
cp:
* lex.c (struct impl_files, internal_filename): Constify a char *.
java:
* jcf-parse.c (get_constant), parse.y (do_merge_string_cste):
Create string in scratch buffer, then pass to build_string.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@37514 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/stringpool.c')
-rw-r--r-- | gcc/stringpool.c | 405 |
1 files changed, 405 insertions, 0 deletions
diff --git a/gcc/stringpool.c b/gcc/stringpool.c new file mode 100644 index 00000000000..32381e01ebb --- /dev/null +++ b/gcc/stringpool.c @@ -0,0 +1,405 @@ +/* String pool for GCC. + Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +GNU CC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +/* String pool allocator. All strings allocated by ggc_alloc_string are + uniquified and stored in an obstack which is never shrunk. You can + associate a tree with a string if you wish; this is used to implement + get_identifier. + + We have our own private hash table implementation which is similar + to the one in cpphash.c (actually, it's a further refinement of + that code). libiberty's hashtab.c is not used because it requires + 100% average space overhead per string, which is unacceptable. + Also, this algorithm is faster. */ + +#include "config.h" +#include "system.h" +#include "ggc.h" +#include "tree.h" +#include "obstack.h" +#include "flags.h" +#include "toplev.h" + +/* The "" allocated string. */ +const char empty_string[] = ""; + +static struct obstack string_stack; + +/* This is the hash entry associated with each string. It lives in + the hash table; only the string lives in the obstack. Note that + the string is not necessarily NUL terminated. */ + +struct str_header +{ + const char *ptr; + tree data; /* for get_identifier */ + unsigned int len; +}; + +/* This is the hash table structure. There's only one. */ +struct str_hash +{ + struct str_header *entries; + size_t nslots; /* total slots in the entries array */ + size_t nelements; /* number of live elements */ + + /* table usage statistics */ + unsigned int searches; + unsigned int collisions; +}; +#define INITIAL_HASHSIZE (16*1024) + +static struct str_hash string_hash = { 0, INITIAL_HASHSIZE, 0, 0, 0 }; + +enum insert_option { INSERT, NO_INSERT }; + +static struct str_header *alloc_string PARAMS ((const char *, size_t, + enum insert_option)); +static inline unsigned int calc_hash PARAMS ((const unsigned char *, size_t)); +static void mark_string_hash PARAMS ((void *)); +static struct str_header *expand_string_table PARAMS ((struct str_header *)); + +/* Convenience macro for iterating over the hash table. E is set to + each live entry in turn. */ +#define FORALL_STRINGS(E) \ +for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \ + if (E->ptr != NULL) + /* block here */ + +/* Likewise, but tests ->data instead of ->ptr (for cases where we only + care about entries with ->data set) */ +#define FORALL_IDS(E) \ +for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \ + if (E->data != NULL) + +/* 0 while creating built-in identifiers. */ +static int do_identifier_warnings; + +/* Initialize the string pool. */ +void +init_stringpool () +{ + gcc_obstack_init (&string_stack); + ggc_add_root (&string_hash, 1, sizeof string_hash, mark_string_hash); + + /* Strings need no alignment. */ + obstack_alignment_mask (&string_stack) = 0; + + string_hash.entries = (struct str_header *) + xcalloc (string_hash.nslots, sizeof (struct str_header)); +} + +/* Enable warnings on similar identifiers (if requested). + Done after the built-in identifiers are created. */ +void +start_identifier_warnings () +{ + do_identifier_warnings = 1; +} + +/* Record the size of an identifier node for the language in use. + SIZE is the total size in bytes. + This is called by the language-specific files. This must be + called before allocating any identifiers. */ +void +set_identifier_size (size) + int size; +{ + tree_code_length[(int) IDENTIFIER_NODE] + = (size - sizeof (struct tree_common)) / sizeof (tree); +} + +/* Calculate the hash of the string STR, which is of length LEN. */ +static inline unsigned int +calc_hash (str, len) + const unsigned char *str; + size_t len; +{ + size_t n = len; + unsigned int r = 0; +#define HASHSTEP(r, c) ((r) * 67 + (c - 113)); + + while (n--) + r = HASHSTEP (r, *str++); + + return r + len; +#undef HASHSTEP +} + +/* Internal primitive: returns the header structure for the string of + length LENGTH, containing CONTENTS. If that string already exists + in the table, returns the existing entry. If the string hasn't + been seen before and the last argument is INSERT, inserts and returns + a new entry. Otherwise returns NULL. */ +static struct str_header * +alloc_string (contents, length, insert) + const char *contents; + size_t length; + enum insert_option insert; +{ + unsigned int hash = calc_hash ((const unsigned char *)contents, length); + unsigned int hash2; + unsigned int index; + size_t sizemask; + struct str_header *entry; + struct str_header *entries = string_hash.entries; + + sizemask = string_hash.nslots - 1; + index = hash & sizemask; + + /* hash2 must be odd, so we're guaranteed to visit every possible + location in the table during rehashing. */ + hash2 = ((hash * 17) & sizemask) | 1; + string_hash.searches++; + + for (;;) + { + entry = entries + index; + + if (entry->ptr == NULL) + break; + + if (entry->len == length + && !memcmp (entry->ptr, contents, length)) + return entry; + + index = (index + hash2) & sizemask; + string_hash.collisions++; + } + + if (insert == NO_INSERT) + return NULL; + + obstack_grow0 (&string_stack, contents, length); + entry->ptr = (const char *) obstack_finish (&string_stack); + entry->len = length; + entry->data = NULL; + + if (++string_hash.nelements * 4 < string_hash.nslots * 3) + return entry; + + /* Must expand the string table. */ + return expand_string_table (entry); +} + +/* Subroutine of alloc_string which doubles the size of the hash table + and rehashes all the strings into the new table. Returns the entry + in the new table corresponding to ENTRY. */ +static struct str_header * +expand_string_table (entry) + struct str_header *entry; +{ + struct str_header *nentries; + struct str_header *e, *nentry = NULL; + size_t size, sizemask; + + size = string_hash.nslots * 2; + nentries = (struct str_header *) xcalloc (size, sizeof (struct str_header)); + sizemask = size - 1; + + FORALL_STRINGS (e) + { + unsigned int index, hash, hash2; + + hash = calc_hash ((const unsigned char *) e->ptr, e->len); + hash2 = ((hash * 17) & sizemask) | 1; + index = hash & sizemask; + + for (;;) + { + if (nentries[index].ptr == NULL) + { + nentries[index].ptr = e->ptr; + nentries[index].len = e->len; + nentries[index].data = e->data; + if (e == entry) + nentry = nentries + index; + break; + } + + index = (index + hash2) & sizemask; + } + } + + free (string_hash.entries); + string_hash.entries = nentries; + string_hash.nslots = size; + return nentry; +} + +/* Allocate and return a string constant of length LENGTH, containing + CONTENTS. If LENGTH is -1, CONTENTS is assumed to be a + nul-terminated string, and the length is calculated using strlen. + If the same string constant has been allocated before, that copy is + returned this time too. */ + +const char * +ggc_alloc_string (contents, length) + const char *contents; + int length; +{ + struct str_header *str; + + if (length == -1) + length = strlen (contents); + + if (length == 0) + return empty_string; + + str = alloc_string (contents, length, INSERT); + return str->ptr; +} + +/* Return an IDENTIFIER_NODE whose name is TEXT (a null-terminated string). + If an identifier with that name has previously been referred to, + the same node is returned this time. */ +tree +get_identifier (text) + const char *text; +{ + tree idp; + struct str_header *str; + size_t length = strlen (text); + + str = alloc_string (text, length, INSERT); + idp = str->data; + if (idp == NULL) + { + if (TREE_CODE_LENGTH (IDENTIFIER_NODE) < 0) + abort (); /* set_identifier_size hasn't been called. */ + + /* If this identifier is longer than the clash-warning length, + do a brute force search of the entire table for clashes. */ + if (warn_id_clash && do_identifier_warnings && length >= (size_t) id_clash_len) + { + struct str_header *e; + FORALL_IDS (e) + { + if (e->len >= (size_t)id_clash_len + && !strncmp (e->ptr, text, id_clash_len)) + { + warning ("\"%s\" and \"%s\" identical in first %d characters", + text, e->ptr, id_clash_len); + break; + } + } + } + + idp = make_node (IDENTIFIER_NODE); + IDENTIFIER_LENGTH (idp) = length; + IDENTIFIER_POINTER (idp) = str->ptr; +#ifdef GATHER_STATISTICS + id_string_size += length; +#endif + str->data = idp; + } + return idp; +} + +/* If an identifier with the name TEXT (a null-terminated string) has + previously been referred to, return that node; otherwise return + NULL_TREE. */ + +tree +maybe_get_identifier (text) + const char *text; +{ + struct str_header *str; + size_t length = strlen (text); + + str = alloc_string (text, length, NO_INSERT); + if (str) + return str->data; /* N.B. str->data might be null here, if the + string has been used but not as an identifier. */ + return NULL_TREE; +} + +/* Report some basic statistics about the string pool. */ + +void +stringpool_statistics () +{ + size_t nelts, overhead, headers; + size_t total_bytes, longest, sum_of_squares; + double exp_len, exp_len2, exp2_len; + struct str_header *e; +#define SCALE(x) ((unsigned long) ((x) < 1024*10 \ + ? (x) \ + : ((x) < 1024*1024*10 \ + ? (x) / 1024 \ + : (x) / (1024*1024)))) +#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M')) + + total_bytes = longest = sum_of_squares = 0; + FORALL_STRINGS (e) + { + size_t n = e->len; + + total_bytes += n; + sum_of_squares += n*n; + if (n > longest) + longest = n; + } + + nelts = string_hash.nelements; + overhead = obstack_memory_used (&string_stack) - total_bytes; + headers = string_hash.nslots * sizeof (struct str_header); + + fprintf (stderr, +"\nString pool\n\ +entries\t\t%lu\n\ +slots\t\t%lu\n\ +bytes\t\t%lu%c (%lu%c overhead)\n\ +table size\t%lu%c\n", + (unsigned long) nelts, (unsigned long) string_hash.nslots, + SCALE (total_bytes), LABEL (total_bytes), + SCALE (overhead), LABEL (overhead), + SCALE (headers), LABEL (headers)); + + exp_len = (double)total_bytes / (double)nelts; + exp2_len = exp_len * exp_len; + exp_len2 = (double)sum_of_squares / (double)nelts; + + fprintf (stderr, +"coll/search\t%.4f\n\ +ins/search\t%.4f\n\ +avg. entry\t%.2f bytes (+/- %.2f)\n\ +longest entry\t%lu\n", + (double) string_hash.collisions / (double) string_hash.searches, + (double) nelts / (double) string_hash.searches, + exp_len, approx_sqrt (exp_len2 - exp2_len), + (unsigned long) longest); +#undef SCALE +#undef LABEL +} + +/* Mark the string hash for GC. */ + +static void +mark_string_hash (arg) + void *arg ATTRIBUTE_UNUSED; +{ + struct str_header *h; + + FORALL_IDS (h) + { + ggc_mark_tree (h->data); + } +} |