diff options
Diffstat (limited to 'strcache.c')
-rw-r--r-- | strcache.c | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/strcache.c b/strcache.c new file mode 100644 index 0000000..1ade5e7 --- /dev/null +++ b/strcache.c @@ -0,0 +1,291 @@ +/* Constant string caching for GNU Make. +Copyright (C) 2006-2014 Free Software Foundation, Inc. +This file is part of GNU Make. + +GNU Make is free software; you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +GNU Make is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "makeint.h" + +#include <stddef.h> +#include <assert.h> + +#include "hash.h" + +/* A string cached here will never be freed, so we don't need to worry about + reference counting. We just store the string, and then remember it in a + hash so it can be looked up again. */ + +typedef unsigned short int sc_buflen_t; + +struct strcache { + struct strcache *next; /* The next block of strings. Must be first! */ + sc_buflen_t end; /* Offset to the beginning of free space. */ + sc_buflen_t bytesfree; /* Free space left in this buffer. */ + sc_buflen_t count; /* # of strings in this buffer (for stats). */ + char buffer[1]; /* The buffer comes after this. */ +}; + +/* The size (in bytes) of each cache buffer. + Try to pick something that will map well into the heap. + This must be able to be represented by a short int (<=65535). */ +#define CACHE_BUFFER_BASE (8192) +#define CACHE_BUFFER_ALLOC(_s) ((_s) - (2 * sizeof (size_t))) +#define CACHE_BUFFER_OFFSET (offsetof (struct strcache, buffer)) +#define CACHE_BUFFER_SIZE(_s) (CACHE_BUFFER_ALLOC(_s) - CACHE_BUFFER_OFFSET) + +static sc_buflen_t bufsize = CACHE_BUFFER_SIZE (CACHE_BUFFER_BASE); +static struct strcache *strcache = NULL; +static struct strcache *fullcache = NULL; + +static unsigned long total_buffers = 0; +static unsigned long total_strings = 0; +static unsigned long total_size = 0; + +/* Add a new buffer to the cache. Add it at the front to reduce search time. + This can also increase the overhead, since it's less likely that older + buffers will be filled in. However, GNU make has so many smaller strings + that this doesn't seem to be much of an issue in practice. + */ +static struct strcache * +new_cache () +{ + struct strcache *new; + new = xmalloc (bufsize + CACHE_BUFFER_OFFSET); + new->end = 0; + new->count = 0; + new->bytesfree = bufsize; + + new->next = strcache; + strcache = new; + + ++total_buffers; + return new; +} + +static const char * +add_string (const char *str, unsigned int len) +{ + char *res; + struct strcache *sp; + struct strcache **spp = &strcache; + /* We need space for the nul char. */ + unsigned int sz = len + 1; + + /* If the string we want is too large to fit into a single buffer, then + no existing cache is large enough. Change the maximum size. */ + if (sz > bufsize) + bufsize = CACHE_BUFFER_SIZE ((((sz + 1) / CACHE_BUFFER_BASE) + 1) + * CACHE_BUFFER_BASE); + else + /* Find the first cache with enough free space. */ + for (; *spp != NULL; spp = &(*spp)->next) + if ((*spp)->bytesfree > sz) + break; + + /* If nothing is big enough, make a new cache. */ + sp = *spp; + if (sp == NULL) + { + sp = new_cache (); + spp = &sp; + } + + /* Add the string to this cache. */ + res = &sp->buffer[sp->end]; + memmove (res, str, len); + res[len] = '\0'; + sp->end += sz; + sp->bytesfree -= sz; + ++sp->count; + + /* If the amount free in this cache is less than the average string size, + consider it full and move it to the full list. */ + ++total_strings; + total_size += sz; + + if (sp->bytesfree < (total_size / total_strings) + 1) + { + *spp = (*spp)->next; + sp->next = fullcache; + fullcache = sp; + } + + return res; +} + + +/* Hash table of strings in the cache. */ + +static unsigned long +str_hash_1 (const void *key) +{ + return_ISTRING_HASH_1 ((const char *) key); +} + +static unsigned long +str_hash_2 (const void *key) +{ + return_ISTRING_HASH_2 ((const char *) key); +} + +static int +str_hash_cmp (const void *x, const void *y) +{ + return_ISTRING_COMPARE ((const char *) x, (const char *) y); +} + +static struct hash_table strings; +static unsigned long total_adds = 0; + +static const char * +add_hash (const char *str, int len) +{ + /* Look up the string in the hash. If it's there, return it. */ + char *const *slot = (char *const *) hash_find_slot (&strings, str); + const char *key = *slot; + + /* Count the total number of add operations we performed. */ + ++total_adds; + + if (!HASH_VACANT (key)) + return key; + + /* Not there yet so add it to a buffer, then into the hash table. */ + key = add_string (str, len); + hash_insert_at (&strings, key, slot); + return key; +} + +/* Returns true if the string is in the cache; false if not. */ +int +strcache_iscached (const char *str) +{ + struct strcache *sp; + + for (sp = strcache; sp != 0; sp = sp->next) + if (str >= sp->buffer && str < sp->buffer + sp->end) + return 1; + for (sp = fullcache; sp != 0; sp = sp->next) + if (str >= sp->buffer && str < sp->buffer + sp->end) + return 1; + + return 0; +} + +/* If the string is already in the cache, return a pointer to the cached + version. If not, add it then return a pointer to the cached version. + Note we do NOT take control of the string passed in. */ +const char * +strcache_add (const char *str) +{ + return add_hash (str, strlen (str)); +} + +const char * +strcache_add_len (const char *str, unsigned int len) +{ + /* If we're not given a nul-terminated string we have to create one, because + the hashing functions expect it. */ + if (str[len] != '\0') + { + char *key = alloca (len + 1); + memcpy (key, str, len); + key[len] = '\0'; + str = key; + } + + return add_hash (str, len); +} + +int +strcache_setbufsize (unsigned int size) +{ + if (size > bufsize) + bufsize = size; + return bufsize; +} + +void +strcache_init (void) +{ + hash_init (&strings, 8000, str_hash_1, str_hash_2, str_hash_cmp); +} + + +/* Generate some stats output. */ + +void +strcache_print_stats (const char *prefix) +{ + const struct strcache *sp; + unsigned long numbuffs = 0, fullbuffs = 0; + unsigned long totfree = 0, maxfree = 0, minfree = bufsize; + + if (! strcache) + { + printf (_("\n%s No strcache buffers\n"), prefix); + return; + } + + /* Count the first buffer separately since it's not full. */ + for (sp = strcache->next; sp != NULL; sp = sp->next) + { + sc_buflen_t bf = sp->bytesfree; + + totfree += bf; + maxfree = (bf > maxfree ? bf : maxfree); + minfree = (bf < minfree ? bf : minfree); + + ++numbuffs; + } + for (sp = fullcache; sp != NULL; sp = sp->next) + { + sc_buflen_t bf = sp->bytesfree; + + totfree += bf; + maxfree = (bf > maxfree ? bf : maxfree); + minfree = (bf < minfree ? bf : minfree); + + ++numbuffs; + ++fullbuffs; + } + + /* Make sure we didn't lose any buffers. */ + assert (total_buffers == numbuffs + 1); + + printf (_("\n%s strcache buffers: %lu (%lu) / strings = %lu / storage = %lu B / avg = %lu B\n"), + prefix, numbuffs + 1, fullbuffs, total_strings, total_size, + (total_size / total_strings)); + + printf (_("%s current buf: size = %hu B / used = %hu B / count = %hu / avg = %hu B\n"), + prefix, bufsize, strcache->end, strcache->count, + (strcache->end / strcache->count)); + + if (numbuffs) + { + unsigned long sz = total_size - bufsize; + unsigned long cnt = total_strings - strcache->count; + sc_buflen_t avgfree = totfree / numbuffs; + + printf (_("%s other used: total = %lu B / count = %lu / avg = %lu B\n"), + prefix, sz, cnt, sz / cnt); + + printf (_("%s other free: total = %lu B / max = %lu B / min = %lu B / avg = %hu B\n"), + prefix, totfree, maxfree, minfree, avgfree); + } + + printf (_("\n%s strcache performance: lookups = %lu / hit rate = %lu%%\n"), + prefix, total_adds, (long unsigned)(100.0 * (total_adds - total_strings) / total_adds)); + fputs (_("# hash-table stats:\n# "), stdout); + hash_print_stats (&strings, stdout); +} |