diff options
author | H. Peter Anvin (Intel) <hpa@zytor.com> | 2018-12-11 12:30:25 -0800 |
---|---|---|
committer | H. Peter Anvin (Intel) <hpa@zytor.com> | 2018-12-11 13:18:49 -0800 |
commit | ebb05a0e5fa8dfae58e6a00e550005df4b0f58f8 (patch) | |
tree | 337a6804dc6a86858bbd02d7e55af58e7a6c9e3f | |
parent | ddb290681e52aee70fc4b3342829fb9770a089b2 (diff) | |
download | nasm-ebb05a0e5fa8dfae58e6a00e550005df4b0f58f8.tar.gz |
hashtbl: revamp the hash table interface, support binary keys
Add binary key support to the hash table interface. Clean up the
interface to contain less extraneous crud.
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
-rw-r--r-- | asm/labels.c | 2 | ||||
-rw-r--r-- | asm/preproc.c | 39 | ||||
-rw-r--r-- | asm/srcfile.c | 1 | ||||
-rw-r--r-- | include/hashtbl.h | 47 | ||||
-rw-r--r-- | include/strlist.h | 2 | ||||
-rw-r--r-- | nasmlib/crc64.c | 22 | ||||
-rw-r--r-- | nasmlib/hashtbl.c | 222 | ||||
-rw-r--r-- | nasmlib/strlist.c | 14 | ||||
-rw-r--r-- | output/codeview.c | 1 | ||||
-rw-r--r-- | output/outmacho.c | 1 | ||||
-rw-r--r-- | output/strtbl.c | 16 |
11 files changed, 215 insertions, 152 deletions
diff --git a/asm/labels.c b/asm/labels.c index 1e7301e1..8eeb5b71 100644 --- a/asm/labels.c +++ b/asm/labels.c @@ -545,8 +545,6 @@ void backend_label(const char *label, int32_t segment, int64_t offset) int init_labels(void) { - hash_init(<ab, HASH_LARGE); - ldata = lfree = nasm_malloc(LBLK_SIZE); init_block(lfree); diff --git a/asm/preproc.c b/asm/preproc.c index de0f7ce2..15d8bbaf 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -623,12 +623,13 @@ static void free_mmacro(MMacro * m) */ static void free_smacro_table(struct hash_table *smt) { - SMacro *s, *tmp; - const char *key; - struct hash_tbl_node *it = NULL; + struct hash_iterator it; + const struct hash_node *np; - while ((s = hash_iterate(smt, &it, &key)) != NULL) { - nasm_free((void *)key); + hash_for_each(smt, it, np) { + SMacro *tmp; + SMacro *s = np->data; + nasm_free((void *)np->key); list_for_each_safe(s, tmp, s) { nasm_free(s->name); free_tlist(s->expansion); @@ -640,14 +641,14 @@ static void free_smacro_table(struct hash_table *smt) static void free_mmacro_table(struct hash_table *mmt) { - MMacro *m, *tmp; - const char *key; - struct hash_tbl_node *it = NULL; - - it = NULL; - while ((m = hash_iterate(mmt, &it, &key)) != NULL) { - nasm_free((void *)key); - list_for_each_safe(m ,tmp, m) + struct hash_iterator it; + const struct hash_node *np; + + hash_for_each(mmt, it, np) { + MMacro *tmp; + MMacro *m = np->data; + nasm_free((void *)np->key); + list_for_each_safe(m, tmp, m) free_mmacro(m); } hash_free(mmt); @@ -664,8 +665,6 @@ static void free_macros(void) */ static void init_macros(void) { - hash_init(&smacros, HASH_LARGE); - hash_init(&mmacros, HASH_LARGE); } /* @@ -691,12 +690,14 @@ hash_findi_add(struct hash_table *hash, const char *str) struct hash_insert hi; void **r; char *strx; + size_t l = strlen(str) + 1; - r = hash_findi(hash, str, &hi); + r = hash_findib(hash, str, l, &hi); if (r) return r; - strx = nasm_strdup(str); /* Use a more efficient allocator here? */ + strx = nasm_malloc(l); /* Use a more efficient allocator here? */ + memcpy(strx, str, l); return hash_add(&hi, strx, NULL); } @@ -2624,9 +2625,8 @@ static int do_directive(Token *tline, char **output) } if (i == PP_PUSH) { - ctx = nasm_malloc(sizeof(Context)); + nasm_new(ctx); ctx->next = cstk; - hash_init(&ctx->localmac, HASH_SMALL); ctx->name = p; ctx->number = unique++; cstk = ctx; @@ -4932,7 +4932,6 @@ pp_reset(const char *file, int apass, struct strlist *dep_list) static void pp_init(void) { - hash_init(&FileHash, HASH_MEDIUM); } static char *pp_getline(void) diff --git a/asm/srcfile.c b/asm/srcfile.c index f548da65..73054853 100644 --- a/asm/srcfile.c +++ b/asm/srcfile.c @@ -50,7 +50,6 @@ static struct hash_table filename_hash; void src_init(void) { - hash_init(&filename_hash, HASH_MEDIUM); } void src_free(void) diff --git a/include/hashtbl.h b/include/hashtbl.h index 25f2d2d5..239f6104 100644 --- a/include/hashtbl.h +++ b/include/hashtbl.h @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2017 The NASM Authors - All Rights Reserved + * Copyright 1996-2018 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. * @@ -43,43 +43,58 @@ #include <stddef.h> #include "nasmlib.h" -struct hash_tbl_node { +struct hash_node { uint64_t hash; - const char *key; + const void *key; + size_t keylen; void *data; }; struct hash_table { - struct hash_tbl_node *table; + struct hash_node *table; size_t load; size_t size; size_t max_load; }; struct hash_insert { - uint64_t hash; struct hash_table *head; - struct hash_tbl_node *where; + struct hash_node *where; + struct hash_node node; +}; + +struct hash_iterator { + const struct hash_table *head; + const struct hash_node *next; }; uint64_t crc64(uint64_t crc, const char *string); uint64_t crc64i(uint64_t crc, const char *string); +uint64_t crc64b(uint64_t crc, const void *data, size_t len); +uint64_t crc64ib(uint64_t crc, const void *data, size_t len); #define CRC64_INIT UINT64_C(0xffffffffffffffff) -/* Some reasonable initial sizes... */ -#define HASH_SMALL 4 -#define HASH_MEDIUM 16 -#define HASH_LARGE 256 - -void hash_init(struct hash_table *head, size_t size); void **hash_find(struct hash_table *head, const char *string, struct hash_insert *insert); +void **hash_findb(struct hash_table *head, const void *key, size_t keylen, + struct hash_insert *insert); void **hash_findi(struct hash_table *head, const char *string, struct hash_insert *insert); -void **hash_add(struct hash_insert *insert, const char *string, void *data); -void *hash_iterate(const struct hash_table *head, - struct hash_tbl_node **iterator, - const char **key); +void **hash_findib(struct hash_table *head, const void *key, size_t keylen, + struct hash_insert *insert); +void **hash_add(struct hash_insert *insert, const void *key, void *data); +static inline void hash_iterator_init(const struct hash_table *head, + struct hash_iterator *iterator) +{ + iterator->head = head; + iterator->next = head->table; +} +const struct hash_node *hash_iterate(struct hash_iterator *iterator); + +#define hash_for_each(_head,_it,_np) \ + for (hash_iterator_init((_head), &(_it)), (_np) = hash_iterate(&(_it)) ; \ + (_np) ; (_np) = hash_iterate(&(_it))) + void hash_free(struct hash_table *head); void hash_free_all(struct hash_table *head, bool free_keys); diff --git a/include/strlist.h b/include/strlist.h index fd5a0192..00ac01c6 100644 --- a/include/strlist.h +++ b/include/strlist.h @@ -44,7 +44,7 @@ struct strlist_entry { struct strlist_entry *next; - size_t len; + size_t size; char str[1]; }; diff --git a/nasmlib/crc64.c b/nasmlib/crc64.c index f901d403..334cd307 100644 --- a/nasmlib/crc64.c +++ b/nasmlib/crc64.c @@ -187,3 +187,25 @@ uint64_t crc64i(uint64_t crc, const char *str) return crc; } + +uint64_t crc64b(uint64_t crc, const void *data, size_t len) +{ + const uint8_t *str = data; + + while (len--) { + crc = crc64_tab[(uint8_t)crc ^ *str++] ^ (crc >> 8); + } + + return crc; +} + +uint64_t crc64ib(uint64_t crc, const void *data, size_t len) +{ + const uint8_t *str = data; + + while (len--) { + crc = crc64_tab[(uint8_t)crc ^ nasm_tolower(*str++)] ^ (crc >> 8); + } + + return crc; +} diff --git a/nasmlib/hashtbl.c b/nasmlib/hashtbl.c index bc0776b8..a6cbdb19 100644 --- a/nasmlib/hashtbl.c +++ b/nasmlib/hashtbl.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2009 The NASM Authors - All Rights Reserved + * Copyright 1996-2018 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. * @@ -43,10 +43,11 @@ #include "nasm.h" #include "hashtbl.h" -#define HASH_MAX_LOAD 2 /* Higher = more memory-efficient, slower */ +#define HASH_MAX_LOAD 2 /* Higher = more memory-efficient, slower */ +#define HASH_INIT_SIZE 16 /* Initial size (power of 2, min 4) */ -#define hash_calc(key) crc64(CRC64_INIT, (key)) -#define hash_calci(key) crc64i(CRC64_INIT, (key)) +#define hash_calc(key,keylen) crc64b(CRC64_INIT, (key), (keylen)) +#define hash_calci(key,keylen) crc64ib(CRC64_INIT, (key), (keylen)) #define hash_max_load(size) ((size) * (HASH_MAX_LOAD - 1) / HASH_MAX_LOAD) #define hash_expand(size) ((size) << 1) #define hash_mask(size) ((size) - 1) @@ -54,129 +55,167 @@ #define hash_inc(hash, mask) ((((hash) >> 32) & (mask)) | 1) /* always odd */ #define hash_pos_next(pos, inc, mask) (((pos) + (inc)) & (mask)) -static struct hash_tbl_node *alloc_table(size_t newsize) +static void hash_init(struct hash_table *head) { - size_t bytes = newsize * sizeof(struct hash_tbl_node); - return nasm_zalloc(bytes); -} - -void hash_init(struct hash_table *head, size_t size) -{ - nasm_assert(is_power2(size)); - head->table = alloc_table(size); + head->size = HASH_INIT_SIZE; head->load = 0; - head->size = size; - head->max_load = hash_max_load(size); + head->max_load = hash_max_load(head->size); + nasm_newn(head->table, head->size); } /* - * Find an entry in a hash table. + * Find an entry in a hash table. The key can be any binary object. * * On failure, if "insert" is non-NULL, store data in that structure * which can be used to insert that node using hash_add(). - * - * WARNING: this data is only valid until the very next call of - * hash_add(); it cannot be "saved" to a later date. + * See hash_add() for constraints on the uses of the insert object. * * On success, return a pointer to the "data" element of the hash * structure. */ -void **hash_find(struct hash_table *head, const char *key, - struct hash_insert *insert) +void **hash_findb(struct hash_table *head, const void *key, + size_t keylen, struct hash_insert *insert) { - struct hash_tbl_node *np; - struct hash_tbl_node *tbl = head->table; - uint64_t hash = hash_calc(key); + struct hash_node *np = NULL; + struct hash_node *tbl = head->table; + uint64_t hash = hash_calc(key, keylen); size_t mask = hash_mask(head->size); size_t pos = hash_pos(hash, mask); size_t inc = hash_inc(hash, mask); - while ((np = &tbl[pos])->key) { - if (hash == np->hash && !strcmp(key, np->key)) - return &np->data; - pos = hash_pos_next(pos, inc, mask); + if (likely(tbl)) { + while ((np = &tbl[pos])->key) { + if (hash == np->hash && + keylen == np->keylen && + !memcmp(key, np->key, keylen)) + return &np->data; + pos = hash_pos_next(pos, inc, mask); + } } /* Not found. Store info for insert if requested. */ if (insert) { + insert->node.hash = hash; + insert->node.key = key; + insert->node.keylen = keylen; + insert->node.data = NULL; insert->head = head; - insert->hash = hash; insert->where = np; } return NULL; } /* - * Same as hash_find, but for case-insensitive hashing. + * Same as hash_findb(), but for a C string. */ -void **hash_findi(struct hash_table *head, const char *key, - struct hash_insert *insert) +void **hash_find(struct hash_table *head, const char *key, + struct hash_insert *insert) +{ + return hash_findb(head, key, strlen(key)+1, insert); +} + +/* + * Same as hash_findb(), but for case-insensitive hashing. + */ +void **hash_findib(struct hash_table *head, const void *key, size_t keylen, + struct hash_insert *insert) { - struct hash_tbl_node *np; - struct hash_tbl_node *tbl = head->table; - uint64_t hash = hash_calci(key); + struct hash_node *np = NULL; + struct hash_node *tbl = head->table; + uint64_t hash = hash_calci(key, keylen); size_t mask = hash_mask(head->size); size_t pos = hash_pos(hash, mask); size_t inc = hash_inc(hash, mask); - while ((np = &tbl[pos])->key) { - if (hash == np->hash && !nasm_stricmp(key, np->key)) - return &np->data; - pos = hash_pos_next(pos, inc, mask); + if (likely(tbl)) { + while ((np = &tbl[pos])->key) { + if (hash == np->hash && + keylen == np->keylen && + !nasm_memicmp(key, np->key, keylen)) + return &np->data; + pos = hash_pos_next(pos, inc, mask); + } } /* Not found. Store info for insert if requested. */ if (insert) { + insert->node.hash = hash; + insert->node.key = key; + insert->node.keylen = keylen; + insert->node.data = NULL; insert->head = head; - insert->hash = hash; insert->where = np; } return NULL; } /* + * Same as hash_find(), but for case-insensitive hashing. + */ +void **hash_findi(struct hash_table *head, const char *key, + struct hash_insert *insert) +{ + return hash_findib(head, key, strlen(key)+1, insert); +} + +/* * Insert node. Return a pointer to the "data" element of the newly * created hash node. + * + * The following constraints apply: + * 1. A call to hash_add() invalidates all other outstanding hash_insert + * objects; attempting to use them causes a wild pointer reference. + * 2. The key provided must exactly match the key passed to hash_find*(), + * but it does not have to point to the same storage address. The key + * buffer provided to this function must not be freed for the lifespan + * of the hash. NULL will use the same pointer that was passed to + * hash_find*(). */ -void **hash_add(struct hash_insert *insert, const char *key, void *data) +void **hash_add(struct hash_insert *insert, const void *key, void *data) { struct hash_table *head = insert->head; - struct hash_tbl_node *np = insert->where; + struct hash_node *np = insert->where; + + if (unlikely(!np)) { + hash_init(head); + /* The hash table is empty, so we don't need to iterate here */ + np = &head->table[hash_pos(insert->node.hash, hash_mask(head->size))]; + } /* * Insert node. We can always do this, even if we need to * rebalance immediately after. */ - np->hash = insert->hash; - np->key = key; + *np = insert->node; np->data = data; + if (key) + np->key = key; - if (++head->load > head->max_load) { + if (unlikely(++head->load > head->max_load)) { /* Need to expand the table */ - size_t newsize = hash_expand(head->size); - struct hash_tbl_node *newtbl = alloc_table(newsize); - size_t mask = hash_mask(newsize); + size_t newsize = hash_expand(head->size); + struct hash_node *newtbl; + size_t mask = hash_mask(newsize); + struct hash_node *op, *xp; + size_t i; - if (head->table) { - struct hash_tbl_node *op, *xp; - size_t i; + nasm_newn(newtbl, newsize); - /* Rebalance all the entries */ - for (i = 0, op = head->table; i < head->size; i++, op++) { - if (op->key) { - size_t pos = hash_pos(op->hash, mask); - size_t inc = hash_inc(op->hash, mask); + /* Rebalance all the entries */ + for (i = 0, op = head->table; i < head->size; i++, op++) { + if (op->key) { + size_t pos = hash_pos(op->hash, mask); + size_t inc = hash_inc(op->hash, mask); - while ((xp = &newtbl[pos])->key) - pos = hash_pos_next(pos, inc, mask); + while ((xp = &newtbl[pos])->key) + pos = hash_pos_next(pos, inc, mask); - *xp = *op; - if (op == np) - np = xp; - } + *xp = *op; + if (op == np) + np = xp; } - nasm_free(head->table); } + nasm_free(head->table); head->table = newtbl; head->size = newsize; @@ -187,36 +226,30 @@ void **hash_add(struct hash_insert *insert, const char *key, void *data) } /* - * Iterate over all members of a hash set. For the first call, - * iterator should be initialized to NULL. Returns the data pointer, - * or NULL on failure. + * Iterate over all members of a hash set. For the first call, + * iter->node should be initialized to NULL. Returns a pointer to + * a struct hash_node representing the current object, or NULL + * if we have reached the end of the hash table; this is the + * + * Calling hash_add() will invalidate the iterator. */ -void *hash_iterate(const struct hash_table *head, - struct hash_tbl_node **iterator, - const char **key) +const struct hash_node *hash_iterate(struct hash_iterator *iter) { - struct hash_tbl_node *np = *iterator; - struct hash_tbl_node *ep = head->table + head->size; + const struct hash_table *head = iter->head; + const struct hash_node *cp = iter->next; + const struct hash_node *ep = head->table + head->size; - if (!np) { - np = head->table; - if (!np) - return NULL; /* Uninitialized table */ - } - - while (np < ep) { + /* For an empty table, np == ep == NULL */ + while (cp < ep) { + const struct hash_node *np = cp+1; if (np->key) { - *iterator = np + 1; - if (key) - *key = np->key; - return np->data; + iter->next = np; + return cp; } - np++; + cp = np; } - *iterator = NULL; - if (key) - *key = NULL; + iter->next = head->table; return NULL; } @@ -229,8 +262,10 @@ void *hash_iterate(const struct hash_table *head, void hash_free(struct hash_table *head) { void *p = head->table; - head->table = NULL; - nasm_free(p); + if (likely(p)) { + head->table = NULL; + nasm_free(p); + } } /* @@ -242,14 +277,13 @@ void hash_free(struct hash_table *head) */ void hash_free_all(struct hash_table *head, bool free_keys) { - struct hash_tbl_node *iter = NULL; - const char *keyp; - void *d; + struct hash_iterator it; + const struct hash_node *np; - while ((d = hash_iterate(head, &iter, &keyp))) { - nasm_free(d); + hash_for_each(head, it, np) { + nasm_free(np->data); if (free_keys) - nasm_free((void *)keyp); + nasm_free((void *)np->key); } hash_free(head); diff --git a/nasmlib/strlist.c b/nasmlib/strlist.c index 868fa3bf..6dfaa46e 100644 --- a/nasmlib/strlist.c +++ b/nasmlib/strlist.c @@ -43,7 +43,6 @@ struct strlist *strlist_alloc(void) { struct strlist *list = nasm_zalloc(sizeof(*list)); - hash_init(&list->hash, HASH_MEDIUM); list->tailp = &list->head; return list; } @@ -56,20 +55,19 @@ bool strlist_add(struct strlist *list, const char *str) { struct strlist_entry *e; struct hash_insert hi; - size_t len; + size_t size; if (!list) return false; - if (hash_find(&list->hash, str, &hi)) + size = strlen(str) + 1; + if (hash_findb(&list->hash, str, size, &hi)) return false; - len = strlen(str); - /* Structure already has char[1] as EOS */ - e = nasm_zalloc(sizeof(*e) + len); - e->len = len; - memcpy(e->str, str, len + 1); + e = nasm_zalloc(sizeof(*e) - 1 + size); + e->size = size; + memcpy(e->str, str, size); *list->tailp = e; list->tailp = &e->next; diff --git a/output/codeview.c b/output/codeview.c index 51eb2ea9..6028fc74 100644 --- a/output/codeview.c +++ b/output/codeview.c @@ -177,7 +177,6 @@ static void cv8_init(void) cv8_state.source_files = NULL; cv8_state.source_files_tail = &cv8_state.source_files; - hash_init(&cv8_state.file_hash, HASH_MEDIUM); cv8_state.num_files = 0; cv8_state.total_filename_len = 0; diff --git a/output/outmacho.c b/output/outmacho.c index 14aec076..3d4767d0 100644 --- a/output/outmacho.c +++ b/output/outmacho.c @@ -363,7 +363,6 @@ static void macho_init(void) strs = saa_init(1L); section_by_index = raa_init(); - hash_init(§ion_by_name, HASH_MEDIUM); /* string table starts with a zero byte so index 0 is an empty string */ saa_wbytes(strs, zero_buffer, 1); diff --git a/output/strtbl.c b/output/strtbl.c index 23b0d118..23278f5f 100644 --- a/output/strtbl.c +++ b/output/strtbl.c @@ -56,7 +56,6 @@ struct strtbl_entry { void strtbl_init(struct nasm_strtbl *tbl) { tbl->size = 0; - hash_init(&tbl->hash, HASH_LARGE); strtbl_add(tbl, ""); /* Index 0 is always an empty string */ } @@ -70,14 +69,13 @@ size_t strtbl_add(struct nasm_strtbl *tbl, const char *str) void **sep; struct strtbl_entry *se; struct hash_insert hi; + size_t bytes = strlen(str) + 1; - sep = hash_find(&tbl->hash, str, &hi); + sep = hash_findb(&tbl->hash, str, bytes, &hi); if (sep) { se = *sep; } else { - size_t bytes = strlen(str) + 1; - - se = nasm_malloc(sizeof(struct strtbl_entry)-1+bytes); + nasm_new(se); se->index = tbl->size; tbl->size += bytes; se->bytes = bytes; @@ -107,11 +105,13 @@ size_t strtbl_find(struct nasm_strtbl *tbl, const char *str) void *strtbl_generate(const struct nasm_strtbl *tbl) { char *buf = nasm_malloc(strtbl_size(tbl)); - struct hash_tbl_node *iter = NULL; - struct strtbl_entry *se; + struct hash_iterator it; + const struct hash_node *np; - while ((se = hash_iterate(&tbl->hash, &iter, NULL))) + hash_for_each(&tbl->hash, it, np) { + struct strtbl_entry *se = np->data; memcpy(buf + se->index, se->str, se->bytes); + } return buf; } |