diff options
author | Pip Cet <pipcet@gmail.com> | 2020-08-11 02:16:53 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2020-08-11 02:27:43 -0700 |
commit | 16a16645f524c62f7906036b0e383e4247b58de7 (patch) | |
tree | ec9af0e89b5494707fc630d5cebec267737d98d7 | |
parent | 0d0aad213f941efc0fa0ec032e37dc9c2b08c9fb (diff) | |
download | emacs-16a16645f524c62f7906036b0e383e4247b58de7.tar.gz |
Rehash hash tables eagerly after loading a dump
This simplifies code, and helps performance in some cases (Bug#36597).
* src/lisp.h (hash_rehash_needed_p): Remove. All uses removed.
(hash_rehash_if_needed): Remove. All uses removed.
(struct Lisp_Hash_Table): Remove comment about rehashing hash tables.
* src/pdumper.c (thaw_hash_tables): New function.
(hash_table_thaw): New function.
(hash_table_freeze): New function.
(dump_hash_table): Simplify.
(dump_hash_table_list): New function.
(hash_table_contents): New function.
(Fdump_emacs_portable): Handle hash tables by eager rehashing.
(pdumper_load): Restore hash tables.
(init_pdumper_once): New function.
-rw-r--r-- | src/bytecode.c | 1 | ||||
-rw-r--r-- | src/composite.c | 1 | ||||
-rw-r--r-- | src/emacs.c | 1 | ||||
-rw-r--r-- | src/fns.c | 65 | ||||
-rw-r--r-- | src/lisp.h | 21 | ||||
-rw-r--r-- | src/minibuf.c | 3 | ||||
-rw-r--r-- | src/pdumper.c | 201 | ||||
-rw-r--r-- | src/pdumper.h | 1 |
8 files changed, 109 insertions, 185 deletions
diff --git a/src/bytecode.c b/src/bytecode.c index 1913a4812a0..1c3b6eac0d1 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -1401,7 +1401,6 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth, Lisp_Object v1 = POP; ptrdiff_t i; struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table); - hash_rehash_if_needed (h); /* h->count is a faster approximation for HASH_TABLE_SIZE (h) here. */ diff --git a/src/composite.c b/src/composite.c index f96f0b77726..ec2b8328f78 100644 --- a/src/composite.c +++ b/src/composite.c @@ -652,7 +652,6 @@ Lisp_Object composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len) { struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table); - hash_rehash_if_needed (h); Lisp_Object header = LGSTRING_HEADER (gstring); Lisp_Object hash = h->test.hashfn (header, h); if (len < 0) diff --git a/src/emacs.c b/src/emacs.c index 8e5eaf5e43e..d31fa2cb287 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -1536,6 +1536,7 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem if (!initialized) { init_alloc_once (); + init_pdumper_once (); init_obarray_once (); init_eval_once (); init_charset_once (); diff --git a/src/fns.c b/src/fns.c index 811d6e82001..41e26104f30 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4248,50 +4248,28 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) /* Recompute the hashes (and hence also the "next" pointers). Normally there's never a need to recompute hashes. - This is done only on first-access to a hash-table loaded from - the "pdump", because the object's addresses may have changed, thus + This is done only on first access to a hash-table loaded from + the "pdump", because the objects' addresses may have changed, thus affecting their hash. */ void -hash_table_rehash (struct Lisp_Hash_Table *h) +hash_table_rehash (Lisp_Object hash) { - ptrdiff_t size = HASH_TABLE_SIZE (h); - - /* These structures may have been purecopied and shared - (bug#36447). */ - Lisp_Object hash = make_nil_vector (size); - h->next = Fcopy_sequence (h->next); - h->index = Fcopy_sequence (h->index); - + struct Lisp_Hash_Table *h = XHASH_TABLE (hash); /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ - for (ptrdiff_t i = 0; i < size; ++i) + for (ptrdiff_t i = 0; i < h->count; ++i) { Lisp_Object key = HASH_KEY (h, i); - if (!EQ (key, Qunbound)) - ASET (hash, i, h->test.hashfn (key, h)); + Lisp_Object hash_code = h->test.hashfn (key, h); + ptrdiff_t start_of_bucket = XUFIXNUM (hash_code) % ASIZE (h->index); + set_hash_hash_slot (h, i, hash_code); + set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket)); + set_hash_index_slot (h, start_of_bucket, i); + eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ } - /* Reset the index so that any slot we don't fill below is marked - invalid. */ - Ffillarray (h->index, make_fixnum (-1)); - - /* Rebuild the collision chains. */ - for (ptrdiff_t i = 0; i < size; ++i) - if (!NILP (AREF (hash, i))) - { - EMACS_UINT hash_code = XUFIXNUM (AREF (hash, i)); - ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index); - set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket)); - set_hash_index_slot (h, start_of_bucket, i); - eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ - } - - /* Finally, mark the hash table as having a valid hash order. - Do this last so that if we're interrupted, we retry on next - access. */ - eassert (hash_rehash_needed_p (h)); - h->hash = hash; - eassert (!hash_rehash_needed_p (h)); + for (ptrdiff_t i = h->count; i < ASIZE (h->next) - 1; i++) + set_hash_next_slot (h, i, i + 1); } /* Lookup KEY in hash table H. If HASH is non-null, return in *HASH @@ -4303,8 +4281,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object *hash) { ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - Lisp_Object hash_code = h->test.hashfn (key, h); if (hash) *hash = hash_code; @@ -4339,8 +4315,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object value, { ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - /* Increment count after resizing because resizing may fail. */ maybe_resize_hash_table (h); h->count++; @@ -4373,8 +4347,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key) ptrdiff_t start_of_bucket = XUFIXNUM (hash_code) % ASIZE (h->index); ptrdiff_t prev = -1; - hash_rehash_if_needed (h); - for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket); 0 <= i; i = HASH_NEXT (h, i)) @@ -4415,8 +4387,7 @@ hash_clear (struct Lisp_Hash_Table *h) if (h->count > 0) { ptrdiff_t size = HASH_TABLE_SIZE (h); - if (!hash_rehash_needed_p (h)) - memclear (xvector_contents (h->hash), size * word_size); + memclear (xvector_contents (h->hash), size * word_size); for (ptrdiff_t i = 0; i < size; i++) { set_hash_next_slot (h, i, i < size - 1 ? i + 1 : -1); @@ -4452,9 +4423,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p) for (ptrdiff_t bucket = 0; bucket < n; ++bucket) { /* Follow collision chain, removing entries that don't survive - this garbage collection. It's okay if hash_rehash_needed_p - (h) is true, since we're operating entirely on the cached - hash values. */ + this garbage collection. */ ptrdiff_t prev = -1; ptrdiff_t next; for (ptrdiff_t i = HASH_INDEX (h, bucket); 0 <= i; i = next) @@ -4499,7 +4468,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p) set_hash_hash_slot (h, i, Qnil); eassert (h->count != 0); - h->count += h->count > 0 ? -1 : 1; + h->count--; } else { @@ -4923,7 +4892,7 @@ DEFUN ("hash-table-count", Fhash_table_count, Shash_table_count, 1, 1, 0, (Lisp_Object table) { struct Lisp_Hash_Table *h = check_hash_table (table); - eassert (h->count >= 0); + return make_fixnum (h->count); } diff --git a/src/lisp.h b/src/lisp.h index 17b92a04146..d88038d91be 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2275,11 +2275,7 @@ struct hash_table_test struct Lisp_Hash_Table { - /* Change pdumper.c if you change the fields here. - - IMPORTANT!!!!!!! - - Call hash_rehash_if_needed() before accessing. */ + /* Change pdumper.c if you change the fields here. */ /* This is for Lisp; the hash table code does not refer to it. */ union vectorlike_header header; @@ -2398,20 +2394,7 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h) return size; } -void hash_table_rehash (struct Lisp_Hash_Table *h); - -INLINE bool -hash_rehash_needed_p (const struct Lisp_Hash_Table *h) -{ - return NILP (h->hash); -} - -INLINE void -hash_rehash_if_needed (struct Lisp_Hash_Table *h) -{ - if (hash_rehash_needed_p (h)) - hash_table_rehash (h); -} +void hash_table_rehash (Lisp_Object); /* Default size for hash tables if not specified. */ diff --git a/src/minibuf.c b/src/minibuf.c index 9d870ce3640..cb302c5a605 100644 --- a/src/minibuf.c +++ b/src/minibuf.c @@ -1212,9 +1212,6 @@ is used to further constrain the set of candidates. */) bucket = AREF (collection, idx); } - if (HASH_TABLE_P (collection)) - hash_rehash_if_needed (XHASH_TABLE (collection)); - while (1) { /* Get the next element of the alist, obarray, or hash-table. */ diff --git a/src/pdumper.c b/src/pdumper.c index 63ee0fcb7f6..10dfa8737f0 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -105,17 +105,6 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ # define VM_SUPPORTED 0 #endif -/* PDUMPER_CHECK_REHASHING being true causes the portable dumper to - check, for each hash table it dumps, that the hash table means the - same thing after rehashing. */ -#ifndef PDUMPER_CHECK_REHASHING -# if ENABLE_CHECKING -# define PDUMPER_CHECK_REHASHING 1 -# else -# define PDUMPER_CHECK_REHASHING 0 -# endif -#endif - /* Require an architecture in which pointers, ptrdiff_t and intptr_t are the same size and have the same layout, and where bytes have eight bits --- that is, a general-purpose computer made after 1990. @@ -401,6 +390,8 @@ struct dump_header The start of the cold region is always aligned on a page boundary. */ dump_off cold_start; + + dump_off hash_list; }; /* Double-ended singly linked list. */ @@ -558,6 +549,8 @@ struct dump_context heap objects. */ Lisp_Object bignum_data; + Lisp_Object hash_tables; + unsigned number_hot_relocations; unsigned number_discardable_relocations; }; @@ -2616,78 +2609,62 @@ dump_vectorlike_generic (struct dump_context *ctx, return offset; } -/* Determine whether the hash table's hash order is stable - across dump and load. If it is, we don't have to trigger - a rehash on access. */ -static bool -dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash) +/* Return a vector of KEY, VALUE pairs in the given hash table H. The + first H->count pairs are valid, the rest is left as nil. */ +static Lisp_Object +hash_table_contents (struct Lisp_Hash_Table *h) { - if (hash->test.hashfn == hashfn_user_defined) + if (h->test.hashfn == hashfn_user_defined) error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */ - bool is_eql = hash->test.hashfn == hashfn_eql; - bool is_equal = hash->test.hashfn == hashfn_equal; - ptrdiff_t size = HASH_TABLE_SIZE (hash); - for (ptrdiff_t i = 0; i < size; ++i) + Lisp_Object contents = Qnil; + + /* Make sure key_and_value ends up in the same order; charset.c + relies on it by expecting hash table indices to stay constant + across the dump. */ + for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h) - h->count; i++) { - Lisp_Object key = HASH_KEY (hash, i); - if (!EQ (key, Qunbound)) - { - bool key_stable = (dump_builtin_symbol_p (key) - || FIXNUMP (key) - || (is_equal - && (STRINGP (key) || BOOL_VECTOR_P (key))) - || ((is_equal || is_eql) - && (FLOATP (key) || BIGNUMP (key)))); - if (!key_stable) - return false; - } + dump_push (&contents, Qnil); + dump_push (&contents, Qunbound); } - return true; + for (ptrdiff_t i = HASH_TABLE_SIZE (h) - 1; i >= 0; --i) + if (!NILP (HASH_HASH (h, i))) + { + dump_push (&contents, HASH_VALUE (h, i)); + dump_push (&contents, HASH_KEY (h, i)); + } + + return CALLN (Fapply, Qvector, contents); } -/* Return a list of (KEY . VALUE) pairs in the given hash table. */ -static Lisp_Object -hash_table_contents (Lisp_Object table) +static dump_off +dump_hash_table_list (struct dump_context *ctx) { - Lisp_Object contents = Qnil; - struct Lisp_Hash_Table *h = XHASH_TABLE (table); - for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i) - { - Lisp_Object key = HASH_KEY (h, i); - if (!EQ (key, Qunbound)) - dump_push (&contents, Fcons (key, HASH_VALUE (h, i))); - } - return Fnreverse (contents); + if (CONSP (ctx->hash_tables)) + return dump_object (ctx, CALLN (Fapply, Qvector, ctx->hash_tables)); + else + return 0; } -/* Copy the given hash table, rehash it, and make sure that we can - look up all the values in the original. */ static void -check_hash_table_rehash (Lisp_Object table_orig) -{ - ptrdiff_t count = XHASH_TABLE (table_orig)->count; - hash_rehash_if_needed (XHASH_TABLE (table_orig)); - Lisp_Object table_rehashed = Fcopy_hash_table (table_orig); - eassert (!hash_rehash_needed_p (XHASH_TABLE (table_rehashed))); - XHASH_TABLE (table_rehashed)->hash = Qnil; - eassert (count == 0 || hash_rehash_needed_p (XHASH_TABLE (table_rehashed))); - hash_rehash_if_needed (XHASH_TABLE (table_rehashed)); - eassert (!hash_rehash_needed_p (XHASH_TABLE (table_rehashed))); - Lisp_Object expected_contents = hash_table_contents (table_orig); - while (!NILP (expected_contents)) - { - Lisp_Object key_value_pair = dump_pop (&expected_contents); - Lisp_Object key = XCAR (key_value_pair); - Lisp_Object expected_value = XCDR (key_value_pair); - Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied; - Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary); - eassert (EQ (expected_value, found_value)); - Fremhash (key, table_rehashed); - } +hash_table_freeze (struct Lisp_Hash_Table *h) +{ + ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2; + h->key_and_value = hash_table_contents (h); + h->next_free = (nkeys == h->count ? -1 : h->count); + h->index = Flength (h->index); + h->next = h->hash = make_fixnum (nkeys); +} - eassert (EQ (Fhash_table_count (table_rehashed), - make_fixnum (0))); +static void +hash_table_thaw (Lisp_Object hash) +{ + struct Lisp_Hash_Table *h = XHASH_TABLE (hash); + h->index = Fmake_vector (h->index, make_fixnum (-1)); + h->hash = Fmake_vector (h->hash, Qnil); + h->next = Fmake_vector (h->next, make_fixnum (-1)); + + hash_table_rehash (hash); } static dump_off @@ -2699,51 +2676,11 @@ dump_hash_table (struct dump_context *ctx, # error "Lisp_Hash_Table changed. See CHECK_STRUCTS comment in config.h." #endif const struct Lisp_Hash_Table *hash_in = XHASH_TABLE (object); - bool is_stable = dump_hash_table_stable_p (hash_in); - /* If the hash table is likely to be modified in memory (either - because we need to rehash, and thus toggle hash->count, or - because we need to assemble a list of weak tables) punt the hash - table to the end of the dump, where we can lump all such hash - tables together. */ - if (!(is_stable || !NILP (hash_in->weak)) - && ctx->flags.defer_hash_tables) - { - if (offset != DUMP_OBJECT_ON_HASH_TABLE_QUEUE) - { - eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE - || offset == DUMP_OBJECT_NOT_SEEN); - /* We still want to dump the actual keys and values now. */ - dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE); - /* We'll get to the rest later. */ - offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE; - dump_remember_object (ctx, object, offset); - dump_push (&ctx->deferred_hash_tables, object); - } - return offset; - } - - if (PDUMPER_CHECK_REHASHING) - check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, Lisp_Vectorlike)); - struct Lisp_Hash_Table hash_munged = *hash_in; struct Lisp_Hash_Table *hash = &hash_munged; - /* Remember to rehash this hash table on first access. After a - dump reload, the hash table values will have changed, so we'll - need to rebuild the index. - - TODO: for EQ and EQL hash tables, it should be possible to rehash - here using the preferred load address of the dump, eliminating - the need to rehash-on-access if we can load the dump where we - want. */ - if (hash->count > 0 && !is_stable) - /* Hash codes will have to be recomputed anyway, so let's not dump them. - Also set `hash` to nil for hash_rehash_needed_p. - We could also refrain from dumping the `next' and `index' vectors, - except that `next' is currently used for HASH_TABLE_SIZE and - we'd have to rebuild the next_free list as well as adjust - sweep_weak_hash_table for the case where there's no `index'. */ - hash->hash = Qnil; + hash_table_freeze (hash); + dump_push (&ctx->hash_tables, object); START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out); dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header); @@ -4151,6 +4088,19 @@ types. */) || !NILP (ctx->deferred_hash_tables) || !NILP (ctx->deferred_symbols)); + ctx->header.hash_list = ctx->offset; + dump_hash_table_list (ctx); + + do + { + dump_drain_deferred_hash_tables (ctx); + dump_drain_deferred_symbols (ctx); + dump_drain_normal_queue (ctx); + } + while (!dump_queue_empty_p (&ctx->dump_queue) + || !NILP (ctx->deferred_hash_tables) + || !NILP (ctx->deferred_symbols)); + dump_sort_copied_objects (ctx); /* While we copy built-in symbols into the Emacs image, these @@ -5302,6 +5252,9 @@ enum dump_section NUMBER_DUMP_SECTIONS, }; +/* Pointer to a stack variable to avoid having to staticpro it. */ +static Lisp_Object *pdumper_hashes = &zero_vector; + /* Load a dump from DUMP_FILENAME. Return an error code. N.B. We run very early in initialization, so we can't use lisp, @@ -5448,6 +5401,15 @@ pdumper_load (const char *dump_filename) for (int i = 0; i < ARRAYELTS (sections); ++i) dump_mmap_reset (§ions[i]); + Lisp_Object hashes = zero_vector; + if (header->hash_list) + { + struct Lisp_Vector *hash_tables = + ((struct Lisp_Vector *)(dump_base + header->hash_list)); + XSETVECTOR (hashes, hash_tables); + } + + pdumper_hashes = &hashes; /* Run the functions Emacs registered for doing post-dump-load initialization. */ for (int i = 0; i < nr_dump_hooks; ++i) @@ -5518,6 +5480,19 @@ Value is nil if this session was not started using a dump file.*/) #endif /* HAVE_PDUMPER */ +static void +thaw_hash_tables (void) +{ + Lisp_Object hash_tables = *pdumper_hashes; + for (ptrdiff_t i = 0; i < ASIZE (hash_tables); i++) + hash_table_thaw (AREF (hash_tables, i)); +} + +void +init_pdumper_once (void) +{ + pdumper_do_now_and_after_load (thaw_hash_tables); +} void syms_of_pdumper (void) diff --git a/src/pdumper.h b/src/pdumper.h index 6a99b511f2f..c793fb40580 100644 --- a/src/pdumper.h +++ b/src/pdumper.h @@ -256,6 +256,7 @@ pdumper_clear_marks (void) file was loaded. */ extern void pdumper_record_wd (const char *); +void init_pdumper_once (void); void syms_of_pdumper (void); INLINE_HEADER_END |