summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPip Cet <pipcet@gmail.com>2020-08-11 02:16:53 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2020-08-11 02:27:43 -0700
commit16a16645f524c62f7906036b0e383e4247b58de7 (patch)
treeec9af0e89b5494707fc630d5cebec267737d98d7
parent0d0aad213f941efc0fa0ec032e37dc9c2b08c9fb (diff)
downloademacs-16a16645f524c62f7906036b0e383e4247b58de7.tar.gz
Rehash hash tables eagerly after loading a dump
This simplifies code, and helps performance in some cases (Bug#36597). * src/lisp.h (hash_rehash_needed_p): Remove. All uses removed. (hash_rehash_if_needed): Remove. All uses removed. (struct Lisp_Hash_Table): Remove comment about rehashing hash tables. * src/pdumper.c (thaw_hash_tables): New function. (hash_table_thaw): New function. (hash_table_freeze): New function. (dump_hash_table): Simplify. (dump_hash_table_list): New function. (hash_table_contents): New function. (Fdump_emacs_portable): Handle hash tables by eager rehashing. (pdumper_load): Restore hash tables. (init_pdumper_once): New function.
-rw-r--r--src/bytecode.c1
-rw-r--r--src/composite.c1
-rw-r--r--src/emacs.c1
-rw-r--r--src/fns.c65
-rw-r--r--src/lisp.h21
-rw-r--r--src/minibuf.c3
-rw-r--r--src/pdumper.c201
-rw-r--r--src/pdumper.h1
8 files changed, 109 insertions, 185 deletions
diff --git a/src/bytecode.c b/src/bytecode.c
index 1913a4812a0..1c3b6eac0d1 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -1401,7 +1401,6 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
Lisp_Object v1 = POP;
ptrdiff_t i;
struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table);
- hash_rehash_if_needed (h);
/* h->count is a faster approximation for HASH_TABLE_SIZE (h)
here. */
diff --git a/src/composite.c b/src/composite.c
index f96f0b77726..ec2b8328f78 100644
--- a/src/composite.c
+++ b/src/composite.c
@@ -652,7 +652,6 @@ Lisp_Object
composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
{
struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
- hash_rehash_if_needed (h);
Lisp_Object header = LGSTRING_HEADER (gstring);
Lisp_Object hash = h->test.hashfn (header, h);
if (len < 0)
diff --git a/src/emacs.c b/src/emacs.c
index 8e5eaf5e43e..d31fa2cb287 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -1536,6 +1536,7 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem
if (!initialized)
{
init_alloc_once ();
+ init_pdumper_once ();
init_obarray_once ();
init_eval_once ();
init_charset_once ();
diff --git a/src/fns.c b/src/fns.c
index 811d6e82001..41e26104f30 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4248,50 +4248,28 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
/* Recompute the hashes (and hence also the "next" pointers).
Normally there's never a need to recompute hashes.
- This is done only on first-access to a hash-table loaded from
- the "pdump", because the object's addresses may have changed, thus
+ This is done only on first access to a hash-table loaded from
+ the "pdump", because the objects' addresses may have changed, thus
affecting their hash. */
void
-hash_table_rehash (struct Lisp_Hash_Table *h)
+hash_table_rehash (Lisp_Object hash)
{
- ptrdiff_t size = HASH_TABLE_SIZE (h);
-
- /* These structures may have been purecopied and shared
- (bug#36447). */
- Lisp_Object hash = make_nil_vector (size);
- h->next = Fcopy_sequence (h->next);
- h->index = Fcopy_sequence (h->index);
-
+ struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
/* Recompute the actual hash codes for each entry in the table.
Order is still invalid. */
- for (ptrdiff_t i = 0; i < size; ++i)
+ for (ptrdiff_t i = 0; i < h->count; ++i)
{
Lisp_Object key = HASH_KEY (h, i);
- if (!EQ (key, Qunbound))
- ASET (hash, i, h->test.hashfn (key, h));
+ Lisp_Object hash_code = h->test.hashfn (key, h);
+ ptrdiff_t start_of_bucket = XUFIXNUM (hash_code) % ASIZE (h->index);
+ set_hash_hash_slot (h, i, hash_code);
+ set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
+ set_hash_index_slot (h, start_of_bucket, i);
+ eassert (HASH_NEXT (h, i) != i); /* Stop loops. */
}
- /* Reset the index so that any slot we don't fill below is marked
- invalid. */
- Ffillarray (h->index, make_fixnum (-1));
-
- /* Rebuild the collision chains. */
- for (ptrdiff_t i = 0; i < size; ++i)
- if (!NILP (AREF (hash, i)))
- {
- EMACS_UINT hash_code = XUFIXNUM (AREF (hash, i));
- ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
- set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
- set_hash_index_slot (h, start_of_bucket, i);
- eassert (HASH_NEXT (h, i) != i); /* Stop loops. */
- }
-
- /* Finally, mark the hash table as having a valid hash order.
- Do this last so that if we're interrupted, we retry on next
- access. */
- eassert (hash_rehash_needed_p (h));
- h->hash = hash;
- eassert (!hash_rehash_needed_p (h));
+ for (ptrdiff_t i = h->count; i < ASIZE (h->next) - 1; i++)
+ set_hash_next_slot (h, i, i + 1);
}
/* Lookup KEY in hash table H. If HASH is non-null, return in *HASH
@@ -4303,8 +4281,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object *hash)
{
ptrdiff_t start_of_bucket, i;
- hash_rehash_if_needed (h);
-
Lisp_Object hash_code = h->test.hashfn (key, h);
if (hash)
*hash = hash_code;
@@ -4339,8 +4315,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object value,
{
ptrdiff_t start_of_bucket, i;
- hash_rehash_if_needed (h);
-
/* Increment count after resizing because resizing may fail. */
maybe_resize_hash_table (h);
h->count++;
@@ -4373,8 +4347,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key)
ptrdiff_t start_of_bucket = XUFIXNUM (hash_code) % ASIZE (h->index);
ptrdiff_t prev = -1;
- hash_rehash_if_needed (h);
-
for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket);
0 <= i;
i = HASH_NEXT (h, i))
@@ -4415,8 +4387,7 @@ hash_clear (struct Lisp_Hash_Table *h)
if (h->count > 0)
{
ptrdiff_t size = HASH_TABLE_SIZE (h);
- if (!hash_rehash_needed_p (h))
- memclear (xvector_contents (h->hash), size * word_size);
+ memclear (xvector_contents (h->hash), size * word_size);
for (ptrdiff_t i = 0; i < size; i++)
{
set_hash_next_slot (h, i, i < size - 1 ? i + 1 : -1);
@@ -4452,9 +4423,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p)
for (ptrdiff_t bucket = 0; bucket < n; ++bucket)
{
/* Follow collision chain, removing entries that don't survive
- this garbage collection. It's okay if hash_rehash_needed_p
- (h) is true, since we're operating entirely on the cached
- hash values. */
+ this garbage collection. */
ptrdiff_t prev = -1;
ptrdiff_t next;
for (ptrdiff_t i = HASH_INDEX (h, bucket); 0 <= i; i = next)
@@ -4499,7 +4468,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p)
set_hash_hash_slot (h, i, Qnil);
eassert (h->count != 0);
- h->count += h->count > 0 ? -1 : 1;
+ h->count--;
}
else
{
@@ -4923,7 +4892,7 @@ DEFUN ("hash-table-count", Fhash_table_count, Shash_table_count, 1, 1, 0,
(Lisp_Object table)
{
struct Lisp_Hash_Table *h = check_hash_table (table);
- eassert (h->count >= 0);
+
return make_fixnum (h->count);
}
diff --git a/src/lisp.h b/src/lisp.h
index 17b92a04146..d88038d91be 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2275,11 +2275,7 @@ struct hash_table_test
struct Lisp_Hash_Table
{
- /* Change pdumper.c if you change the fields here.
-
- IMPORTANT!!!!!!!
-
- Call hash_rehash_if_needed() before accessing. */
+ /* Change pdumper.c if you change the fields here. */
/* This is for Lisp; the hash table code does not refer to it. */
union vectorlike_header header;
@@ -2398,20 +2394,7 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
return size;
}
-void hash_table_rehash (struct Lisp_Hash_Table *h);
-
-INLINE bool
-hash_rehash_needed_p (const struct Lisp_Hash_Table *h)
-{
- return NILP (h->hash);
-}
-
-INLINE void
-hash_rehash_if_needed (struct Lisp_Hash_Table *h)
-{
- if (hash_rehash_needed_p (h))
- hash_table_rehash (h);
-}
+void hash_table_rehash (Lisp_Object);
/* Default size for hash tables if not specified. */
diff --git a/src/minibuf.c b/src/minibuf.c
index 9d870ce3640..cb302c5a605 100644
--- a/src/minibuf.c
+++ b/src/minibuf.c
@@ -1212,9 +1212,6 @@ is used to further constrain the set of candidates. */)
bucket = AREF (collection, idx);
}
- if (HASH_TABLE_P (collection))
- hash_rehash_if_needed (XHASH_TABLE (collection));
-
while (1)
{
/* Get the next element of the alist, obarray, or hash-table. */
diff --git a/src/pdumper.c b/src/pdumper.c
index 63ee0fcb7f6..10dfa8737f0 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -105,17 +105,6 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
# define VM_SUPPORTED 0
#endif
-/* PDUMPER_CHECK_REHASHING being true causes the portable dumper to
- check, for each hash table it dumps, that the hash table means the
- same thing after rehashing. */
-#ifndef PDUMPER_CHECK_REHASHING
-# if ENABLE_CHECKING
-# define PDUMPER_CHECK_REHASHING 1
-# else
-# define PDUMPER_CHECK_REHASHING 0
-# endif
-#endif
-
/* Require an architecture in which pointers, ptrdiff_t and intptr_t
are the same size and have the same layout, and where bytes have
eight bits --- that is, a general-purpose computer made after 1990.
@@ -401,6 +390,8 @@ struct dump_header
The start of the cold region is always aligned on a page
boundary. */
dump_off cold_start;
+
+ dump_off hash_list;
};
/* Double-ended singly linked list. */
@@ -558,6 +549,8 @@ struct dump_context
heap objects. */
Lisp_Object bignum_data;
+ Lisp_Object hash_tables;
+
unsigned number_hot_relocations;
unsigned number_discardable_relocations;
};
@@ -2616,78 +2609,62 @@ dump_vectorlike_generic (struct dump_context *ctx,
return offset;
}
-/* Determine whether the hash table's hash order is stable
- across dump and load. If it is, we don't have to trigger
- a rehash on access. */
-static bool
-dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash)
+/* Return a vector of KEY, VALUE pairs in the given hash table H. The
+ first H->count pairs are valid, the rest is left as nil. */
+static Lisp_Object
+hash_table_contents (struct Lisp_Hash_Table *h)
{
- if (hash->test.hashfn == hashfn_user_defined)
+ if (h->test.hashfn == hashfn_user_defined)
error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */
- bool is_eql = hash->test.hashfn == hashfn_eql;
- bool is_equal = hash->test.hashfn == hashfn_equal;
- ptrdiff_t size = HASH_TABLE_SIZE (hash);
- for (ptrdiff_t i = 0; i < size; ++i)
+ Lisp_Object contents = Qnil;
+
+ /* Make sure key_and_value ends up in the same order; charset.c
+ relies on it by expecting hash table indices to stay constant
+ across the dump. */
+ for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h) - h->count; i++)
{
- Lisp_Object key = HASH_KEY (hash, i);
- if (!EQ (key, Qunbound))
- {
- bool key_stable = (dump_builtin_symbol_p (key)
- || FIXNUMP (key)
- || (is_equal
- && (STRINGP (key) || BOOL_VECTOR_P (key)))
- || ((is_equal || is_eql)
- && (FLOATP (key) || BIGNUMP (key))));
- if (!key_stable)
- return false;
- }
+ dump_push (&contents, Qnil);
+ dump_push (&contents, Qunbound);
}
- return true;
+ for (ptrdiff_t i = HASH_TABLE_SIZE (h) - 1; i >= 0; --i)
+ if (!NILP (HASH_HASH (h, i)))
+ {
+ dump_push (&contents, HASH_VALUE (h, i));
+ dump_push (&contents, HASH_KEY (h, i));
+ }
+
+ return CALLN (Fapply, Qvector, contents);
}
-/* Return a list of (KEY . VALUE) pairs in the given hash table. */
-static Lisp_Object
-hash_table_contents (Lisp_Object table)
+static dump_off
+dump_hash_table_list (struct dump_context *ctx)
{
- Lisp_Object contents = Qnil;
- struct Lisp_Hash_Table *h = XHASH_TABLE (table);
- for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i)
- {
- Lisp_Object key = HASH_KEY (h, i);
- if (!EQ (key, Qunbound))
- dump_push (&contents, Fcons (key, HASH_VALUE (h, i)));
- }
- return Fnreverse (contents);
+ if (CONSP (ctx->hash_tables))
+ return dump_object (ctx, CALLN (Fapply, Qvector, ctx->hash_tables));
+ else
+ return 0;
}
-/* Copy the given hash table, rehash it, and make sure that we can
- look up all the values in the original. */
static void
-check_hash_table_rehash (Lisp_Object table_orig)
-{
- ptrdiff_t count = XHASH_TABLE (table_orig)->count;
- hash_rehash_if_needed (XHASH_TABLE (table_orig));
- Lisp_Object table_rehashed = Fcopy_hash_table (table_orig);
- eassert (!hash_rehash_needed_p (XHASH_TABLE (table_rehashed)));
- XHASH_TABLE (table_rehashed)->hash = Qnil;
- eassert (count == 0 || hash_rehash_needed_p (XHASH_TABLE (table_rehashed)));
- hash_rehash_if_needed (XHASH_TABLE (table_rehashed));
- eassert (!hash_rehash_needed_p (XHASH_TABLE (table_rehashed)));
- Lisp_Object expected_contents = hash_table_contents (table_orig);
- while (!NILP (expected_contents))
- {
- Lisp_Object key_value_pair = dump_pop (&expected_contents);
- Lisp_Object key = XCAR (key_value_pair);
- Lisp_Object expected_value = XCDR (key_value_pair);
- Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied;
- Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary);
- eassert (EQ (expected_value, found_value));
- Fremhash (key, table_rehashed);
- }
+hash_table_freeze (struct Lisp_Hash_Table *h)
+{
+ ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2;
+ h->key_and_value = hash_table_contents (h);
+ h->next_free = (nkeys == h->count ? -1 : h->count);
+ h->index = Flength (h->index);
+ h->next = h->hash = make_fixnum (nkeys);
+}
- eassert (EQ (Fhash_table_count (table_rehashed),
- make_fixnum (0)));
+static void
+hash_table_thaw (Lisp_Object hash)
+{
+ struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
+ h->index = Fmake_vector (h->index, make_fixnum (-1));
+ h->hash = Fmake_vector (h->hash, Qnil);
+ h->next = Fmake_vector (h->next, make_fixnum (-1));
+
+ hash_table_rehash (hash);
}
static dump_off
@@ -2699,51 +2676,11 @@ dump_hash_table (struct dump_context *ctx,
# error "Lisp_Hash_Table changed. See CHECK_STRUCTS comment in config.h."
#endif
const struct Lisp_Hash_Table *hash_in = XHASH_TABLE (object);
- bool is_stable = dump_hash_table_stable_p (hash_in);
- /* If the hash table is likely to be modified in memory (either
- because we need to rehash, and thus toggle hash->count, or
- because we need to assemble a list of weak tables) punt the hash
- table to the end of the dump, where we can lump all such hash
- tables together. */
- if (!(is_stable || !NILP (hash_in->weak))
- && ctx->flags.defer_hash_tables)
- {
- if (offset != DUMP_OBJECT_ON_HASH_TABLE_QUEUE)
- {
- eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE
- || offset == DUMP_OBJECT_NOT_SEEN);
- /* We still want to dump the actual keys and values now. */
- dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE);
- /* We'll get to the rest later. */
- offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE;
- dump_remember_object (ctx, object, offset);
- dump_push (&ctx->deferred_hash_tables, object);
- }
- return offset;
- }
-
- if (PDUMPER_CHECK_REHASHING)
- check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, Lisp_Vectorlike));
-
struct Lisp_Hash_Table hash_munged = *hash_in;
struct Lisp_Hash_Table *hash = &hash_munged;
- /* Remember to rehash this hash table on first access. After a
- dump reload, the hash table values will have changed, so we'll
- need to rebuild the index.
-
- TODO: for EQ and EQL hash tables, it should be possible to rehash
- here using the preferred load address of the dump, eliminating
- the need to rehash-on-access if we can load the dump where we
- want. */
- if (hash->count > 0 && !is_stable)
- /* Hash codes will have to be recomputed anyway, so let's not dump them.
- Also set `hash` to nil for hash_rehash_needed_p.
- We could also refrain from dumping the `next' and `index' vectors,
- except that `next' is currently used for HASH_TABLE_SIZE and
- we'd have to rebuild the next_free list as well as adjust
- sweep_weak_hash_table for the case where there's no `index'. */
- hash->hash = Qnil;
+ hash_table_freeze (hash);
+ dump_push (&ctx->hash_tables, object);
START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out);
dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
@@ -4151,6 +4088,19 @@ types. */)
|| !NILP (ctx->deferred_hash_tables)
|| !NILP (ctx->deferred_symbols));
+ ctx->header.hash_list = ctx->offset;
+ dump_hash_table_list (ctx);
+
+ do
+ {
+ dump_drain_deferred_hash_tables (ctx);
+ dump_drain_deferred_symbols (ctx);
+ dump_drain_normal_queue (ctx);
+ }
+ while (!dump_queue_empty_p (&ctx->dump_queue)
+ || !NILP (ctx->deferred_hash_tables)
+ || !NILP (ctx->deferred_symbols));
+
dump_sort_copied_objects (ctx);
/* While we copy built-in symbols into the Emacs image, these
@@ -5302,6 +5252,9 @@ enum dump_section
NUMBER_DUMP_SECTIONS,
};
+/* Pointer to a stack variable to avoid having to staticpro it. */
+static Lisp_Object *pdumper_hashes = &zero_vector;
+
/* Load a dump from DUMP_FILENAME. Return an error code.
N.B. We run very early in initialization, so we can't use lisp,
@@ -5448,6 +5401,15 @@ pdumper_load (const char *dump_filename)
for (int i = 0; i < ARRAYELTS (sections); ++i)
dump_mmap_reset (&sections[i]);
+ Lisp_Object hashes = zero_vector;
+ if (header->hash_list)
+ {
+ struct Lisp_Vector *hash_tables =
+ ((struct Lisp_Vector *)(dump_base + header->hash_list));
+ XSETVECTOR (hashes, hash_tables);
+ }
+
+ pdumper_hashes = &hashes;
/* Run the functions Emacs registered for doing post-dump-load
initialization. */
for (int i = 0; i < nr_dump_hooks; ++i)
@@ -5518,6 +5480,19 @@ Value is nil if this session was not started using a dump file.*/)
#endif /* HAVE_PDUMPER */
+static void
+thaw_hash_tables (void)
+{
+ Lisp_Object hash_tables = *pdumper_hashes;
+ for (ptrdiff_t i = 0; i < ASIZE (hash_tables); i++)
+ hash_table_thaw (AREF (hash_tables, i));
+}
+
+void
+init_pdumper_once (void)
+{
+ pdumper_do_now_and_after_load (thaw_hash_tables);
+}
void
syms_of_pdumper (void)
diff --git a/src/pdumper.h b/src/pdumper.h
index 6a99b511f2f..c793fb40580 100644
--- a/src/pdumper.h
+++ b/src/pdumper.h
@@ -256,6 +256,7 @@ pdumper_clear_marks (void)
file was loaded. */
extern void pdumper_record_wd (const char *);
+void init_pdumper_once (void);
void syms_of_pdumper (void);
INLINE_HEADER_END