From 0938964ba1af3924cf969fb809fc3598892bc20d Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Wed, 19 Apr 2023 16:02:36 -0400 Subject: Implement Hash ST tables on VWA --- array.c | 10 +++-- gc.c | 18 +++++---- hash.c | 110 +++++++++++++++++++----------------------------------- include/ruby/st.h | 2 + internal/hash.h | 27 ++++++++++---- st.c | 36 +++++++++++++----- 6 files changed, 103 insertions(+), 100 deletions(-) diff --git a/array.c b/array.c index fc7eebd050..72c796728d 100644 --- a/array.c +++ b/array.c @@ -6416,6 +6416,12 @@ rb_ary_count(int argc, VALUE *argv, VALUE ary) static VALUE flatten(VALUE ary, int level) { + static const rb_data_type_t flatten_memo_data_type = { + .wrap_struct_name = "array_flatten_memo_data_type", + .function = { NULL, (RUBY_DATA_FUNC)st_free_table }, + NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + }; + long i; VALUE stack, result, tmp = 0, elt, vmemo; st_table *memo = 0; @@ -6441,10 +6447,8 @@ flatten(VALUE ary, int level) rb_ary_push(stack, LONG2NUM(i + 1)); if (level < 0) { - vmemo = rb_hash_new(); - RBASIC_CLEAR_CLASS(vmemo); memo = st_init_numtable(); - rb_hash_st_table_set(vmemo, memo); + vmemo = TypedData_Wrap_Struct(0, &flatten_memo_data_type, memo); st_insert(memo, (st_data_t)ary, (st_data_t)Qtrue); st_insert(memo, (st_data_t)tmp, (st_data_t)Qtrue); } diff --git a/gc.c b/gc.c index d2cf6d07db..0f64efefa5 100644 --- a/gc.c +++ b/gc.c @@ -3568,9 +3568,12 @@ obj_free(rb_objspace_t *objspace, VALUE obj) RB_DEBUG_COUNTER_INC(obj_hash_st); } #endif - if (!RHASH_AR_TABLE_P(obj)) { - GC_ASSERT(RHASH_ST_TABLE_P(obj)); - st_free_table(RHASH(obj)->as.st); + + if (RHASH_ST_TABLE_P(obj)) { + st_table *tab = RHASH_ST_TABLE(obj); + + if (tab->bins != NULL) free(tab->bins); + free(tab->entries); } break; case T_REGEXP: @@ -4909,11 +4912,10 @@ obj_memsize_of(VALUE obj, int use_all_types) size += rb_ary_memsize(obj); break; case T_HASH: - if (RHASH_AR_TABLE_P(obj)) { - } - else { + if (RHASH_ST_TABLE_P(obj)) { VM_ASSERT(RHASH_ST_TABLE(obj) != NULL); - size += st_memsize(RHASH_ST_TABLE(obj)); + /* st_table is in the slot */ + size += st_memsize(RHASH_ST_TABLE(obj)) - sizeof(st_table); } break; case T_REGEXP: @@ -8402,7 +8404,7 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V break; case T_HASH: - obj_size = rb_hash_size_as_embedded(src); + obj_size = RHASH_SLOT_SIZE; break; default: diff --git a/hash.c b/hash.c index f4aeee36c6..cef784cccc 100644 --- a/hash.c +++ b/hash.c @@ -378,27 +378,6 @@ typedef st_index_t st_hash_t; #define RHASH_AR_TABLE_REF(hash, n) (&RHASH_AR_TABLE(hash)->pairs[n]) #define RHASH_AR_CLEARED_HINT 0xff -typedef struct ar_table_pair_struct { - VALUE key; - VALUE val; -} ar_table_pair; - -typedef struct ar_table_struct { - /* 64bit CPU: 8B * 2 * 8 = 128B */ - ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE]; -} ar_table; - -#define RHASH_EMBED_SIZE (offsetof(struct RHash, as.st) + sizeof(ar_table)) - -size_t -rb_hash_size_as_embedded(VALUE hash) -{ - if (RHASH_AR_TABLE_P(hash)) { - return RHASH_EMBED_SIZE; - } - return sizeof(struct RHash); -} - static inline st_hash_t ar_do_hash(st_data_t key) { @@ -539,6 +518,7 @@ hash_verify_(VALUE hash, const char *file, int line) HASH_ASSERT(RHASH_AR_TABLE_SIZE_RAW(hash) == 0); HASH_ASSERT(RHASH_AR_TABLE_BOUND_RAW(hash) == 0); } + return hash; } @@ -564,25 +544,25 @@ RHASH_TABLE_EMPTY_P(VALUE hash) return RHASH_SIZE(hash) == 0; } +#define RHASH_SET_ST_FLAG(h) FL_SET_RAW(h, RHASH_ST_TABLE_FLAG) +#define RHASH_UNSET_ST_FLAG(h) FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG) + +static void +hash_st_table_init(VALUE hash, const struct st_hash_type *type, st_index_t size) +{ + st_init_existing_table_with_size(RHASH_ST_TABLE(hash), type, size); + RHASH_SET_ST_FLAG(hash); +} + void rb_hash_st_table_set(VALUE hash, st_table *st) { HASH_ASSERT(st != NULL); - FL_SET_RAW((hash), RHASH_ST_TABLE_FLAG); - RHASH(hash)->as.st = st; -} + RHASH_SET_ST_FLAG(hash); -static void -hash_ar_table_set(VALUE hash, ar_table *ar) -{ - HASH_ASSERT(RHASH_AR_TABLE_P(hash)); - *(RHASH_AR_TABLE(hash)) = *ar; - hash_verify(hash); + *RHASH_ST_TABLE(hash) = *st; } -#define RHASH_SET_ST_FLAG(h) FL_SET_RAW(h, RHASH_ST_TABLE_FLAG) -#define RHASH_UNSET_ST_FLAG(h) FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG) - static inline void RHASH_AR_TABLE_BOUND_SET(VALUE h, st_index_t n) { @@ -646,9 +626,6 @@ ar_alloc_table(VALUE hash) ar_table *tab = RHASH_AR_TABLE(hash); memset(tab, 0, sizeof(ar_table)); - // RHASH_UNSET_ST_FLAG(hash); - // hash_ar_table_set(hash, tab); - RHASH_AR_TABLE_SIZE_SET(hash, 0); RHASH_AR_TABLE_BOUND_SET(hash, 0); @@ -730,28 +707,29 @@ ar_try_convert_table(VALUE hash) const unsigned size = RHASH_AR_TABLE_SIZE(hash); - st_table *new_tab; - st_index_t i; - if (size < RHASH_AR_TABLE_MAX_SIZE) { return; } - new_tab = st_init_table_with_size(&objhash, size * 2); + st_table tab; + st_table *new_tab = &tab; + rb_st_init_existing_table_with_size(new_tab, &objhash, size * 2); - for (i = 0; i < RHASH_AR_TABLE_MAX_BOUND; i++) { + for (st_index_t i = 0; i < RHASH_AR_TABLE_MAX_BOUND; i++) { ar_table_pair *pair = RHASH_AR_TABLE_REF(hash, i); st_add_direct(new_tab, pair->key, pair->val); } + ar_free_and_clear_table(hash); RHASH_ST_TABLE_SET(hash, new_tab); - return; } static st_table * ar_force_convert_table(VALUE hash, const char *file, int line) { st_table *new_tab; + st_table tab; + new_tab = &tab; if (RHASH_ST_TABLE_P(hash)) { return RHASH_ST_TABLE(hash); @@ -760,13 +738,7 @@ ar_force_convert_table(VALUE hash, const char *file, int line) if (RHASH_AR_TABLE(hash)) { unsigned i, bound = RHASH_AR_TABLE_BOUND(hash); -#if defined(RHASH_CONVERT_TABLE_DEBUG) && RHASH_CONVERT_TABLE_DEBUG - rb_obj_info_dump(hash); - fprintf(stderr, "force_convert: %s:%d\n", file, line); - RB_DEBUG_COUNTER_INC(obj_hash_force_convert); -#endif - - new_tab = st_init_table_with_size(&objhash, RHASH_AR_TABLE_SIZE(hash)); + rb_st_init_existing_table_with_size(new_tab, &objhash, RHASH_AR_TABLE_SIZE(hash)); for (i = 0; i < bound; i++) { if (ar_cleared_entry(hash, i)) continue; @@ -777,10 +749,13 @@ ar_force_convert_table(VALUE hash, const char *file, int line) ar_free_and_clear_table(hash); } else { - new_tab = st_init_table(&objhash); + rb_st_init_existing_table_with_size(new_tab, &objhash, 0); } + RHASH_ST_TABLE_SET(hash, new_tab); + new_tab = RHASH_ST_TABLE(hash); + return new_tab; } @@ -1193,7 +1168,6 @@ ar_copy(VALUE hash1, VALUE hash2) RHASH(hash1)->ar_hint.word = RHASH(hash2)->ar_hint.word; RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2)); RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2)); - hash_ar_table_set(hash1, new_tab); rb_gc_writebarrier_remember(hash1); @@ -1454,7 +1428,7 @@ static VALUE hash_alloc_flags(VALUE klass, VALUE flags, VALUE ifnone) { const VALUE wb = (RGENGC_WB_PROTECTED_HASH ? FL_WB_PROTECTED : 0); - NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_EMBED_SIZE, 0); + NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_SLOT_SIZE, 0); RHASH_SET_IFNONE((VALUE)hash, ifnone); @@ -1498,7 +1472,7 @@ rb_hash_new_with_size(st_index_t size) /* do nothing */ } else if (size > RHASH_AR_TABLE_MAX_SIZE) { - RHASH_ST_TABLE_SET(ret, st_init_table_with_size(&objhash, size)); + hash_st_table_init(ret, &objhash, size); } return ret; } @@ -1981,10 +1955,12 @@ rb_hash_rehash(VALUE hash) else if (RHASH_ST_TABLE_P(hash)) { st_table *old_tab = RHASH_ST_TABLE(hash); tmp = hash_alloc(0); - tbl = st_init_table_with_size(old_tab->type, old_tab->num_entries); - RHASH_ST_TABLE_SET(tmp, tbl); + + hash_st_table_init(tmp, old_tab->type, old_tab->num_entries); + tbl = RHASH_ST_TABLE(tmp); + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); - st_free_table(old_tab); + RHASH_ST_TABLE_SET(hash, tbl); RHASH_ST_CLEAR(tmp); } @@ -2925,13 +2901,12 @@ rb_hash_replace(VALUE hash, VALUE hash2) ar_free_and_clear_table(hash); } else { - st_free_table(RHASH_ST_TABLE(hash)); RHASH_ST_CLEAR(hash); } hash_copy(hash, hash2); if (RHASH_EMPTY_P(hash2) && RHASH_ST_TABLE_P(hash2)) { /* ident hash */ - RHASH_ST_TABLE_SET(hash, st_init_table_with_size(RHASH_TYPE(hash2), 0)); + hash_st_table_init(hash, RHASH_TYPE(hash2), 0); } rb_gc_writebarrier_remember(hash); @@ -4306,8 +4281,6 @@ rb_hash_compact_bang(VALUE hash) return Qnil; } -static st_table *rb_init_identtable_with_size(st_index_t size); - /* * call-seq: * hash.compare_by_identity -> self @@ -4349,10 +4322,11 @@ rb_hash_compare_by_id(VALUE hash) HASH_ASSERT(RHASH_ST_TABLE_P(hash)); tmp = hash_alloc(0); - identtable = rb_init_identtable_with_size(RHASH_SIZE(hash)); - RHASH_ST_TABLE_SET(tmp, identtable); + hash_st_table_init(tmp, &identhash, RHASH_SIZE(hash)); + identtable = RHASH_ST_TABLE(tmp); + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); - st_free_table(RHASH_ST_TABLE(hash)); + RHASH_ST_TABLE_SET(hash, identtable); RHASH_ST_CLEAR(tmp); @@ -4376,7 +4350,7 @@ VALUE rb_ident_hash_new(void) { VALUE hash = rb_hash_new(); - RHASH_ST_TABLE_SET(hash, st_init_table(&identhash)); + hash_st_table_init(hash, &identhash, 0); return hash; } @@ -4384,7 +4358,7 @@ VALUE rb_ident_hash_new_with_size(st_index_t size) { VALUE hash = rb_hash_new(); - RHASH_ST_TABLE_SET(hash, st_init_table_with_size(&identhash, size)); + hash_st_table_init(hash, &identhash, size); return hash; } @@ -4394,12 +4368,6 @@ rb_init_identtable(void) return st_init_table(&identhash); } -static st_table * -rb_init_identtable_with_size(st_index_t size) -{ - return st_init_table_with_size(&identhash, size); -} - static int any_p_i(VALUE key, VALUE value, VALUE arg) { diff --git a/include/ruby/st.h b/include/ruby/st.h index f35ab43603..0d42283364 100644 --- a/include/ruby/st.h +++ b/include/ruby/st.h @@ -104,6 +104,8 @@ st_table *rb_st_init_table(const struct st_hash_type *); #define st_init_table rb_st_init_table st_table *rb_st_init_table_with_size(const struct st_hash_type *, st_index_t); #define st_init_table_with_size rb_st_init_table_with_size +st_table *rb_st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size); +#define st_init_existing_table_with_size rb_st_init_existing_table_with_size st_table *rb_st_init_numtable(void); #define st_init_numtable rb_st_init_numtable st_table *rb_st_init_numtable_with_size(st_index_t); diff --git a/internal/hash.h b/internal/hash.h index 4eea9afbce..c9a8b49c20 100644 --- a/internal/hash.h +++ b/internal/hash.h @@ -40,6 +40,16 @@ enum ruby_rhash_flags { RHASH_LEV_MAX = 127, /* 7 bits */ }; +typedef struct ar_table_pair_struct { + VALUE key; + VALUE val; +} ar_table_pair; + +typedef struct ar_table_struct { + /* 64bit CPU: 8B * 2 * 8 = 128B */ + ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE]; +} ar_table; + struct RHash { struct RBasic basic; const VALUE ifnone; @@ -47,13 +57,16 @@ struct RHash { ar_hint_t ary[RHASH_AR_TABLE_MAX_SIZE]; VALUE word; } ar_hint; - union { - st_table *st; - } as; }; #define RHASH(obj) ((struct RHash *)(obj)) +#ifndef MAX +# define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +#define RHASH_SLOT_SIZE (sizeof(struct RHash) + MAX(sizeof(ar_table), sizeof(st_table))) + #ifdef RHASH_IFNONE # undef RHASH_IFNONE #endif @@ -85,8 +98,6 @@ int rb_hash_stlike_foreach_with_replace(VALUE hash, st_foreach_check_callback_fu int rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func *func, st_data_t arg); VALUE rb_ident_hash_new_with_size(st_index_t size); -size_t rb_hash_size_as_embedded(VALUE hash); - static inline unsigned RHASH_AR_TABLE_SIZE_RAW(VALUE h); static inline VALUE RHASH_IFNONE(VALUE h); static inline size_t RHASH_SIZE(VALUE h); @@ -126,13 +137,13 @@ RHASH_AR_TABLE_P(VALUE h) static inline struct ar_table_struct * RHASH_AR_TABLE(VALUE h) { - return (struct ar_table_struct *)((uintptr_t)h + offsetof(struct RHash, as.st)); + return (struct ar_table_struct *)((uintptr_t)h + sizeof(struct RHash)); } static inline st_table * RHASH_ST_TABLE(VALUE h) { - return RHASH(h)->as.st; + return (st_table *)((uintptr_t)h + sizeof(struct RHash)); } static inline VALUE @@ -173,7 +184,7 @@ RHASH_ST_SIZE(VALUE h) static inline void RHASH_ST_CLEAR(VALUE h) { - RHASH(h)->as.st = NULL; + memset(RHASH_ST_TABLE(h), 0, sizeof(st_table)); FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG); } diff --git a/st.c b/st.c index 411237d13b..b8ad6ab6c2 100644 --- a/st.c +++ b/st.c @@ -509,13 +509,9 @@ stat_col(void) } #endif -/* Create and return table with TYPE which can hold at least SIZE - entries. The real number of entries which the table can hold is - the nearest power of two for SIZE. */ st_table * -st_init_table_with_size(const struct st_hash_type *type, st_index_t size) +st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size) { - st_table *tab; int n; #ifdef HASH_LOG @@ -536,11 +532,7 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size) if (n < 0) return NULL; #endif - tab = (st_table *) malloc(sizeof (st_table)); -#ifndef RUBY - if (tab == NULL) - return NULL; -#endif + tab->type = type; tab->entry_power = n; tab->bin_power = features[n].bin_power; @@ -569,6 +561,30 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size) return tab; } +/* Create and return table with TYPE which can hold at least SIZE + entries. The real number of entries which the table can hold is + the nearest power of two for SIZE. */ +st_table * +st_init_table_with_size(const struct st_hash_type *type, st_index_t size) +{ + st_table *tab = malloc(sizeof(st_table)); +#ifndef RUBY + if (tab == NULL) + return NULL; +#endif + +#ifdef RUBY + st_init_existing_table_with_size(tab, type, size); +#else + if (st_init_existing_table_with_size(tab, type, size) == NULL) { + free(tab); + return NULL; + } +#endif + + return tab; +} + size_t st_table_size(const struct st_table *tbl) { -- cgit v1.2.1