summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Zhu <peter@peterzhu.ca>2023-04-19 16:02:36 -0400
committerPeter Zhu <peter@peterzhu.ca>2023-05-17 09:19:40 -0400
commit0938964ba1af3924cf969fb809fc3598892bc20d (patch)
treec87a84dcbec890a0e35a4a84c96a0892653dbbc0
parent5199f2aaf9527c97e6ec371e19748d0c2ac7a70e (diff)
downloadruby-0938964ba1af3924cf969fb809fc3598892bc20d.tar.gz
Implement Hash ST tables on VWA
-rw-r--r--array.c10
-rw-r--r--gc.c18
-rw-r--r--hash.c110
-rw-r--r--include/ruby/st.h2
-rw-r--r--internal/hash.h27
-rw-r--r--st.c36
6 files changed, 103 insertions, 100 deletions
diff --git a/array.c b/array.c
index fc7eebd050..72c796728d 100644
--- a/array.c
+++ b/array.c
@@ -6416,6 +6416,12 @@ rb_ary_count(int argc, VALUE *argv, VALUE ary)
static VALUE
flatten(VALUE ary, int level)
{
+ static const rb_data_type_t flatten_memo_data_type = {
+ .wrap_struct_name = "array_flatten_memo_data_type",
+ .function = { NULL, (RUBY_DATA_FUNC)st_free_table },
+ NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
+ };
+
long i;
VALUE stack, result, tmp = 0, elt, vmemo;
st_table *memo = 0;
@@ -6441,10 +6447,8 @@ flatten(VALUE ary, int level)
rb_ary_push(stack, LONG2NUM(i + 1));
if (level < 0) {
- vmemo = rb_hash_new();
- RBASIC_CLEAR_CLASS(vmemo);
memo = st_init_numtable();
- rb_hash_st_table_set(vmemo, memo);
+ vmemo = TypedData_Wrap_Struct(0, &flatten_memo_data_type, memo);
st_insert(memo, (st_data_t)ary, (st_data_t)Qtrue);
st_insert(memo, (st_data_t)tmp, (st_data_t)Qtrue);
}
diff --git a/gc.c b/gc.c
index d2cf6d07db..0f64efefa5 100644
--- a/gc.c
+++ b/gc.c
@@ -3568,9 +3568,12 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
RB_DEBUG_COUNTER_INC(obj_hash_st);
}
#endif
- if (!RHASH_AR_TABLE_P(obj)) {
- GC_ASSERT(RHASH_ST_TABLE_P(obj));
- st_free_table(RHASH(obj)->as.st);
+
+ if (RHASH_ST_TABLE_P(obj)) {
+ st_table *tab = RHASH_ST_TABLE(obj);
+
+ if (tab->bins != NULL) free(tab->bins);
+ free(tab->entries);
}
break;
case T_REGEXP:
@@ -4909,11 +4912,10 @@ obj_memsize_of(VALUE obj, int use_all_types)
size += rb_ary_memsize(obj);
break;
case T_HASH:
- if (RHASH_AR_TABLE_P(obj)) {
- }
- else {
+ if (RHASH_ST_TABLE_P(obj)) {
VM_ASSERT(RHASH_ST_TABLE(obj) != NULL);
- size += st_memsize(RHASH_ST_TABLE(obj));
+ /* st_table is in the slot */
+ size += st_memsize(RHASH_ST_TABLE(obj)) - sizeof(st_table);
}
break;
case T_REGEXP:
@@ -8402,7 +8404,7 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V
break;
case T_HASH:
- obj_size = rb_hash_size_as_embedded(src);
+ obj_size = RHASH_SLOT_SIZE;
break;
default:
diff --git a/hash.c b/hash.c
index f4aeee36c6..cef784cccc 100644
--- a/hash.c
+++ b/hash.c
@@ -378,27 +378,6 @@ typedef st_index_t st_hash_t;
#define RHASH_AR_TABLE_REF(hash, n) (&RHASH_AR_TABLE(hash)->pairs[n])
#define RHASH_AR_CLEARED_HINT 0xff
-typedef struct ar_table_pair_struct {
- VALUE key;
- VALUE val;
-} ar_table_pair;
-
-typedef struct ar_table_struct {
- /* 64bit CPU: 8B * 2 * 8 = 128B */
- ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE];
-} ar_table;
-
-#define RHASH_EMBED_SIZE (offsetof(struct RHash, as.st) + sizeof(ar_table))
-
-size_t
-rb_hash_size_as_embedded(VALUE hash)
-{
- if (RHASH_AR_TABLE_P(hash)) {
- return RHASH_EMBED_SIZE;
- }
- return sizeof(struct RHash);
-}
-
static inline st_hash_t
ar_do_hash(st_data_t key)
{
@@ -539,6 +518,7 @@ hash_verify_(VALUE hash, const char *file, int line)
HASH_ASSERT(RHASH_AR_TABLE_SIZE_RAW(hash) == 0);
HASH_ASSERT(RHASH_AR_TABLE_BOUND_RAW(hash) == 0);
}
+
return hash;
}
@@ -564,25 +544,25 @@ RHASH_TABLE_EMPTY_P(VALUE hash)
return RHASH_SIZE(hash) == 0;
}
+#define RHASH_SET_ST_FLAG(h) FL_SET_RAW(h, RHASH_ST_TABLE_FLAG)
+#define RHASH_UNSET_ST_FLAG(h) FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG)
+
+static void
+hash_st_table_init(VALUE hash, const struct st_hash_type *type, st_index_t size)
+{
+ st_init_existing_table_with_size(RHASH_ST_TABLE(hash), type, size);
+ RHASH_SET_ST_FLAG(hash);
+}
+
void
rb_hash_st_table_set(VALUE hash, st_table *st)
{
HASH_ASSERT(st != NULL);
- FL_SET_RAW((hash), RHASH_ST_TABLE_FLAG);
- RHASH(hash)->as.st = st;
-}
+ RHASH_SET_ST_FLAG(hash);
-static void
-hash_ar_table_set(VALUE hash, ar_table *ar)
-{
- HASH_ASSERT(RHASH_AR_TABLE_P(hash));
- *(RHASH_AR_TABLE(hash)) = *ar;
- hash_verify(hash);
+ *RHASH_ST_TABLE(hash) = *st;
}
-#define RHASH_SET_ST_FLAG(h) FL_SET_RAW(h, RHASH_ST_TABLE_FLAG)
-#define RHASH_UNSET_ST_FLAG(h) FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG)
-
static inline void
RHASH_AR_TABLE_BOUND_SET(VALUE h, st_index_t n)
{
@@ -646,9 +626,6 @@ ar_alloc_table(VALUE hash)
ar_table *tab = RHASH_AR_TABLE(hash);
memset(tab, 0, sizeof(ar_table));
- // RHASH_UNSET_ST_FLAG(hash);
- // hash_ar_table_set(hash, tab);
-
RHASH_AR_TABLE_SIZE_SET(hash, 0);
RHASH_AR_TABLE_BOUND_SET(hash, 0);
@@ -730,28 +707,29 @@ ar_try_convert_table(VALUE hash)
const unsigned size = RHASH_AR_TABLE_SIZE(hash);
- st_table *new_tab;
- st_index_t i;
-
if (size < RHASH_AR_TABLE_MAX_SIZE) {
return;
}
- new_tab = st_init_table_with_size(&objhash, size * 2);
+ st_table tab;
+ st_table *new_tab = &tab;
+ rb_st_init_existing_table_with_size(new_tab, &objhash, size * 2);
- for (i = 0; i < RHASH_AR_TABLE_MAX_BOUND; i++) {
+ for (st_index_t i = 0; i < RHASH_AR_TABLE_MAX_BOUND; i++) {
ar_table_pair *pair = RHASH_AR_TABLE_REF(hash, i);
st_add_direct(new_tab, pair->key, pair->val);
}
+
ar_free_and_clear_table(hash);
RHASH_ST_TABLE_SET(hash, new_tab);
- return;
}
static st_table *
ar_force_convert_table(VALUE hash, const char *file, int line)
{
st_table *new_tab;
+ st_table tab;
+ new_tab = &tab;
if (RHASH_ST_TABLE_P(hash)) {
return RHASH_ST_TABLE(hash);
@@ -760,13 +738,7 @@ ar_force_convert_table(VALUE hash, const char *file, int line)
if (RHASH_AR_TABLE(hash)) {
unsigned i, bound = RHASH_AR_TABLE_BOUND(hash);
-#if defined(RHASH_CONVERT_TABLE_DEBUG) && RHASH_CONVERT_TABLE_DEBUG
- rb_obj_info_dump(hash);
- fprintf(stderr, "force_convert: %s:%d\n", file, line);
- RB_DEBUG_COUNTER_INC(obj_hash_force_convert);
-#endif
-
- new_tab = st_init_table_with_size(&objhash, RHASH_AR_TABLE_SIZE(hash));
+ rb_st_init_existing_table_with_size(new_tab, &objhash, RHASH_AR_TABLE_SIZE(hash));
for (i = 0; i < bound; i++) {
if (ar_cleared_entry(hash, i)) continue;
@@ -777,10 +749,13 @@ ar_force_convert_table(VALUE hash, const char *file, int line)
ar_free_and_clear_table(hash);
}
else {
- new_tab = st_init_table(&objhash);
+ rb_st_init_existing_table_with_size(new_tab, &objhash, 0);
}
+
RHASH_ST_TABLE_SET(hash, new_tab);
+ new_tab = RHASH_ST_TABLE(hash);
+
return new_tab;
}
@@ -1193,7 +1168,6 @@ ar_copy(VALUE hash1, VALUE hash2)
RHASH(hash1)->ar_hint.word = RHASH(hash2)->ar_hint.word;
RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2));
RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2));
- hash_ar_table_set(hash1, new_tab);
rb_gc_writebarrier_remember(hash1);
@@ -1454,7 +1428,7 @@ static VALUE
hash_alloc_flags(VALUE klass, VALUE flags, VALUE ifnone)
{
const VALUE wb = (RGENGC_WB_PROTECTED_HASH ? FL_WB_PROTECTED : 0);
- NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_EMBED_SIZE, 0);
+ NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_SLOT_SIZE, 0);
RHASH_SET_IFNONE((VALUE)hash, ifnone);
@@ -1498,7 +1472,7 @@ rb_hash_new_with_size(st_index_t size)
/* do nothing */
}
else if (size > RHASH_AR_TABLE_MAX_SIZE) {
- RHASH_ST_TABLE_SET(ret, st_init_table_with_size(&objhash, size));
+ hash_st_table_init(ret, &objhash, size);
}
return ret;
}
@@ -1981,10 +1955,12 @@ rb_hash_rehash(VALUE hash)
else if (RHASH_ST_TABLE_P(hash)) {
st_table *old_tab = RHASH_ST_TABLE(hash);
tmp = hash_alloc(0);
- tbl = st_init_table_with_size(old_tab->type, old_tab->num_entries);
- RHASH_ST_TABLE_SET(tmp, tbl);
+
+ hash_st_table_init(tmp, old_tab->type, old_tab->num_entries);
+ tbl = RHASH_ST_TABLE(tmp);
+
rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
- st_free_table(old_tab);
+
RHASH_ST_TABLE_SET(hash, tbl);
RHASH_ST_CLEAR(tmp);
}
@@ -2925,13 +2901,12 @@ rb_hash_replace(VALUE hash, VALUE hash2)
ar_free_and_clear_table(hash);
}
else {
- st_free_table(RHASH_ST_TABLE(hash));
RHASH_ST_CLEAR(hash);
}
hash_copy(hash, hash2);
if (RHASH_EMPTY_P(hash2) && RHASH_ST_TABLE_P(hash2)) {
/* ident hash */
- RHASH_ST_TABLE_SET(hash, st_init_table_with_size(RHASH_TYPE(hash2), 0));
+ hash_st_table_init(hash, RHASH_TYPE(hash2), 0);
}
rb_gc_writebarrier_remember(hash);
@@ -4306,8 +4281,6 @@ rb_hash_compact_bang(VALUE hash)
return Qnil;
}
-static st_table *rb_init_identtable_with_size(st_index_t size);
-
/*
* call-seq:
* hash.compare_by_identity -> self
@@ -4349,10 +4322,11 @@ rb_hash_compare_by_id(VALUE hash)
HASH_ASSERT(RHASH_ST_TABLE_P(hash));
tmp = hash_alloc(0);
- identtable = rb_init_identtable_with_size(RHASH_SIZE(hash));
- RHASH_ST_TABLE_SET(tmp, identtable);
+ hash_st_table_init(tmp, &identhash, RHASH_SIZE(hash));
+ identtable = RHASH_ST_TABLE(tmp);
+
rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
- st_free_table(RHASH_ST_TABLE(hash));
+
RHASH_ST_TABLE_SET(hash, identtable);
RHASH_ST_CLEAR(tmp);
@@ -4376,7 +4350,7 @@ VALUE
rb_ident_hash_new(void)
{
VALUE hash = rb_hash_new();
- RHASH_ST_TABLE_SET(hash, st_init_table(&identhash));
+ hash_st_table_init(hash, &identhash, 0);
return hash;
}
@@ -4384,7 +4358,7 @@ VALUE
rb_ident_hash_new_with_size(st_index_t size)
{
VALUE hash = rb_hash_new();
- RHASH_ST_TABLE_SET(hash, st_init_table_with_size(&identhash, size));
+ hash_st_table_init(hash, &identhash, size);
return hash;
}
@@ -4394,12 +4368,6 @@ rb_init_identtable(void)
return st_init_table(&identhash);
}
-static st_table *
-rb_init_identtable_with_size(st_index_t size)
-{
- return st_init_table_with_size(&identhash, size);
-}
-
static int
any_p_i(VALUE key, VALUE value, VALUE arg)
{
diff --git a/include/ruby/st.h b/include/ruby/st.h
index f35ab43603..0d42283364 100644
--- a/include/ruby/st.h
+++ b/include/ruby/st.h
@@ -104,6 +104,8 @@ st_table *rb_st_init_table(const struct st_hash_type *);
#define st_init_table rb_st_init_table
st_table *rb_st_init_table_with_size(const struct st_hash_type *, st_index_t);
#define st_init_table_with_size rb_st_init_table_with_size
+st_table *rb_st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size);
+#define st_init_existing_table_with_size rb_st_init_existing_table_with_size
st_table *rb_st_init_numtable(void);
#define st_init_numtable rb_st_init_numtable
st_table *rb_st_init_numtable_with_size(st_index_t);
diff --git a/internal/hash.h b/internal/hash.h
index 4eea9afbce..c9a8b49c20 100644
--- a/internal/hash.h
+++ b/internal/hash.h
@@ -40,6 +40,16 @@ enum ruby_rhash_flags {
RHASH_LEV_MAX = 127, /* 7 bits */
};
+typedef struct ar_table_pair_struct {
+ VALUE key;
+ VALUE val;
+} ar_table_pair;
+
+typedef struct ar_table_struct {
+ /* 64bit CPU: 8B * 2 * 8 = 128B */
+ ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE];
+} ar_table;
+
struct RHash {
struct RBasic basic;
const VALUE ifnone;
@@ -47,13 +57,16 @@ struct RHash {
ar_hint_t ary[RHASH_AR_TABLE_MAX_SIZE];
VALUE word;
} ar_hint;
- union {
- st_table *st;
- } as;
};
#define RHASH(obj) ((struct RHash *)(obj))
+#ifndef MAX
+# define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#define RHASH_SLOT_SIZE (sizeof(struct RHash) + MAX(sizeof(ar_table), sizeof(st_table)))
+
#ifdef RHASH_IFNONE
# undef RHASH_IFNONE
#endif
@@ -85,8 +98,6 @@ int rb_hash_stlike_foreach_with_replace(VALUE hash, st_foreach_check_callback_fu
int rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func *func, st_data_t arg);
VALUE rb_ident_hash_new_with_size(st_index_t size);
-size_t rb_hash_size_as_embedded(VALUE hash);
-
static inline unsigned RHASH_AR_TABLE_SIZE_RAW(VALUE h);
static inline VALUE RHASH_IFNONE(VALUE h);
static inline size_t RHASH_SIZE(VALUE h);
@@ -126,13 +137,13 @@ RHASH_AR_TABLE_P(VALUE h)
static inline struct ar_table_struct *
RHASH_AR_TABLE(VALUE h)
{
- return (struct ar_table_struct *)((uintptr_t)h + offsetof(struct RHash, as.st));
+ return (struct ar_table_struct *)((uintptr_t)h + sizeof(struct RHash));
}
static inline st_table *
RHASH_ST_TABLE(VALUE h)
{
- return RHASH(h)->as.st;
+ return (st_table *)((uintptr_t)h + sizeof(struct RHash));
}
static inline VALUE
@@ -173,7 +184,7 @@ RHASH_ST_SIZE(VALUE h)
static inline void
RHASH_ST_CLEAR(VALUE h)
{
- RHASH(h)->as.st = NULL;
+ memset(RHASH_ST_TABLE(h), 0, sizeof(st_table));
FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG);
}
diff --git a/st.c b/st.c
index 411237d13b..b8ad6ab6c2 100644
--- a/st.c
+++ b/st.c
@@ -509,13 +509,9 @@ stat_col(void)
}
#endif
-/* Create and return table with TYPE which can hold at least SIZE
- entries. The real number of entries which the table can hold is
- the nearest power of two for SIZE. */
st_table *
-st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
+st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
{
- st_table *tab;
int n;
#ifdef HASH_LOG
@@ -536,11 +532,7 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
if (n < 0)
return NULL;
#endif
- tab = (st_table *) malloc(sizeof (st_table));
-#ifndef RUBY
- if (tab == NULL)
- return NULL;
-#endif
+
tab->type = type;
tab->entry_power = n;
tab->bin_power = features[n].bin_power;
@@ -569,6 +561,30 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
return tab;
}
+/* Create and return table with TYPE which can hold at least SIZE
+ entries. The real number of entries which the table can hold is
+ the nearest power of two for SIZE. */
+st_table *
+st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
+{
+ st_table *tab = malloc(sizeof(st_table));
+#ifndef RUBY
+ if (tab == NULL)
+ return NULL;
+#endif
+
+#ifdef RUBY
+ st_init_existing_table_with_size(tab, type, size);
+#else
+ if (st_init_existing_table_with_size(tab, type, size) == NULL) {
+ free(tab);
+ return NULL;
+ }
+#endif
+
+ return tab;
+}
+
size_t
st_table_size(const struct st_table *tbl)
{