author    Sverker Eriksson <sverker@erlang.org>  2019-07-01 18:38:49 +0200
committer Sverker Eriksson <sverker@erlang.org>  2019-07-02 20:35:54 +0200
commit    4172488e16506ecbf9d66a0fd55a4a91a205e66d (patch)
tree      e8e501231132d43701f3b0422d0727786a0e4fb3 /erts/emulator/beam/erl_db_hash.c
parent    0de9ecd561bdc964f1c6436d240729b3952cdf3a (diff)
download  erlang-4172488e16506ecbf9d66a0fd55a4a91a205e66d.tar.gz
erts: Improve hash shrinking
* Release is_resizing as soon as possible to improve shrink concurrency.
  - Do join of buckets after release, but with kept WLOCK_HASH.
  - Do deallocations of seg and segtab after release of both
    is_resizing and WLOCK_HASH.
* Do lazy initialization of buckets in extended segments.
  - Mark inactive buckets in DEBUG.
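The core of the change is that shrink() now unlinks the top segment while still holding its synchronization, but performs the actual deallocation only after both is_resizing and the bucket write-lock are released. As a standalone model of that unlink-now, free-later pattern (plain pthreads; none of this is ERTS code, names are invented for the demo):

/* deferred_free.c -- standalone model (not ERTS code) of the pattern this
 * commit adopts: unlink under the lock, deallocate after releasing it.
 * Build: cc deferred_free.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct seg { int *slots; size_t n; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct seg *top_seg;             /* shared; guarded by lock */

/* Phase 1 (cf. remove_seg): under the lock, only unlink the segment and
 * hand back what must be freed, keeping the critical section short. */
static struct seg *remove_top(void)
{
    pthread_mutex_lock(&lock);
    struct seg *removed = top_seg;
    top_seg = NULL;                     /* table no longer points at it */
    pthread_mutex_unlock(&lock);
    return removed;
}

/* Phase 2 (cf. dealloc_seg): the expensive free happens unlocked. */
static void dealloc(struct seg *s)
{
    if (s) { free(s->slots); free(s); }
}

int main(void)
{
    top_seg = malloc(sizeof *top_seg);
    top_seg->n = 2048;
    top_seg->slots = calloc(top_seg->n, sizeof(int));

    dealloc(remove_top());              /* unlink locked, free unlocked */
    puts("segment removed under lock, deallocated outside it");
    return 0;
}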
Diffstat (limited to 'erts/emulator/beam/erl_db_hash.c')
-rw-r--r--  erts/emulator/beam/erl_db_hash.c | 200
1 file changed, 142 insertions(+), 58 deletions(-)
diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c
index ceaccf7e44..0302b9f1a1 100644
--- a/erts/emulator/beam/erl_db_hash.c
+++ b/erts/emulator/beam/erl_db_hash.c
@@ -137,6 +137,11 @@
#define BUCKET(tb, i) SEGTAB(tb)[SLOT_IX_TO_SEG_IX(i)]->buckets[(i) & EXT_SEGSZ_MASK]
+#ifdef DEBUG
+# define DBG_BUCKET_INACTIVE ((HashDbTerm*)0xdead5107)
+#endif
+
+
/*
* When deleting a table, the number of records to delete.
* Approximate number, because we must delete entire buckets.
@@ -377,7 +382,7 @@ typedef int (*extra_match_validator_t)(int keypos, Eterm match, Eterm guard, Ete
*/
static struct ext_segtab* alloc_ext_segtab(DbTableHash* tb, unsigned seg_ix);
static void alloc_seg(DbTableHash *tb);
-static int free_seg(DbTableHash *tb, int free_records);
+static int free_seg(DbTableHash *tb);
static HashDbTerm* next_live(DbTableHash *tb, Uint *iptr, erts_rwmtx_t** lck_ptr,
HashDbTerm *list);
static HashDbTerm* search_list(DbTableHash* tb, Eterm key,
@@ -2466,7 +2471,7 @@ static SWord db_free_table_continue_hash(DbTable *tbl, SWord reds)
erts_atomic_set_relb(&tb->fixdel, (erts_aint_t)NULL);
while(tb->nslots != 0) {
- reds -= EXT_SEGSZ/64 + free_seg(tb, 1);
+ reds -= EXT_SEGSZ/64 + free_seg(tb);
/*
* If we have done enough work, get out here.
@@ -2682,7 +2687,14 @@ static void alloc_seg(DbTableHash *tb)
segtab[seg_ix] = (struct segment*) erts_db_alloc(ERTS_ALC_T_DB_SEG,
(DbTable *) tb,
SIZEOF_SEGMENT(EXT_SEGSZ));
- sys_memset(segtab[seg_ix], 0, SIZEOF_SEGMENT(EXT_SEGSZ));
+#ifdef DEBUG
+ {
+ int i;
+ for (i = 0; i < EXT_SEGSZ; i++) {
+ segtab[seg_ix]->buckets[i] = DBG_BUCKET_INACTIVE;
+ }
+ }
+#endif
tb->nslots += EXT_SEGSZ;
}
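alloc_seg() no longer zero-fills a new extended segment; buckets are initialized lazily when grow() activates them, and DEBUG builds poison each slot with DBG_BUCKET_INACTIVE so any read of a never-activated bucket trips an assertion (see the ASSERT added in grow() further down). A standalone sketch of the poison idea, with sizes and names invented for the demo:

/* poison.c -- standalone sketch of the DEBUG poison idea. The sentinel
 * mirrors DBG_BUCKET_INACTIVE; everything else is illustrative, not ERTS
 * code. Build: cc poison.c */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct term { struct term *next; } Term;

#define POISON ((Term*)(uintptr_t)0xdead5107)  /* non-NULL: a deref faults */
#define NSLOTS 8

static Term *buckets[NSLOTS];

static void alloc_segment(void)
{
    /* In the patch this loop is compiled only under #ifdef DEBUG; release
     * builds leave the slots uninitialized and never read one before
     * grow() has activated (written) it. The demo always poisons. */
    for (int i = 0; i < NSLOTS; i++)
        buckets[i] = POISON;
}

static void activate(int i)
{
    assert(buckets[i] == POISON);   /* cf. the new ASSERT in grow() */
    buckets[i] = NULL;              /* now a valid, empty bucket */
}

int main(void)
{
    alloc_segment();
    activate(3);
    printf("slot 3 activated, head = %p\n", (void*)buckets[3]);
    return 0;
}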
@@ -2693,10 +2705,19 @@ static void dealloc_ext_segtab(void* lop_data)
erts_free(ERTS_ALC_T_DB_SEG, est);
}
-/* Shrink table by freeing the top segment
+struct dealloc_seg_ops {
+ struct segment* segp;
+ Uint seg_sz;
+
+ struct ext_segtab* est;
+};
+
+/* Shrink table by removing the top segment
** free_records: 1=free any records in segment, 0=assume segment is empty
+** ds_ops: (out) Instructions for dealloc_seg().
*/
-static int free_seg(DbTableHash *tb, int free_records)
+static int remove_seg(DbTableHash *tb, int free_records,
+ struct dealloc_seg_ops *ds_ops)
{
const int seg_ix = SLOT_IX_TO_SEG_IX(tb->nslots) - 1;
struct segment** const segtab = SEGTAB(tb);
@@ -2704,24 +2725,47 @@ static int free_seg(DbTableHash *tb, int free_records)
Uint seg_sz;
int nrecords = 0;
+ ERTS_LC_ASSERT(IS_TAB_WLOCKED(tb) || tb->common.status & DB_DELETE
+ || erts_atomic_read_nob(&tb->is_resizing));
+
ASSERT(segp != NULL);
-#ifndef DEBUG
- if (free_records)
-#endif
- {
- int i = (seg_ix == 0) ? FIRST_SEGSZ : EXT_SEGSZ;
- while (i--) {
- HashDbTerm* p = segp->buckets[i];
+ if (free_records) {
+ int ix, n;
+ if (seg_ix == 0) {
+ /* First segment (always fully active) */
+ n = FIRST_SEGSZ;
+ ix = FIRST_SEGSZ-1;
+ }
+ else if (NACTIVE(tb) < tb->nslots) {
+ /* Last extended segment partially active */
+ n = (NACTIVE(tb) - FIRST_SEGSZ) & EXT_SEGSZ_MASK;
+ ix = (NACTIVE(tb)-1) & EXT_SEGSZ_MASK;
+ }
+ else {
+ /* Full extended segment */
+ n = EXT_SEGSZ;
+ ix = EXT_SEGSZ - 1;
+ }
+ for ( ; n > 0; n--, ix--) {
+ HashDbTerm* p = segp->buckets[ix & EXT_SEGSZ_MASK];
while(p != 0) {
HashDbTerm* nxt = p->next;
- ASSERT(free_records); /* segment not empty as assumed? */
free_term(tb, p);
p = nxt;
++nrecords;
}
}
}
-
+#ifdef DEBUG
+ else {
+ int ix = (seg_ix == 0) ? FIRST_SEGSZ-1 : EXT_SEGSZ-1;
+ for ( ; ix >= 0; ix--) {
+ ASSERT(segp->buckets[ix] == DBG_BUCKET_INACTIVE);
+ }
+ }
+#endif
+
+ ds_ops->est = NULL;
if (seg_ix >= NSEG_1) {
struct ext_segtab* est = ErtsContainerStruct_(segtab,struct ext_segtab,segtab);
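The hunk above also makes remove_seg() handle a last extended segment that is only partially active, freeing just the NACTIVE part. A worked example of that index arithmetic, with FIRST_SEGSZ = 256 and EXT_SEGSZ = 2048 assumed purely for illustration:

/* seg_math.c -- worked example of remove_seg()'s "partially active last
 * segment" arithmetic. The segment sizes are assumptions for the demo.
 * Build: cc seg_math.c */
#include <stdio.h>

#define FIRST_SEGSZ    256
#define EXT_SEGSZ      2048
#define EXT_SEGSZ_MASK (EXT_SEGSZ - 1)

int main(void)
{
    int nactive = 1000;  /* hypothetical: slots 0..999 active, nslots 2304 */

    /* nactive < nslots, so the top (extended) segment is partial: */
    int n  = (nactive - FIRST_SEGSZ) & EXT_SEGSZ_MASK;  /* 744 buckets  */
    int ix = (nactive - 1) & EXT_SEGSZ_MASK;            /* start at 999 */

    printf("free %d buckets, ix %d down to %d\n", n, ix, ix - n + 1);

    /* The loop indexes buckets[ix & EXT_SEGSZ_MASK], so when the active
     * range wraps inside the segment, a decrementing ix wraps with it. */
    return 0;
}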
@@ -2730,35 +2774,63 @@ static int free_seg(DbTableHash *tb, int free_records)
SET_SEGTAB(tb, est->prev_segtab);
tb->nsegs = est->prev_nsegs;
- if (!tb->common.is_thread_safe) {
- /*
- * Table is doing a graceful shrink operation and we must avoid
- * deallocating this segtab while it may still be read by other
- * threads. Schedule deallocation with thread progress to make
- * sure no lingering threads are still hanging in BUCKET macro
- * with an old segtab pointer.
- */
- erts_schedule_db_free(&tb->common, dealloc_ext_segtab,
- est, &est->lop,
- SIZEOF_EXT_SEGTAB(est->nsegs));
- }
- else
- erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable*)tb, est,
- SIZEOF_EXT_SEGTAB(est->nsegs));
+ ds_ops->est = est;
}
}
+
seg_sz = (seg_ix == 0) ? FIRST_SEGSZ : EXT_SEGSZ;
- erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable *)tb, segp, SIZEOF_SEGMENT(seg_sz));
+ tb->nslots -= seg_sz;
+ ASSERT(tb->nslots >= 0);
+
+ ds_ops->segp = segp;
+ ds_ops->seg_sz = seg_sz;
#ifdef DEBUG
if (seg_ix < tb->nsegs)
SEGTAB(tb)[seg_ix] = NULL;
#endif
- tb->nslots -= seg_sz;
- ASSERT(tb->nslots >= 0);
return nrecords;
}
+/*
+ * Deallocate segment removed by remove_seg()
+ */
+static void dealloc_seg(DbTableHash *tb, struct dealloc_seg_ops* ds_ops)
+{
+ struct ext_segtab* est = ds_ops->est;
+
+ if (est) {
+ if (!tb->common.is_thread_safe) {
+ /*
+ * Table is doing a graceful shrink operation and we must avoid
+ * deallocating this segtab while it may still be read by other
+ * threads. Schedule deallocation with thread progress to make
+ * sure no lingering threads are still hanging in BUCKET macro
+ * with an old segtab pointer.
+ */
+ erts_schedule_db_free(&tb->common, dealloc_ext_segtab,
+ est, &est->lop,
+ SIZEOF_EXT_SEGTAB(est->nsegs));
+ }
+ else
+ erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable*)tb, est,
+ SIZEOF_EXT_SEGTAB(est->nsegs));
+ }
+
+ erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable *)tb,
+ ds_ops->segp, SIZEOF_SEGMENT(ds_ops->seg_sz));
+}
+
+/* Remove and deallocate top segment and all its contained objects */
+static int free_seg(DbTableHash *tb)
+{
+ struct dealloc_seg_ops ds_ops;
+ int reds;
+
+ reds = remove_seg(tb, 1, &ds_ops);
+ dealloc_seg(tb, &ds_ops);
+ return reds;
+}
/*
** Copy terms from ptr1 until ptr2
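When the table is not thread-safe, dealloc_seg() cannot free the old ext_segtab immediately: lock-free readers may still dereference it through the BUCKET macro, so the free is scheduled with thread progress via erts_schedule_db_free(), as shown above. A minimal model of that defer-then-flush idea (the queue and names here are invented, and the explicit flush stands in for a real grace period):

/* defer.c -- minimal model of dealloc_seg()'s "schedule with thread
 * progress" branch; not the ERTS API. Build: cc defer.c */
#include <stdio.h>
#include <stdlib.h>

typedef void (*free_fn)(void *);
struct deferred { free_fn fn; void *arg; struct deferred *next; };
static struct deferred *pending;

/* Don't free now: lock-free readers may still hold the old pointer. */
static void schedule_free(free_fn fn, void *arg)
{
    struct deferred *d = malloc(sizeof *d);
    d->fn = fn; d->arg = arg; d->next = pending;
    pending = d;
}

/* Run once every reader has passed a progress point. */
static void flush_deferred(void)
{
    while (pending) {
        struct deferred *d = pending;
        pending = d->next;
        d->fn(d->arg);
        free(d);
    }
}

int main(void)
{
    char *old_segtab = malloc(64);
    schedule_free(free, old_segtab);  /* unlinked now, freed later */
    flush_deferred();                 /* ...after the grace period  */
    puts("old segtab reclaimed safely");
    return 0;
}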
@@ -2880,6 +2952,7 @@ static void grow(DbTableHash* tb, int nitems)
pnext = &BUCKET(tb, from_ix);
p = *pnext;
to_pnext = &BUCKET(tb, to_ix);
+ ASSERT(*to_pnext == DBG_BUCKET_INACTIVE);
while (p != NULL) {
if (is_pseudo_deleted(p)) { /* rare but possible with fine locking */
*pnext = p->next;
@@ -2916,14 +2989,16 @@ abort:
*/
static void shrink(DbTableHash* tb, int nitems)
{
- HashDbTerm** src_bp;
- HashDbTerm** dst_bp;
+ struct dealloc_seg_ops ds_ops;
+ HashDbTerm* src;
+ HashDbTerm* tail;
HashDbTerm** bp;
erts_rwmtx_t* lck;
int src_ix, dst_ix, low_szm;
int nactive;
int loop_limit = 5;
+ ds_ops.segp = NULL;
do {
if (!begin_resizing(tb))
return; /* already in progress */
@@ -2945,39 +3020,48 @@ static void shrink(DbTableHash* tb, int nitems)
goto abort;
}
- src_bp = &BUCKET(tb, src_ix);
- dst_bp = &BUCKET(tb, dst_ix);
- bp = src_bp;
-
- /*
- * We join lists by appending "dst" at the end of "src"
- * as we must step through "src" anyway to purge pseudo deleted.
- */
- while(*bp != NULL) {
- if (is_pseudo_deleted(*bp)) {
- HashDbTerm* deleted = *bp;
- *bp = deleted->next;
- free_term(tb, deleted);
- } else {
- bp = &(*bp)->next;
- }
- }
- *bp = *dst_bp;
- *dst_bp = *src_bp;
- *src_bp = NULL;
-
+ src = BUCKET(tb, src_ix);
+#ifdef DEBUG
+ BUCKET(tb, src_ix) = DBG_BUCKET_INACTIVE;
+#endif
nactive = src_ix;
erts_atomic_set_nob(&tb->nactive, nactive);
if (dst_ix == 0) {
erts_atomic_set_relb(&tb->szm, low_szm);
}
- WUNLOCK_HASH(lck);
-
if (tb->nslots - src_ix >= EXT_SEGSZ) {
- free_seg(tb, 0);
+ remove_seg(tb, 0, &ds_ops);
}
done_resizing(tb);
+ if (src) {
+ /*
+ * We join buckets by appending "dst" list at the end of "src" list
+ * as we must step through "src" anyway to purge pseudo deleted.
+ */
+ bp = &BUCKET(tb, dst_ix);
+ tail = *bp;
+ *bp = src;
+
+ while(*bp != NULL) {
+ if (is_pseudo_deleted(*bp)) {
+ HashDbTerm* deleted = *bp;
+ *bp = deleted->next;
+ free_term(tb, deleted);
+ } else {
+ bp = &(*bp)->next;
+ }
+ }
+ *bp = tail;
+ }
+
+ WUNLOCK_HASH(lck);
+
+ if (ds_ops.segp) {
+ dealloc_seg(tb, &ds_ops);
+ ds_ops.segp = NULL;
+ }
+
} while (--loop_limit
&& nactive > FIRST_SEGSZ && nitems < SHRINK_LIMIT(nactive));
return;
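After done_resizing() the source bucket list is spliced into the destination bucket, still under WLOCK_HASH: shrink() must walk "src" anyway to purge pseudo-deleted terms, so it appends the old "dst" list at the tail while doing so. A standalone model of that join-and-purge walk (toy node type; not ERTS code):

/* join_purge.c -- standalone model of shrink()'s bucket join: walk "src"
 * purging pseudo-deleted terms, then append the old "dst" list at the
 * tail. Build: cc join_purge.c */
#include <stdio.h>
#include <stdlib.h>

typedef struct node { int key; int deleted; struct node *next; } Node;

static Node *mk(int key, int deleted, Node *next)
{
    Node *n = malloc(sizeof *n);
    n->key = key; n->deleted = deleted; n->next = next;
    return n;
}

/* Mirrors the *bp walk in shrink(): returns the joined bucket head. */
static Node *join(Node *src, Node *dst_head)
{
    Node **bp = &src;
    while (*bp != NULL) {
        if ((*bp)->deleted) {        /* purge pseudo-deleted entry */
            Node *dead = *bp;
            *bp = dead->next;
            free(dead);
        } else {
            bp = &(*bp)->next;       /* keep it, advance */
        }
    }
    *bp = dst_head;                  /* append old dst at the tail */
    return src;
}

int main(void)
{
    Node *src = mk(1, 0, mk(2, 1, mk(3, 0, NULL)));  /* key 2 is deleted */
    Node *dst = mk(9, 0, NULL);
    for (Node *p = join(src, dst); p; p = p->next)
        printf("%d ", p->key);       /* prints: 1 3 9 */
    putchar('\n');
    return 0;
}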