summary | refs | log | tree | commit | diff
path: root/ctdb
diff options
context:
space:
mode:
author Amitay Isaacs <amitay@gmail.com> 2018-02-14 15:18:17 +1100
committer Karolin Seeger <kseeger@samba.org> 2018-10-09 11:29:25 +0200
commit 00a263982cdf666c1c9c409e4f07df8f5d38ab98 (patch)
tree 6df772059081f4dd2234326e207dfe89856d0ed6 /ctdb
parent d08665ffa86fe3446bbed363d876d790a399e309 (diff)
download samba-00a263982cdf666c1c9c409e4f07df8f5d38ab98.tar.gz
ctdb-vacuum: Fix the incorrect counting of remote errors
If a node fails to delete a record in the TRY_DELETE_RECORDS control during vacuuming, then it is possible that other nodes may also fail to delete that record. So instead of deleting the record from the RB tree on the first failure, keep track of the remote failures. Update the delete_list.remote_error and delete_list.left statistics only once per record, during delete_record_traverse. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs <amitay@gmail.com> Reviewed-by: Martin Schwenke <martin@meltin.net> (cherry picked from commit ef052397173522ac2dd0d0bd9660a18a13a3e4fc)
Diffstat (limited to 'ctdb')
-rw-r--r-- ctdb/server/ctdb_vacuum.c 22
1 files changed, 13 insertions, 9 deletions
diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c
index 5aa0ca7dcc0..8faf803efb9 100644
--- a/ctdb/server/ctdb_vacuum.c
+++ b/ctdb/server/ctdb_vacuum.c
@@ -107,6 +107,7 @@ struct delete_record_data {
struct ctdb_context *ctdb;
struct ctdb_db_context *ctdb_db;
struct ctdb_ltdb_header hdr;
+ uint32_t remote_fail_count;
TDB_DATA key;
uint8_t keydata[1];
};
@@ -149,6 +150,7 @@ static int insert_delete_record_data_into_tree(struct ctdb_context *ctdb,
memcpy(dd->keydata, key.dptr, key.dsize);
dd->hdr = *hdr;
+ dd->remote_fail_count = 0;
hash = ctdb_hash(&key);
@@ -451,6 +453,13 @@ static int delete_record_traverse(void *param, void *data)
uint32_t lmaster;
uint32_t hash = ctdb_hash(&(dd->key));
+ if (dd->remote_fail_count > 0) {
+ vdata->count.delete_list.remote_error++;
+ vdata->count.delete_list.left--;
+ talloc_free(dd);
+ return 0;
+ }
+
res = tdb_chainlock(ctdb_db->ltdb->tdb, dd->key);
if (res != 0) {
DEBUG(DEBUG_ERR,
@@ -828,22 +837,17 @@ static void ctdb_process_delete_list(struct ctdb_db_context *ctdb_db,
ctdb_hash(&reckey));
if (dd != NULL) {
/*
- * The other node could not delete the
- * record and it is the first node that
- * failed. So we should remove it from
- * the tree and update statistics.
+ * The remote node could not delete the
+ * record. Since other remote nodes can
+ * also fail, we just mark the record.
*/
- talloc_free(dd);
- vdata->count.delete_list.remote_error++;
- vdata->count.delete_list.left--;
+ dd->remote_fail_count++;
} else {
DEBUG(DEBUG_ERR, (__location__ " Failed to "
"find record with hash 0x%08x coming "
"back from TRY_DELETE_RECORDS "
"control in delete list.\n",
ctdb_hash(&reckey)));
- vdata->count.delete_list.local_error++;
- vdata->count.delete_list.left--;
}
rec = (struct ctdb_rec_data_old *)(rec->length + (uint8_t *)rec);