summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosef Ahmad <josef.ahmad@mongodb.com>2023-05-17 15:27:32 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-05-17 16:28:35 +0000
commit846ed8250b4c3322e52c2e51c4a6992c6ce7ba34 (patch)
tree97afaae7f21b62cfa9c24cd94e7d5b18ed24b6c4
parentb46ba042fdce866cb42bca247febecde398fd1c4 (diff)
downloadmongo-846ed8250b4c3322e52c2e51c4a6992c6ce7ba34.tar.gz
SERVER-77083 Index build stepup async task should tolerate stepdowns
-rw-r--r--jstests/noPassthrough/index_build_stepdown_during_async_stepup.js80
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp22
2 files changed, 96 insertions, 6 deletions
diff --git a/jstests/noPassthrough/index_build_stepdown_during_async_stepup.js b/jstests/noPassthrough/index_build_stepdown_during_async_stepup.js
new file mode 100644
index 00000000000..d5d76ca04bd
--- /dev/null
+++ b/jstests/noPassthrough/index_build_stepdown_during_async_stepup.js
@@ -0,0 +1,80 @@
+/**
+ * Verifies that the index build step-up async task handles a stepdown gracefully.
+ *
+ * @tags: [
+ * requires_fcv_71,
+ * requires_replication,
+ * ]
+ */
+(function() {
+"use strict";
+
+load('jstests/noPassthrough/libs/index_build.js');
+load("jstests/libs/fail_point_util.js");
+
+const rst = new ReplSetTest({nodes: 2});
+rst.startSet();
+rst.initiate();
+
+const dbName = 'test';
+const collName = 'coll';
+const primary = rst.getPrimary();
+const primaryDB = primary.getDB(dbName);
+const primaryColl = primaryDB.getCollection(collName);
+
+assert.commandWorked(primaryColl.insert({a: 1}));
+
+rst.awaitReplication();
+
+const secondary = rst.getSecondary();
+
+const hangAfterIndexBuildDumpsInsertsFromBulk =
+ configureFailPoint(primary, 'hangAfterIndexBuildDumpsInsertsFromBulk');
+const hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum =
+ configureFailPoint(secondary, 'hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum');
+
+const waitForIndexBuildToComplete = IndexBuildTest.startIndexBuild(
+ primary, primaryColl.getFullName(), {a: 1}, null, [ErrorCodes.InterruptedDueToReplStateChange]);
+
+// Wait for the primary to start the index build.
+hangAfterIndexBuildDumpsInsertsFromBulk.wait();
+
+assert.commandWorked(primary.adminCommand({replSetStepDown: 60, force: true}));
+
+// The old secondary is now stepping up and checking the active index builds.
+// "IndexBuildsCoordinator-StepUp [..] Active index builds"
+hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.wait();
+checkLog.containsJson(secondary, 20650);
+
+// Step down the new primary.
+const waitForStepDown = startParallelShell(() => {
+ assert.commandWorked(db.adminCommand({replSetStepDown: 60 * 60, force: true}));
+}, secondary.port);
+
+// Wait for the RstlKillOpThread to run again. It first ran when the secondary stepped up (earlier
+// in this test case), and it's running now when it's stepping down again.
+assert.soon(() => checkLog.checkContainsWithCountJson(secondary, 21343, {}, 2));
+
+// Wait for the step-up task to be marked as killPending by the RstlKillOpThread.
+assert.soon(() => {
+ return 1 ===
+ secondary.getDB('test')
+ .currentOp({desc: 'IndexBuildsCoordinator-StepUp', killPending: true})['inprog']
+ .length;
+});
+
+// Turn off the failpoints. Allow the createIndexes command to return
+// InterruptedDueToReplStateChange due to stepdown, the stepped-up secondary to complete the new
+// stepdown, and the index build to succeed.
+hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.off();
+hangAfterIndexBuildDumpsInsertsFromBulk.off();
+waitForIndexBuildToComplete();
+waitForStepDown();
+
+IndexBuildTest.assertIndexesSoon(
+ rst.getPrimary().getDB(dbName).getCollection(collName), 2, ['_id_', 'a_1']);
+IndexBuildTest.assertIndexesSoon(
+ rst.getSecondary().getDB(dbName).getCollection(collName), 2, ['_id_', 'a_1']);
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 4961d533f68..d8c737757ba 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -98,6 +98,7 @@ MONGO_FAIL_POINT_DEFINE(failIndexBuildWithErrorInSecondDrain);
MONGO_FAIL_POINT_DEFINE(hangInRemoveIndexBuildEntryAfterCommitOrAbort);
MONGO_FAIL_POINT_DEFINE(hangIndexBuildOnSetupBeforeTakingLocks);
MONGO_FAIL_POINT_DEFINE(hangAbortIndexBuildByBuildUUIDAfterLocks);
+MONGO_FAIL_POINT_DEFINE(hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum);
IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS()
: ServerStatusSection("indexBuilds"),
@@ -1703,6 +1704,8 @@ void IndexBuildsCoordinator::_onStepUpAsyncTaskFn(OperationContext* opCtx) {
// This reads from system.indexBuilds collection to see if commit quorum got
// satisfied.
try {
+ hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.pauseWhileSet();
+
if (_signalIfCommitQuorumIsSatisfied(opCtx, replState)) {
// The index build has been signalled to commit. As retrying skipped records
// during step-up is done to prevent waiting until commit time, if the build
@@ -1711,12 +1714,20 @@ void IndexBuildsCoordinator::_onStepUpAsyncTaskFn(OperationContext* opCtx) {
return;
}
} catch (DBException& ex) {
+ // If the operation context is interrupted (shutdown, stepdown, killOp), stop
+ // the verification process and exit.
+ opCtx->checkForInterrupt();
+
fassert(31440, ex.toStatus());
}
}
try {
- // Only checks if key generation is valid, does not actually insert.
+ // Unlike the primary, secondaries cannot fail immediately when detecting key
+ // generation errors; they instead temporarily store them in the 'skipped records'
+ // table, to validate them on commit. As an optimisation to potentially detect
+ // errors earlier, check the table on step-up. Unlike during commit, we only check
+ // key generation here, we do not actually insert the keys.
uassertStatusOK(_indexBuildsManager.retrySkippedRecords(
opCtx,
replState->buildUUID,
@@ -1724,13 +1735,12 @@ void IndexBuildsCoordinator::_onStepUpAsyncTaskFn(OperationContext* opCtx) {
IndexBuildsManager::RetrySkippedRecordMode::kKeyGeneration));
} catch (const DBException& ex) {
- // Shutdown or replication state change might happen while iterating the index
- // builds. In both cases, the opCtx is interrupted, in which case we want to stop
- // the verification process and exit. This might also be the case for a killOp.
+ // If the operation context is interrupted (shutdown, stepdown, killOp), stop the
+ // verification process and exit.
opCtx->checkForInterrupt();
- // All other errors must be due to key generation. We can abort the build early as
- // it would eventually fail anyways during the commit phase retry.
+ // All other errors must be due to key generation. Abort the build now, instead of
+ // failing later during the commit phase retry.
auto status = ex.toStatus().withContext("Skipped records retry failed on step-up");
abortIndexBuildByBuildUUID(
opCtx, replState->buildUUID, IndexBuildAction::kPrimaryAbort, status.reason());