/**
 * Helper functions for running tests related to sync source selection during a tenant migration.
 */

import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
import {makeX509OptionsForTest} from "jstests/replsets/libs/tenant_migration_util.js";
load("jstests/libs/fail_point_util.js");
load("jstests/libs/uuid_util.js");
load("jstests/libs/write_concern_util.js");
load('jstests/replsets/rslib.js');

/**
 * Starts up a tenant migration with 'secondary' read preference, and ensures that both donor
 * secondaries are not eligible sync sources.
 *
 * When this function returns, the recipient primary should be hanging during sync source selection.
 * We expect 'donorSecondary' to be shut down and 'delayedSecondary' to be behind the
 * 'startApplyingDonorOpTime' stored in the recipient state document. As a result, neither node is
 * an eligible sync source for the migration.
 *
 * State left behind for the caller (nothing in this function undoes it):
 *   - replication on 'delayedSecondary' has been stopped via stopServerReplication();
 *   - 'donorSecondary' has been stopped via donorRst.stop();
 *   - the 'fpAfterRetrievingStartOpTimesMigrationRecipientInstance' failpoint is still enabled on
 *     the new recipient primary.
 *
 * @returns {Object} An object with the following fields:
 *   - tenantMigrationTest: the TenantMigrationTest fixture created here (it wraps the 3-node donor
 *     replica set that this function starts).
 *   - migrationOpts: the options passed to startMigration() (migrationIdString, tenantId and a
 *     'secondary' readPreference).
 *   - donorSecondary: the donor secondary that was shut down.
 *   - delayedSecondary: the donor secondary whose replication was stopped.
 *   - hangAfterCreatingConnections: the handle returned by configureFailPoint() for the
 *     'fpAfterRetrievingStartOpTimesMigrationRecipientInstance' failpoint on the new recipient
 *     primary.
 */
export function setUpMigrationSyncSourceTest() {
    // Three-node donor set with chaining disabled.
    const donorRst = new ReplSetTest({
        name: `${jsTestName()}_donor`,
        nodes: 3,
        serverless: true,
        settings: {chainingAllowed: false},
        nodeOptions: Object.assign(makeX509OptionsForTest().donor, {
            setParameter: {
                tenantMigrationExcludeDonorHostTimeoutMS: 30 * 1000,
                // Allow non-timestamped reads on donor after migration completes for testing.
                'failpoint.tenantMigrationDonorAllowsNonTimestampedReads': tojson({mode: 'alwaysOn'}),
            }
        }),
    });
    donorRst.startSet();
    donorRst.initiateWithHighElectionTimeout();

    const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), donorRst});

    const tenantId = ObjectId().str;
    const tenantDB = tenantMigrationTest.tenantDB(tenantId, "DB");
    const collName = "testColl";

    // Roles of the two donor secondaries for the rest of this function:
    //   - 'delayedSecondary' has its replication stopped so that it lags behind;
    //   - 'donorSecondary' is the up-to-date secondary that is later shut down.
    const donorPrimary = tenantMigrationTest.getDonorPrimary();
    const delayedSecondary = donorRst.getSecondaries()[0];
    const donorSecondary = donorRst.getSecondaries()[1];

    // The default WC is majority and stopServerReplication will prevent satisfying any majority
    // writes.
    assert.commandWorked(donorPrimary.adminCommand(
        {setDefaultRWConcern: 1, defaultWriteConcern: {w: 1}, writeConcern: {w: "majority"}}));
    donorRst.awaitReplication();

    const recipientRst = tenantMigrationTest.getRecipientRst();
    const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
    // This recipient secondary is stepped up later to become the new recipient primary.
    const newRecipientPrimary = recipientRst.getSecondary();

    tenantMigrationTest.insertDonorDB(tenantDB, collName);

    // Configure all failpoints before the migration starts so that the donor and the recipient
    // primary stop at the points this function waits on below.
    const hangDonorBeforeEnteringDataSync = configureFailPoint(
        donorPrimary, "pauseTenantMigrationBeforeLeavingAbortingIndexBuildsState");

    const hangRecipientPrimaryAfterCreatingRSM =
        configureFailPoint(recipientPrimary, 'hangAfterCreatingRSM');
    const hangRecipientPrimaryAfterCreatingConnections =
        configureFailPoint(recipientPrimary,
                           'fpAfterStartingOplogFetcherMigrationRecipientInstance',
                           {action: "hang"});

    const migrationOpts = {
        migrationIdString: extractUUIDFromObject(UUID()),
        tenantId,
        // The recipient primary can only choose secondaries as sync sources.
        readPreference: {mode: 'secondary'},
    };

    jsTestLog("Starting the tenant migration");
    assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));

    // Stop replicating on one of the secondaries so that its majority OpTime will be behind the
    // recipient's 'startApplyingDonorOpTime'. Do this immediately before the write to enter the
    // data sync state, so external keys will already have replicated to every donor node.
    hangDonorBeforeEnteringDataSync.wait();
    stopServerReplication(delayedSecondary);
    hangDonorBeforeEnteringDataSync.off();

    // While the recipient primary hangs after creating its RSM, wait until it sees both donor
    // secondaries as healthy secondaries.
    hangRecipientPrimaryAfterCreatingRSM.wait();
    awaitRSClientHosts(recipientPrimary, donorSecondary, {ok: true, secondary: true});
    awaitRSClientHosts(recipientPrimary, delayedSecondary, {ok: true, secondary: true});

    // Turn on the 'waitInHello' failpoint. This will cause the delayed secondary to cease sending
    // hello responses and the RSM should mark the node as down. This is necessary so that the
    // delayed secondary is not chosen as the sync source here, since we want the
    // 'startApplyingDonorOpTime' to be set to the most advanced majority OpTime.
    jsTestLog(
        "Turning on waitInHello failpoint. Delayed donor secondary should stop sending hello responses.");
    const helloFailpoint = configureFailPoint(delayedSecondary, "waitInHello");
    awaitRSClientHosts(recipientPrimary, delayedSecondary, {ok: false});

    hangRecipientPrimaryAfterCreatingRSM.off();
    hangRecipientPrimaryAfterCreatingConnections.wait();

    // NOTE(review): 'inprog[0]' is read without checking that the currentOp filter matched
    // anything; if 'inprog' were empty, the assertions below would fail on an undefined 'currOp'.
    let res = recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"});
    let currOp = res.inprog[0];
    // The migration should not be complete.
    assert.eq(currOp.garbageCollectable, false, tojson(res));
    assert.eq(currOp.migrationCompleted, false, tojson(res));
    // The sync source can only be 'donorSecondary'.
    assert.eq(donorSecondary.host, currOp.donorSyncSource, tojson(res));

    // Let the delayed secondary answer hello requests again; its replication remains stopped.
    helloFailpoint.off();

    // Configure the failpoints on the future primary before stepping it up. Note that the second
    // one uses a different failpoint name
    // ('fpAfterRetrievingStartOpTimesMigrationRecipientInstance') than the one used on the
    // original recipient primary above.
    const hangNewRecipientPrimaryAfterCreatingRSM =
        configureFailPoint(newRecipientPrimary, 'hangAfterCreatingRSM');
    const hangNewRecipientPrimaryAfterCreatingConnections =
        configureFailPoint(newRecipientPrimary,
                           'fpAfterRetrievingStartOpTimesMigrationRecipientInstance',
                           {action: "hang"});

    // Step up a new primary so that the tenant migration restarts on the new primary, with the
    // 'startApplyingDonorOpTime' field already set in the state doc.
    jsTestLog("Stepping up the recipient secondary");
    recipientRst.awaitLastOpCommitted();
    recipientRst.stepUp(newRecipientPrimary);
    assert.eq(newRecipientPrimary, recipientRst.getPrimary());

    jsTestLog("Stopping the non-lagged secondary");
    donorRst.stop(donorSecondary);

    // Wait for the new primary to see the state of each donor node. 'donorSecondary' should return
    // '{ok: false}' since it has been shut down.
    hangNewRecipientPrimaryAfterCreatingRSM.wait();
    awaitRSClientHosts(newRecipientPrimary, donorPrimary, {ok: true, ismaster: true});
    awaitRSClientHosts(newRecipientPrimary, delayedSecondary, {ok: true, secondary: true});
    awaitRSClientHosts(newRecipientPrimary, donorSecondary, {ok: false});

    // The second off() call below targets the failpoint that was set on the ORIGINAL recipient
    // primary. The corresponding failpoint on the new primary
    // ('hangNewRecipientPrimaryAfterCreatingConnections') is left enabled and is handed back to
    // the caller in the return value.
    jsTestLog("Releasing failpoints");
    hangNewRecipientPrimaryAfterCreatingRSM.off();
    hangRecipientPrimaryAfterCreatingConnections.off();

    // As above, 'inprog[0]' is used without a length check.
    res = newRecipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"});
    currOp = res.inprog[0];
    // The migration should not be complete and there should be no sync source stored, since the new
    // recipient primary does not have a valid sync source to choose from.
    assert.eq(currOp.garbageCollectable, false, tojson(res));
    assert.eq(currOp.migrationCompleted, false, tojson(res));
    assert(!currOp.donorSyncSource, tojson(res));

    return {
        tenantMigrationTest,
        migrationOpts,
        donorSecondary,
        delayedSecondary,
        hangAfterCreatingConnections: hangNewRecipientPrimaryAfterCreatingConnections
    };
}