summaryrefslogtreecommitdiff
path: root/jstests/replsets/initial_sync_with_partial_transaction.js
blob: a96387bbb318f55d7e1b53132dbf79442e6ffe03 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/**
 * Tests that an initial sync that starts in the middle of a large transaction on a secondary
 * is able to complete and apply the entire transaction.
 *
 * @tags: [
 *   exclude_from_large_txns,
 *   uses_transactions,
 * ]
 */
(function() {
"use strict";
load("jstests/replsets/rslib.js");  // For reconnect()
load("jstests/libs/fail_point_util.js");

const replTest = new ReplSetTest({
    nodes: [{}, {rsConfig: {priority: 0}}],
    nodeOptions: {
        setParameter:
            // We want two entries in each oplog batch, so the beginning of the transaction is not
            // the end of the batch.
            {maxNumberOfTransactionOperationsInSingleOplogEntry: 1, bgSyncOplogFetcherBatchSize: 2}
    },
});

replTest.startSet();
replTest.initiateWithHighElectionTimeout();

// We have to add and pause the initial sync node here, because we cannot re-initiate the set while
// the failpoints are held.
jsTestLog("Adding initial sync node");
const initialSyncNode = replTest.add({
    rsConfig: {priority: 0, votes: 0},
    setParameter: {
        'initialSyncSourceReadPreference': 'secondary',
        'failpoint.initialSyncHangBeforeChoosingSyncSource': tojson({mode: 'alwaysOn'}),
        'logComponentVerbosity': tojsononeline({replication: {initialSync: 1}})
    }
});
replTest.reInitiate();

const dbName = jsTest.name();
const collName = "coll";

const primary = replTest.getPrimary();
const testDB = primary.getDB(dbName);
const syncSource = replTest.nodes[1];

assert.commandWorked(testDB.runCommand({create: collName, writeConcern: {w: "majority"}}));

jsTest.log("Stop secondary oplog replication before the last operation in the transaction.");
// The stopReplProducerOnDocument failpoint ensures that secondary stops replicating before
// accepting the last two operations in the transaction.
const stopReplProducerOnDocumentFailPoint = configureFailPoint(
    syncSource, "stopReplProducerOnDocument", {document: {"applyOps.o._id": "next-last in txn"}});

// This will cause us to pause batch application at a critical point, with "first in txn" and
// "next in txn" in the oplog but not applied.
const pauseBatchApplicationAfterWritingOplogEntriesFailPoint =
    configureFailPoint(syncSource, "pauseBatchApplicationAfterWritingOplogEntries");

jsTestLog("Starting transaction");
const session = primary.startSession({causalConsistency: false});
const sessionDB = session.getDatabase(dbName);
session.startTransaction({writeConcern: {w: 1}});

assert.commandWorked(sessionDB.getCollection(collName).insert({_id: "first in txn"}));
assert.commandWorked(sessionDB.getCollection(collName).insert({_id: "next in txn"}));
assert.commandWorked(sessionDB.getCollection(collName).insert({_id: "next-last in txn"}));
assert.commandWorked(sessionDB.getCollection(collName).insert({_id: "last in txn"}));

jsTestLog("Committing transaction");
assert.commandWorked(session.commitTransaction_forTesting());
stopReplProducerOnDocumentFailPoint.wait();
pauseBatchApplicationAfterWritingOplogEntriesFailPoint.wait();

jsTestLog("Starting initial sync");
assert.commandWorked(initialSyncNode.adminCommand(
    {configureFailPoint: "initialSyncHangBeforeChoosingSyncSource", mode: "off"}));

// The bug we're testing for is a race.  The way we fix the race is to wait until it resolves.
// So it's very difficult to test it deterministically; if we use failpoints to force the race to
// happen, we'll just hang when the code is correct.  Instead we hold the sync source in the
// condition that causes the race until we reach the point it's about to happen, then release
// it hoping it'll fail most of the time when the bug is present.
jsTestLog("Waiting to fetch the defaultBeginFetchingOplogEntry");
checkLog.containsJson(initialSyncNode, 6608900);
stopReplProducerOnDocumentFailPoint.off();
pauseBatchApplicationAfterWritingOplogEntriesFailPoint.off();
replTest.awaitSecondaryNodes();
replTest.awaitReplication();

// Make sure we got the transaction.
assert.eq(4, initialSyncNode.getDB(dbName)[collName].find().itcount());

printjson(initialSyncNode.getDB("local").oplog.rs.find().toArray());

replTest.stopSet();
})();