From 28e39d47e6d7297ccf283cfa5aa73b447584006a Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 8 Oct 2021 13:54:33 -0400 Subject: Try harder to avoid a change feed rewind after a shard move In the previous attempt [1] we improved the logic by spawning workers on the matching target shards only. However, that wasn't enough as workers might still reject the passed in sequence from the old node when it asserts ownership locally on each shard. Re-use the already existing replacement clause, where after uuid is matched, we try harder to find the highest viable sequence. To use the unit test setup as an example, if the shard moved from node1 to node2, and recorded epoch `{node2, 10}` on the new node, then a sequence generated on node1 before the move, for example 12, would rewind down to 10 only when calculated on new location on node2, instead of being rewound all the way down to 0. [1] https://github.com/apache/couchdb/commit/e83935c7f8c3e47b47f07f22ece327f6529d4da0 --- src/couch/src/couch_db.erl | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index 8837101ec..0646240bd 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -1503,17 +1503,20 @@ calculate_start_seq(Db, _Node, {Seq, {split, Uuid}, EpochNode}) -> [?MODULE, Db#db.name, Seq, Uuid, EpochNode, get_epochs(Db)]), 0 end; -calculate_start_seq(Db, _Node, {Seq, Uuid, EpochNode}) -> +calculate_start_seq(Db, Node, {Seq, Uuid, EpochNode}) -> case is_prefix(Uuid, get_uuid(Db)) of true -> case is_owner(EpochNode, Seq, get_epochs(Db)) of true -> Seq; false -> - couch_log:warning("~p calculate_start_seq not owner " - "db: ~p, seq: ~p, uuid: ~p, epoch_node: ~p, epochs: ~p", - [?MODULE, Db#db.name, Seq, Uuid, EpochNode, - get_epochs(Db)]), - 0 + %% Shard might have been moved from another node. We + %% matched the uuid already, try to find last viable + %% sequence we can use + couch_log:warning( "~p calculate_start_seq not owner, " + " trying replacement db: ~p, seq: ~p, uuid: ~p, " + "epoch_node: ~p, epochs: ~p", [?MODULE, Db#db.name, + Seq, Uuid, EpochNode, get_epochs(Db)]), + calculate_start_seq(Db, Node, {replace, EpochNode, Uuid, Seq}) end; false -> couch_log:warning("~p calculate_start_seq uuid prefix mismatch " @@ -1984,7 +1987,8 @@ calculate_start_seq_test_() -> t_calculate_start_seq_is_owner(), t_calculate_start_seq_not_owner(), t_calculate_start_seq_raw(), - t_calculate_start_seq_epoch_mismatch() + t_calculate_start_seq_epoch_mismatch(), + t_calculate_start_seq_shard_move() ] } }. @@ -2028,7 +2032,7 @@ t_calculate_start_seq_is_owner() -> t_calculate_start_seq_not_owner() -> ?_test(begin Db = test_util:fake_db([]), - Seq = calculate_start_seq(Db, node1, {15, <<"foo">>}), + Seq = calculate_start_seq(Db, node3, {15, <<"foo">>}), ?assertEqual(0, Seq) end). @@ -2047,6 +2051,18 @@ t_calculate_start_seq_epoch_mismatch() -> ?assertEqual(0, Seq) end). +t_calculate_start_seq_shard_move() -> + ?_test(begin + Db = test_util:fake_db([]), + % Sequence when shard was on node1 + ?assertEqual(2, calculate_start_seq(Db, node1, {2, <<"foo">>})), + % Sequence from node1 after the move happened, we reset back to the + % start of the epoch on node2 = 10 + ?assertEqual(10, calculate_start_seq(Db, node1, {16, <<"foo">>})), + % Invalid node, epoch mismatch, start at 0 + ?assertEqual(0, calculate_start_seq(Db, node3, {16, <<"foo">>})) + end). + is_owner_test() -> ?assertNot(is_owner(foo, 1, [])), ?assertNot(is_owner(foo, 1, [{foo, 1}])), -- cgit v1.2.1