summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author zhaozhao.zz <zhaozhao.zz@alibaba-inc.com> 2023-05-06 11:53:28 +0800
committer GitHub <noreply@github.com> 2023-05-06 11:53:28 +0800
commit b0dd7b324505ebb3b6fac3f3067eba948cf0b09d (patch)
tree 3c053f9430b8a22d77596b08160cab02b2475b1b
parent e49c2a52922f1c86eb94b42206cb9d50e1aeb431 (diff)
download redis-b0dd7b324505ebb3b6fac3f3067eba948cf0b09d.tar.gz
Free backlog only if rsi is invalid when master reboots (#12088)
When the master reboots from an RDB file and the rsi (replication save info) in the RDB is valid, we should not free the replication backlog, even if master_repl_offset or repl-offset is 0. If the master has not sent any data to replicas, master_repl_offset is 0, and that is a valid value. A clear example: 1. Start a master and apply some write commands; the master's master_repl_offset is 0 since it has no replicas. 2. Stop the write commands on the master, then start another instance and make it a replica of the master (replicaof), which triggers a FULLRESYNC. 3. The master's master_repl_offset is still 0 (set a large value for repl-ping-replica-period so no PINGs are sent); do a BGSAVE and restart the master. 4. The master loads master_repl_offset from the RDB's rsi and it is still 0, and we should make sure the replica can partially resync with the master.
-rw-r--r-- src/server.c 4
-rw-r--r-- tests/integration/psync2-master-restart.tcl 41
2 files changed, 40 insertions, 5 deletions
diff --git a/src/server.c b/src/server.c
index 381edabc6..b6ed1d9c5 100644
--- a/src/server.c
+++ b/src/server.c
@@ -6700,6 +6700,7 @@ void loadDataFromDisk(void) {
serverLog(LL_NOTICE, "DB loaded from append only file: %.3f seconds", (float)(ustime()-start)/1000000);
} else {
rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+ int rsi_is_valid = 0;
errno = 0; /* Prevent a stale value from affecting error checking */
int rdb_flags = RDBFLAGS_NONE;
if (iAmMaster()) {
@@ -6721,6 +6722,7 @@ void loadDataFromDisk(void) {
* information in function rdbPopulateSaveInfo. */
rsi.repl_stream_db != -1)
{
+ rsi_is_valid = 1;
if (!iAmMaster()) {
memcpy(server.replid,rsi.repl_id,sizeof(server.replid));
server.master_repl_offset = rsi.repl_offset;
@@ -6754,7 +6756,7 @@ void loadDataFromDisk(void) {
* if RDB doesn't have replication info or there is no rdb, it is not
* possible to support partial resynchronization, to avoid extra memory
* of replication backlog, we drop it. */
- if (server.master_repl_offset == 0 && server.repl_backlog)
+ if (!rsi_is_valid && server.repl_backlog)
freeReplicationBacklog();
}
}
diff --git a/tests/integration/psync2-master-restart.tcl b/tests/integration/psync2-master-restart.tcl
index e925aaab8..a9e21d12d 100644
--- a/tests/integration/psync2-master-restart.tcl
+++ b/tests/integration/psync2-master-restart.tcl
@@ -21,6 +21,10 @@ start_server {} {
$replica config set repl-timeout 3600
$sub_replica config set repl-timeout 3600
+ # Avoid PINGs
+ $master config set repl-ping-replica-period 3600
+ $master config rewrite
+
# Build replication chain
$replica replicaof $master_host $master_port
$sub_replica replicaof $replica_host $replica_port
@@ -32,14 +36,43 @@ start_server {} {
fail "Replication not started."
}
- # Avoid PINGs
- $master config set repl-ping-replica-period 3600
- $master config rewrite
+ test "PSYNC2: Partial resync after Master restart using RDB aux fields when offset is 0" {
+ assert {[status $master master_repl_offset] == 0}
+
+ set replid [status $master master_replid]
+ $replica config resetstat
+
+ catch {
+ restart_server 0 true false true now
+ set master [srv 0 client]
+ }
+ wait_for_condition 50 1000 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replicas didn't sync after master restart"
+ }
+
+ # Make sure master restore replication info correctly
+ assert {[status $master master_replid] != $replid}
+ assert {[status $master master_repl_offset] == 0}
+ assert {[status $master master_replid2] eq $replid}
+ assert {[status $master second_repl_offset] == 1}
+
+ # Make sure master set replication backlog correctly
+ assert {[status $master repl_backlog_active] == 1}
+ assert {[status $master repl_backlog_first_byte_offset] == 1}
+ assert {[status $master repl_backlog_histlen] == 0}
+
+ # Partial resync after Master restart
+ assert {[status $master sync_partial_ok] == 1}
+ assert {[status $replica sync_partial_ok] == 1}
+ }
# Generate some data
createComplexDataset $master 1000
- test "PSYNC2: Partial resync after Master restart using RDB aux fields" {
+ test "PSYNC2: Partial resync after Master restart using RDB aux fields with data" {
wait_for_condition 500 100 {
[status $master master_repl_offset] == [status $replica master_repl_offset] &&
[status $master master_repl_offset] == [status $sub_replica master_repl_offset]