diff options
author | Simon MacMullen <simon@rabbitmq.com> | 2013-03-12 17:56:18 +0000 |
---|---|---|
committer | Simon MacMullen <simon@rabbitmq.com> | 2013-03-12 17:56:18 +0000 |
commit | 38f82dfc2e3743f101b2921750ea8d4bd679c5d2 (patch) | |
tree | e38bd01d0d69eaba17ea1cb95a75e6241ba8a7ef /src | |
parent | 5dea4c63b56e8b9a7acdf0cedbe5fa051ea5e266 (diff) | |
download | rabbitmq-server-38f82dfc2e3743f101b2921750ea8d4bd679c5d2.tar.gz |
If we have been partitioned, and we are now in the only remaining partition, we no longer care about partitions - forget them. Note that we do not attempt to deal with individual (other) partitions going away; it's only safe to forget *any* of them when we have seen the back of *all* of them.
Diffstat (limited to 'src')
-rw-r--r-- | src/rabbit_node_monitor.erl | 24 |
1 files changed, 21 insertions, 3 deletions
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 3d900d26..558596ef 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -249,7 +249,8 @@ handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}), ok = handle_dead_rabbit(Node), [P ! {node_down, Node} || P <- pmon:monitored(Subscribers)], - {noreply, State#state{monitors = pmon:erase({rabbit, Node}, Monitors)}}; + {noreply, handle_dead_rabbit_state( + State#state{monitors = pmon:erase({rabbit, Node}, Monitors)})}; handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #state{subscribers = Subscribers}) -> @@ -308,9 +309,14 @@ handle_dead_rabbit(Node) -> ok. majority() -> + length(alive_nodes()) / length(rabbit_mnesia:cluster_nodes(all)) > 0.5. + +%% mnesia:system_info(db_nodes) (and hence +%% rabbit_mnesia:cluster_nodes(running)) does not give reliable results +%% when partitioned. +alive_nodes() -> Nodes = rabbit_mnesia:cluster_nodes(all), - Alive = [N || N <- Nodes, pong =:= net_adm:ping(N)], - length(Alive) / length(Nodes) > 0.5. + [N || N <- Nodes, pong =:= net_adm:ping(N)]. await_cluster_recovery() -> rabbit_log:warning("Cluster minority status detected - awaiting recovery~n", @@ -334,6 +340,18 @@ wait_for_cluster_recovery(Nodes) -> wait_for_cluster_recovery(Nodes) end. +handle_dead_rabbit_state(State = #state{partitions = Partitions}) -> + %% If we have been partitioned, and we are now in the only remaining + %% partition, we no longer care about partitions - forget them. Note + %% that we do not attempt to deal with individual (other) partitions + %% going away, it's only safe to forget *any* of them when we have seen + %% the back of *all* of them. + Partitions1 = case Partitions -- (Partitions -- alive_nodes()) of + [] -> []; + _ -> Partitions + end, + State#state{partitions = Partitions1}. 
+ handle_live_rabbit(Node) -> ok = rabbit_alarm:on_node_up(Node), ok = rabbit_mnesia:on_node_up(Node). |