summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Simon MacMullen <simon@rabbitmq.com> 2014-10-03 17:50:18 +0100
committer: Simon MacMullen <simon@rabbitmq.com> 2014-10-03 17:50:18 +0100
commit: 9bd6ff0c68babf3d58bf7537b0519786327c754b (patch)
tree: e4900187d2384b0c3291e9085d4c8d1946e41334
parent: acb8c43655b5149e4c5259433a4b9765c864a840 (diff)
download: rabbitmq-server-bug26213.tar.gz
Just because we received a running_partitioned_network doesn't mean all nodes are now contactable. Defer attempting autoheal until we can talk to everyone again, to avoid getting stuck in a loop with partial partition promotion. (branch: bug26213)
-rw-r--r-- src/rabbit_node_monitor.erl 19
1 files changed, 13 insertions, 6 deletions
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index b4a429ed..f3aa2303 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -330,8 +330,8 @@ handle_cast({node_up, Node, NodeType},
end,
add_node(Node, RunningNodes)}),
ok = handle_live_rabbit(Node),
- {noreply, State#state{
- monitors = pmon:monitor({rabbit, Node}, Monitors)}}
+ Monitors1 = pmon:monitor({rabbit, Node}, Monitors),
+ {noreply, maybe_autoheal(State#state{monitors = Monitors1})}
end;
handle_cast({joined_cluster, Node, NodeType}, State) ->
@@ -393,8 +393,7 @@ handle_info({nodedown, Node, Info}, State = #state{node_guids = GUIDs}) ->
handle_info({mnesia_system_event,
{inconsistent_database, running_partitioned_network, Node}},
State = #state{partitions = Partitions,
- monitors = Monitors,
- autoheal = AState}) ->
+ monitors = Monitors}) ->
%% We will not get a node_up from this node - yet we should treat it as
%% up (mostly).
State1 = case pmon:is_monitored({rabbit, Node}, Monitors) of
@@ -405,8 +404,7 @@ handle_info({mnesia_system_event,
ok = handle_live_rabbit(Node),
Partitions1 = ordsets:to_list(
ordsets:add_element(Node, ordsets:from_list(Partitions))),
- {noreply, State1#state{partitions = Partitions1,
- autoheal = rabbit_autoheal:maybe_start(AState)}};
+ {noreply, maybe_autoheal(State1#state{partitions = Partitions1})};
handle_info({autoheal_msg, Msg}, State = #state{autoheal = AState,
partitions = Partitions}) ->
@@ -549,6 +547,15 @@ handle_live_rabbit(Node) ->
ok = rabbit_alarm:on_node_up(Node),
ok = rabbit_mnesia:on_node_up(Node).
+maybe_autoheal(State = #state{partitions = []}) ->
+ State;
+
+maybe_autoheal(State = #state{autoheal = AState}) ->
+ case all_nodes_up() of
+ true -> State#state{autoheal = rabbit_autoheal:maybe_start(AState)};
+ false -> State
+ end.
+
%%--------------------------------------------------------------------
%% Internal utils
%%--------------------------------------------------------------------