summaryrefslogtreecommitdiff
path: root/src/rabbit_autoheal.erl
diff options
context:
space:
mode:
Diffstat (limited to 'src/rabbit_autoheal.erl')
-rw-r--r--src/rabbit_autoheal.erl50
1 files changed, 43 insertions, 7 deletions
diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl
index 90458741..09e9aa6a 100644
--- a/src/rabbit_autoheal.erl
+++ b/src/rabbit_autoheal.erl
@@ -21,6 +21,8 @@
%% The named process we are running in.
-define(SERVER, rabbit_node_monitor).
+-define(MNESIA_STOPPED_PING_INTERNAL, 200).
+
%%----------------------------------------------------------------------------
%% In order to autoheal we want to:
@@ -54,9 +56,17 @@
%% - we are the winner and are waiting for all losing nodes to stop
%% before telling them they can restart
%%
+%% about_to_heal
+%% - we are the leader, and have already assigned the winner and
+%% losers. We are part of the losers and we wait for the winner_is
+%% announcement. This leader-specific state differs from not_healing
+%% (the state other losers are in), because the leader could still
+%% receive request_start messages: those subsequent requests must be
+%% ignored.
+%%
%% {leader_waiting, OutstandingStops}
%% - we are the leader, and have already assigned the winner and losers.
-%% We are neither but need to ignore further requests to autoheal.
+%% We are neither but need to ignore further requests to autoheal.
%%
%% restarting
%% - we are restarting. Of course the node monitor immediately dies
@@ -128,14 +138,12 @@ handle_msg({request_start, Node},
" * Winner: ~p~n"
" * Losers: ~p~n",
[AllPartitions, Winner, Losers]),
- Continue = fun(Msg) ->
- handle_msg(Msg, not_healing, Partitions)
- end,
case node() =:= Winner of
- true -> Continue({become_winner, Losers});
+ true -> handle_msg({become_winner, Losers},
+ not_healing, Partitions);
false -> send(Winner, {become_winner, Losers}), %% [0]
case lists:member(node(), Losers) of
- true -> Continue({winner_is, Winner});
+ true -> about_to_heal;
false -> {leader_waiting, Losers}
end
end
@@ -163,7 +171,8 @@ handle_msg({become_winner, Losers},
end;
handle_msg({winner_is, Winner},
- not_healing, _Partitions) ->
+ State, _Partitions)
+ when State =:= not_healing orelse State =:= about_to_heal ->
rabbit_log:warning(
"Autoheal: we were selected to restart; winner is ~p~n", [Winner]),
rabbit_node_monitor:run_outside_applications(
@@ -194,9 +203,36 @@ abort(Down, Notify) ->
winner_finish(Notify).
winner_finish(Notify) ->
+ %% Send autoheal_safe_to_start to the rabbit_outside_app_process on
+ %% every node in Notify, then return not_healing. Before doing so,
+ %% work around a race in Mnesia causing a starting loser to hang
+ %% forever if another loser stops at the same time: the starting
+ %% node connects to the other node, negotiates the protocol and
+ %% requests a write lock on the schema there. If the other node stops
+ %% in between, the starting node never gets an answer to its request.
+ %%
+ %% To work around the problem, we make sure Mnesia is stopped on all
+ %% losing nodes before sending the "autoheal_safe_to_start" signal.
+ wait_for_mnesia_shutdown(Notify),
[{rabbit_outside_app_process, N} ! autoheal_safe_to_start || N <- Notify],
not_healing.
+%% Block until Mnesia is no longer running on any of the given nodes.
+%% A node answering yes, starting or stopping is polled again after
+%% ?MNESIA_STOPPED_PING_INTERNAL ms; any other answer (no, or an rpc
+%% failure) means we can move on to the next node.
+wait_for_mnesia_shutdown([]) ->
+    ok;
+wait_for_mnesia_shutdown([Node | Rest] = AllNodes) ->
+    MnesiaState = rpc:call(Node, mnesia, system_info, [is_running]),
+    case lists:member(MnesiaState, [yes, starting, stopping]) of
+        true ->
+            timer:sleep(?MNESIA_STOPPED_PING_INTERNAL),
+            wait_for_mnesia_shutdown(AllNodes);
+        false ->
+            wait_for_mnesia_shutdown(Rest)
+    end.
+
make_decision(AllPartitions) ->
Sorted = lists:sort([{partition_value(P), P} || P <- AllPartitions]),
[[Winner | _] | Rest] = lists:reverse([P || {_, P} <- Sorted]),