summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Simon MacMullen <simon@rabbitmq.com> 2013-04-22 16:23:54 +0100
committer Simon MacMullen <simon@rabbitmq.com> 2013-04-22 16:23:54 +0100
commit d35534e6a7a0a04fc2ed68061d9c2508864a0865 (patch)
tree b1f40193c6edefdd551d9f35a355c3402da8b1a4
parent 8b7df16ae4b8014181c32b29db2cba53c35b2c24 (diff)
download rabbitmq-server-bug25471.tar.gz
Switch pause_minority mode to make its decisions based entirely on node-upness rather than rabbit-application-upness, and explain why. (bug25471)
-rw-r--r--src/rabbit_node_monitor.erl20
1 files changed, 18 insertions, 2 deletions
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index ca8e6dbd..7d844c72 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -200,6 +200,7 @@ init([]) ->
%% writing out the cluster status files - bad things can then
%% happen.
process_flag(trap_exit, true),
+ net_kernel:monitor_nodes(true),
{ok, _} = mnesia:subscribe(system),
{ok, #state{monitors = pmon:new(),
subscribers = pmon:new(),
@@ -265,6 +266,10 @@ handle_info({'DOWN', _MRef, process, Pid, _Reason},
State = #state{subscribers = Subscribers}) ->
{noreply, State#state{subscribers = pmon:erase(Pid, Subscribers)}};
+handle_info({nodedown, Node}, State) ->
+ ok = handle_dead_node(Node),
+ {noreply, State};
+
handle_info({mnesia_system_event,
{inconsistent_database, running_partitioned_network, Node}},
State = #state{partitions = Partitions,
@@ -333,6 +338,18 @@ handle_dead_rabbit(Node) ->
ok = rabbit_amqqueue:on_node_down(Node),
ok = rabbit_alarm:on_node_down(Node),
ok = rabbit_mnesia:on_node_down(Node),
+ ok.
+
+handle_dead_node(_Node) ->
+ %% In general in rabbit_node_monitor we care about whether the
+ %% rabbit application is up rather than the node; we do this so
+ %% that we can respond in the same way to "rabbitmqctl stop_app"
+ %% and "rabbitmqctl stop" as much as possible.
+ %%
+ %% However, for pause_minority mode we can't do this, since we
+ %% depend on looking at whether other nodes are up to decide
+ %% whether to come back up ourselves - if we decide that based on
+ %% the rabbit application we would go down and never come back.
case application:get_env(rabbit, cluster_partition_handling) of
{ok, pause_minority} ->
case majority() of
@@ -347,8 +364,7 @@ handle_dead_rabbit(Node) ->
rabbit_log:warning("cluster_partition_handling ~p unrecognised, "
"assuming 'ignore'~n", [Term]),
ok
- end,
- ok.
+ end.
await_cluster_recovery() ->
rabbit_log:warning("Cluster minority status detected - awaiting recovery~n",