From 0c9e49560393811e6137ea9e5c454ed10ec5760b Mon Sep 17 00:00:00 2001 From: Tim Watson Date: Thu, 8 Nov 2012 14:52:05 +0000 Subject: introduce a noop process at the head of rabbit_sup's children, which we now monitor instead of the rabbit application's pid --- src/rabbit.erl | 22 +++++++++++++++-- src/rabbit_node_monitor.erl | 58 ++++++++++++++++++++++++--------------------- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index c52c296a..8c13224f 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -21,7 +21,7 @@ -export([start/0, boot/0, stop/0, stop_and_halt/0, await_startup/0, status/0, is_running/0, is_running/1, environment/0, rotate_logs/1, force_event_refresh/0, - start_fhc/0]). + start_fhc/0, start_app_marker/1, hibernate/0]). -export([start/2, stop/1]). @@ -174,10 +174,15 @@ [{mfa, {rabbit_networking, boot, []}}, {requires, log_relay}]}). +-rabbit_boot_step({app_running, + [{description, "cluster membership"}, + {mfa, {rabbit, start_app_marker, [boot]}}, + {requires, networking}]}). + -rabbit_boot_step({notify_cluster, [{description, "notify cluster nodes"}, {mfa, {rabbit_node_monitor, notify_node_up, []}}, - {requires, networking}]}). + {requires, app_running}]}). %%--------------------------------------------------------------------------- @@ -770,3 +775,16 @@ start_fhc() -> rabbit_sup:start_restartable_child( file_handle_cache, [fun rabbit_alarm:set_alarm/1, fun rabbit_alarm:clear_alarm/1]). + +start_app_marker(boot) -> + supervisor:start_child(rabbit_sup, + {rabbit_app, {?MODULE, start_app_marker, [spawn]}, + transient, ?MAX_WAIT, worker, [?MODULE]}); +start_app_marker(spawn) -> + Pid = spawn_link(fun() -> erlang:hibernate(?MODULE, hibernate, []) end), + register(rabbit_running, Pid), + {ok, Pid}. + +hibernate() -> + erlang:hibernate(?MODULE, hibernate, []). + diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index b11c9d04..ec2f8159 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -85,10 +85,10 @@ cluster_status_filename() -> prepare_cluster_status_files() -> rabbit_mnesia:ensure_mnesia_dir(), - CorruptFiles = fun () -> throw({error, corrupt_cluster_status_files}) end, + Corrupt = fun(F) -> throw({error, corrupt_cluster_status_files, F}) end, RunningNodes1 = case try_read_file(running_nodes_filename()) of {ok, [Nodes]} when is_list(Nodes) -> Nodes; - {ok, _ } -> CorruptFiles(); + {ok, Other} -> Corrupt(Other); {error, enoent} -> [] end, ThisNode = [node()], @@ -102,8 +102,8 @@ prepare_cluster_status_files() -> {ok, [AllNodes0]} when is_list(AllNodes0) -> {legacy_cluster_nodes(AllNodes0), legacy_should_be_disc_node(AllNodes0)}; - {ok, _} -> - CorruptFiles(); + {ok, Files} -> + Corrupt(Files); {error, enoent} -> {legacy_cluster_nodes([]), true} end, @@ -114,7 +114,7 @@ prepare_cluster_status_files() -> end, ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). -write_cluster_status({All, Disc, Running}) -> +write_cluster_status({All, Disc, Running}=St) -> ClusterStatusFN = cluster_status_filename(), Res = case rabbit_file:write_term_file(ClusterStatusFN, [{All, Disc}]) of ok -> @@ -134,8 +134,8 @@ read_cluster_status() -> try_read_file(running_nodes_filename())} of {{ok, [{All, Disc}]}, {ok, [Running]}} when is_list(Running) -> {All, Disc, Running}; - {_, _} -> - throw({error, corrupt_or_missing_cluster_files}) + {Stat, Run} -> + throw({error, {corrupt_or_missing_cluster_files, Stat, Run}}) end. update_cluster_status() -> @@ -199,44 +199,48 @@ handle_call(_Request, _From, State) -> %% mnesia propagation. handle_cast({node_up, Node, NodeType}, State = #state{monitors = Monitors}) -> - case pmon:is_monitored({rabbit, Node}, Monitors) of + case pmon:is_monitored({rabbit_running, Node}, Monitors) of true -> {noreply, State}; false -> rabbit_log:info("rabbit on node ~p up~n", [Node]), {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), - write_cluster_status({add_node(Node, AllNodes), - case NodeType of - disc -> add_node(Node, DiscNodes); - ram -> DiscNodes - end, - add_node(Node, RunningNodes)}), + ok = write_cluster_status({add_node(Node, AllNodes), + case NodeType of + disc -> add_node(Node, DiscNodes); + ram -> DiscNodes + end, + add_node(Node, RunningNodes)}), ok = handle_live_rabbit(Node), - {noreply, State#state{ - monitors = pmon:monitor({rabbit, Node}, Monitors)}} + {noreply, + State#state{ + monitors = pmon:monitor({rabbit_running, Node}, Monitors)}} end; handle_cast({joined_cluster, Node, NodeType}, State) -> {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), - write_cluster_status({add_node(Node, AllNodes), - case NodeType of - disc -> add_node(Node, DiscNodes); - ram -> DiscNodes - end, - RunningNodes}), + ok = write_cluster_status({add_node(Node, AllNodes), + case NodeType of + disc -> add_node(Node, DiscNodes); + ram -> DiscNodes + end, + RunningNodes}), {noreply, State}; handle_cast({left_cluster, Node}, State) -> {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), - write_cluster_status({del_node(Node, AllNodes), del_node(Node, DiscNodes), - del_node(Node, RunningNodes)}), + ok = write_cluster_status({del_node(Node, AllNodes), + del_node(Node, DiscNodes), + del_node(Node, RunningNodes)}), {noreply, State}; handle_cast(_Msg, State) -> {noreply, State}. -handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, +handle_info({'DOWN', _MRef, process, {rabbit_running, Node}, _Reason}, State = #state{monitors = Monitors}) -> rabbit_log:info("rabbit on node ~p down~n", [Node]), {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), - write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}), + ok = write_cluster_status({AllNodes, DiscNodes, + del_node(Node, RunningNodes)}), ok = handle_dead_rabbit(Node), - {noreply, State#state{monitors = pmon:erase({rabbit, Node}, Monitors)}}; + {noreply, State#state{monitors = pmon:erase( + {rabbit_running, Node}, Monitors)}}; handle_info({mnesia_system_event, {inconsistent_database, running_partitioned_network, Node}}, -- cgit v1.2.1 From 4def2d6d249f309e64376c8c8de80b1e7c545cf8 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Thu, 8 Nov 2012 15:09:50 +0000 Subject: reduce distance to default --- src/rabbit_node_monitor.erl | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index ec2f8159..6fa652ee 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -112,7 +112,7 @@ prepare_cluster_status_files() -> true -> ThisNode; false -> [] end, - ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). + write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). write_cluster_status({All, Disc, Running}=St) -> ClusterStatusFN = cluster_status_filename(), @@ -203,12 +203,12 @@ handle_cast({node_up, Node, NodeType}, true -> {noreply, State}; false -> rabbit_log:info("rabbit on node ~p up~n", [Node]), {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), - ok = write_cluster_status({add_node(Node, AllNodes), - case NodeType of - disc -> add_node(Node, DiscNodes); - ram -> DiscNodes - end, - add_node(Node, RunningNodes)}), + write_cluster_status({add_node(Node, AllNodes), + case NodeType of + disc -> add_node(Node, DiscNodes); + ram -> DiscNodes + end, + add_node(Node, RunningNodes)}), ok = handle_live_rabbit(Node), {noreply, State#state{ @@ -216,18 +216,17 @@ handle_cast({node_up, Node, NodeType}, end; handle_cast({joined_cluster, Node, NodeType}, State) -> {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), - ok = write_cluster_status({add_node(Node, AllNodes), - case NodeType of - disc -> add_node(Node, DiscNodes); - ram -> DiscNodes - end, - RunningNodes}), + write_cluster_status({add_node(Node, AllNodes), + case NodeType of + disc -> add_node(Node, DiscNodes); + ram -> DiscNodes + end, + RunningNodes}), {noreply, State}; handle_cast({left_cluster, Node}, State) -> {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), - ok = write_cluster_status({del_node(Node, AllNodes), - del_node(Node, DiscNodes), - del_node(Node, RunningNodes)}), + write_cluster_status({del_node(Node, AllNodes), del_node(Node, DiscNodes), + del_node(Node, RunningNodes)}), {noreply, State}; handle_cast(_Msg, State) -> {noreply, State}. @@ -236,8 +235,7 @@ handle_info({'DOWN', _MRef, process, {rabbit_running, Node}, _Reason}, State = #state{monitors = Monitors}) -> rabbit_log:info("rabbit on node ~p down~n", [Node]), {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), - ok = write_cluster_status({AllNodes, DiscNodes, - del_node(Node, RunningNodes)}), + write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}), ok = handle_dead_rabbit(Node), {noreply, State#state{monitors = pmon:erase( {rabbit_running, Node}, Monitors)}}; -- cgit v1.2.1 From c5a64acac12ba209f07d06c3316847c3f9c6e137 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Thu, 8 Nov 2012 15:11:36 +0000 Subject: Further clean --- src/rabbit_node_monitor.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 6fa652ee..d316e6a7 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -114,7 +114,7 @@ prepare_cluster_status_files() -> end, write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). -write_cluster_status({All, Disc, Running}=St) -> +write_cluster_status({All, Disc, Running}) -> ClusterStatusFN = cluster_status_filename(), Res = case rabbit_file:write_term_file(ClusterStatusFN, [{All, Disc}]) of ok -> -- cgit v1.2.1 From 46e4a2b5ac16428e69be16e6fd61632885dc5ef1 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Thu, 8 Nov 2012 16:20:26 +0000 Subject: Make the app marker into a gen_server --- src/rabbit.erl | 18 +++--------------- src/rabbit_app_marker.erl | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 15 deletions(-) create mode 100644 src/rabbit_app_marker.erl diff --git a/src/rabbit.erl b/src/rabbit.erl index 8c13224f..66adcca3 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -21,7 +21,7 @@ -export([start/0, boot/0, stop/0, stop_and_halt/0, await_startup/0, status/0, is_running/0, is_running/1, environment/0, rotate_logs/1, force_event_refresh/0, - start_fhc/0, start_app_marker/1, hibernate/0]). + start_fhc/0]). -export([start/2, stop/1]). @@ -176,7 +176,8 @@ -rabbit_boot_step({app_running, [{description, "cluster membership"}, - {mfa, {rabbit, start_app_marker, [boot]}}, + {mfa, {rabbit_sup, start_restartable_child, + [rabbit_app_marker]}}, {requires, networking}]}). -rabbit_boot_step({notify_cluster, @@ -775,16 +776,3 @@ start_fhc() -> rabbit_sup:start_restartable_child( file_handle_cache, [fun rabbit_alarm:set_alarm/1, fun rabbit_alarm:clear_alarm/1]). - -start_app_marker(boot) -> - supervisor:start_child(rabbit_sup, - {rabbit_app, {?MODULE, start_app_marker, [spawn]}, - transient, ?MAX_WAIT, worker, [?MODULE]}); -start_app_marker(spawn) -> - Pid = spawn_link(fun() -> erlang:hibernate(?MODULE, hibernate, []) end), - register(rabbit_running, Pid), - {ok, Pid}. - -hibernate() -> - erlang:hibernate(?MODULE, hibernate, []). - diff --git a/src/rabbit_app_marker.erl b/src/rabbit_app_marker.erl new file mode 100644 index 00000000..14daa98c --- /dev/null +++ b/src/rabbit_app_marker.erl @@ -0,0 +1,41 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(rabbit_app_marker). + +-behaviour(gen_server). + +-export([start_link/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-include("rabbit.hrl"). + +start_link() -> + gen_server:start_link({local, rabbit_running}, ?MODULE, [], []). + +%%---------------------------------------------------------------------------- + +init([]) -> {ok, state, hibernate}. + +handle_call(_Msg, _From, State) -> {stop, not_understood, State}. +handle_cast(_Msg, State) -> {stop, not_understood, State}. +handle_info(_Msg, State) -> {stop, not_understood, State}. + +terminate(_Arg, _State) -> ok. + +code_change(_OldVsn, State, _Extra) -> {ok, State}. -- cgit v1.2.1 From 7711b085cb94c8903b9eb6787bdc509745ea80f4 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Thu, 8 Nov 2012 16:36:35 +0000 Subject: Explain --- src/rabbit_app_marker.erl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/rabbit_app_marker.erl b/src/rabbit_app_marker.erl index 14daa98c..296c2918 100644 --- a/src/rabbit_app_marker.erl +++ b/src/rabbit_app_marker.erl @@ -25,6 +25,14 @@ -include("rabbit.hrl"). +%%---------------------------------------------------------------------------- + +%% We want to know when another node has *started* shutting down (to +%% write the cluster status file). The rabbit application goes away +%% pretty much when we have *finished* shutting down. So we have this +%% process to monitor instead - it;s the last thing to be started so +%% the first thing to go. + start_link() -> gen_server:start_link({local, rabbit_running}, ?MODULE, [], []). -- cgit v1.2.1 From a496dd6dba2faf56a5934120c1b92041ac45edba Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Thu, 8 Nov 2012 16:58:16 +0000 Subject: Take two monitors into the shower. --- src/rabbit_node_monitor.erl | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index d316e6a7..e82a4aaa 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -210,9 +210,12 @@ handle_cast({node_up, Node, NodeType}, end, add_node(Node, RunningNodes)}), ok = handle_live_rabbit(Node), - {noreply, - State#state{ - monitors = pmon:monitor({rabbit_running, Node}, Monitors)}} + State1 = mon({rabbit_running, Node}, State), + State2 = case pmon:is_monitored({rabbit, Node}) of + true -> State1; + false -> mon({rabbit, Node}, State1) + end, + {noreply, State2} end; handle_cast({joined_cluster, Node, NodeType}, State) -> {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), @@ -231,14 +234,20 @@ handle_cast({left_cluster, Node}, State) -> handle_cast(_Msg, State) -> {noreply, State}. -handle_info({'DOWN', _MRef, process, {rabbit_running, Node}, _Reason}, - State = #state{monitors = Monitors}) -> - rabbit_log:info("rabbit on node ~p down~n", [Node]), +handle_info({'DOWN', _MRef, process, {rabbit_running, Node}, _Reason}, State) -> + %% The node has started to stop, remove it from the cluster status + %% file. We want to do this "early" to stand a better chance of + %% recording anything when all the nodes are shut down + %% simultaneously. {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}), + {noreply, unmon({rabbit_running, Node}, State)}; + +handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, State) -> + %% The node has finished stopping (rabbit anyway), treat it as dead. + rabbit_log:info("rabbit on node ~p down~n", [Node]), ok = handle_dead_rabbit(Node), - {noreply, State#state{monitors = pmon:erase( - {rabbit_running, Node}, Monitors)}}; + {noreply, unmon({rabbit, Node}, State)}; handle_info({mnesia_system_event, {inconsistent_database, running_partitioned_network, Node}}, @@ -296,3 +305,9 @@ legacy_should_be_disc_node(DiscNodes) -> add_node(Node, Nodes) -> lists:usort([Node | Nodes]). del_node(Node, Nodes) -> Nodes -- [Node]. + +mon(Item, State = #state{monitors = Monitors}) -> + State#state{monitors = pmon:monitor(Item, Monitors)}. + +unmon(Item, State = #state{monitors = Monitors}) -> + State#state{monitors = pmon:erase(Item, Monitors)}. -- cgit v1.2.1 From fcd59347940d894cae4351783307357758c3525a Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Fri, 9 Nov 2012 15:58:26 +0000 Subject: Fix idiocy --- src/rabbit_node_monitor.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index e82a4aaa..b89d4dc4 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -211,7 +211,7 @@ handle_cast({node_up, Node, NodeType}, add_node(Node, RunningNodes)}), ok = handle_live_rabbit(Node), State1 = mon({rabbit_running, Node}, State), - State2 = case pmon:is_monitored({rabbit, Node}) of + State2 = case pmon:is_monitored({rabbit, Node}, Monitors) of true -> State1; false -> mon({rabbit, Node}, State1) end, -- cgit v1.2.1 From 0cfaf879a427c252c970e21ec6135ab898672402 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 13 Nov 2012 11:19:02 +0000 Subject: test triggering of memory alarms --- src/rabbit_tests.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 8a24d388..096f9490 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1131,6 +1131,9 @@ test_server_status() -> HWM = vm_memory_monitor:get_vm_memory_high_watermark(), ok = control_action(set_vm_memory_high_watermark, ["1"]), ok = control_action(set_vm_memory_high_watermark, ["1.0"]), + %% this will trigger an alarm + ok = control_action(set_vm_memory_high_watermark, ["0.0"]), + %% reset ok = control_action(set_vm_memory_high_watermark, [float_to_list(HWM)]), %% eval -- cgit v1.2.1 From c6e0ca5ed0c5a0c75bf004d316bf87a15053bcbc Mon Sep 17 00:00:00 2001 From: Tim Watson Date: Tue, 13 Nov 2012 11:46:59 +0000 Subject: roll back to (almost) default - keeping the extra error information when corrupt cluster status files are found --- src/rabbit.erl | 8 +------- src/rabbit_node_monitor.erl | 33 ++++++++------------------------- 2 files changed, 9 insertions(+), 32 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index f3d31b22..ef9f5f56 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -174,16 +174,10 @@ [{mfa, {rabbit_networking, boot, []}}, {requires, log_relay}]}). --rabbit_boot_step({app_running, - [{description, "cluster membership"}, - {mfa, {rabbit_sup, start_restartable_child, - [rabbit_app_marker]}}, - {requires, networking}]}). - -rabbit_boot_step({notify_cluster, [{description, "notify cluster nodes"}, {mfa, {rabbit_node_monitor, notify_node_up, []}}, - {requires, app_running}]}). + {requires, networking}]}). %%--------------------------------------------------------------------------- diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index b89d4dc4..97feb2f2 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -112,7 +112,7 @@ prepare_cluster_status_files() -> true -> ThisNode; false -> [] end, - write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). + ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). write_cluster_status({All, Disc, Running}) -> ClusterStatusFN = cluster_status_filename(), @@ -199,7 +199,7 @@ handle_call(_Request, _From, State) -> %% mnesia propagation. handle_cast({node_up, Node, NodeType}, State = #state{monitors = Monitors}) -> - case pmon:is_monitored({rabbit_running, Node}, Monitors) of + case pmon:is_monitored({rabbit, Node}, Monitors) of true -> {noreply, State}; false -> rabbit_log:info("rabbit on node ~p up~n", [Node]), {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), @@ -210,12 +210,8 @@ handle_cast({node_up, Node, NodeType}, end, add_node(Node, RunningNodes)}), ok = handle_live_rabbit(Node), - State1 = mon({rabbit_running, Node}, State), - State2 = case pmon:is_monitored({rabbit, Node}, Monitors) of - true -> State1; - false -> mon({rabbit, Node}, State1) - end, - {noreply, State2} + {noreply, State#state{ + monitors = pmon:monitor({rabbit, Node}, Monitors)}} end; handle_cast({joined_cluster, Node, NodeType}, State) -> {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), @@ -234,20 +230,13 @@ handle_cast({left_cluster, Node}, State) -> handle_cast(_Msg, State) -> {noreply, State}. -handle_info({'DOWN', _MRef, process, {rabbit_running, Node}, _Reason}, State) -> - %% The node has started to stop, remove it from the cluster status - %% file. We want to do this "early" to stand a better chance of - %% recording anything when all the nodes are shut down - %% simultaneously. +handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, + State = #state{monitors = Monitors}) -> + rabbit_log:info("rabbit on node ~p down~n", [Node]), {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}), - {noreply, unmon({rabbit_running, Node}, State)}; - -handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, State) -> - %% The node has finished stopping (rabbit anyway), treat it as dead. - rabbit_log:info("rabbit on node ~p down~n", [Node]), ok = handle_dead_rabbit(Node), - {noreply, unmon({rabbit, Node}, State)}; + {noreply, State#state{monitors = pmon:erase({rabbit, Node}, Monitors)}}; handle_info({mnesia_system_event, {inconsistent_database, running_partitioned_network, Node}}, @@ -305,9 +294,3 @@ legacy_should_be_disc_node(DiscNodes) -> add_node(Node, Nodes) -> lists:usort([Node | Nodes]). del_node(Node, Nodes) -> Nodes -- [Node]. - -mon(Item, State = #state{monitors = Monitors}) -> - State#state{monitors = pmon:monitor(Item, Monitors)}. - -unmon(Item, State = #state{monitors = Monitors}) -> - State#state{monitors = pmon:erase(Item, Monitors)}. -- cgit v1.2.1 From 236aff09ce9dc77367540a8a0acac88c0c3384da Mon Sep 17 00:00:00 2001 From: Tim Watson Date: Tue, 13 Nov 2012 11:48:12 +0000 Subject: rabbit_node_monitor traps exits, so shutdown allows us time to flush any pending file system operations before the process dies --- src/rabbit_node_monitor.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 97feb2f2..21389583 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -184,6 +184,7 @@ partitions() -> %%---------------------------------------------------------------------------- init([]) -> + process_flag(trap_exit, true), {ok, _} = mnesia:subscribe(system), {ok, #state{monitors = pmon:new(), partitions = []}}. -- cgit v1.2.1 From e150aa15f9bb8d21081d75495ca79378ac4e26b1 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Tue, 13 Nov 2012 12:02:44 +0000 Subject: Explain why --- src/rabbit_node_monitor.erl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 21389583..8d0e4456 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -184,6 +184,10 @@ partitions() -> %%---------------------------------------------------------------------------- init([]) -> + %% We trap exits so that the supervisor will not just kill us. We + %% want to be sure that we are not going to be killed while + %% writing out the cluster status files - bad things can then + %% happen. process_flag(trap_exit, true), {ok, _} = mnesia:subscribe(system), {ok, #state{monitors = pmon:new(), -- cgit v1.2.1 From 9ab849d1c407688a211b7506bafb2c3c6ff3d2a8 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Tue, 13 Nov 2012 12:03:36 +0000 Subject: And, err, this thing isn't needed any more. --- src/rabbit_app_marker.erl | 49 ----------------------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 src/rabbit_app_marker.erl diff --git a/src/rabbit_app_marker.erl b/src/rabbit_app_marker.erl deleted file mode 100644 index 296c2918..00000000 --- a/src/rabbit_app_marker.erl +++ /dev/null @@ -1,49 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License -%% at http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See -%% the License for the specific language governing rights and -%% limitations under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. -%% - --module(rabbit_app_marker). - --behaviour(gen_server). - --export([start_link/0]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --include("rabbit.hrl"). - -%%---------------------------------------------------------------------------- - -%% We want to know when another node has *started* shutting down (to -%% write the cluster status file). The rabbit application goes away -%% pretty much when we have *finished* shutting down. So we have this -%% process to monitor instead - it;s the last thing to be started so -%% the first thing to go. - -start_link() -> - gen_server:start_link({local, rabbit_running}, ?MODULE, [], []). - -%%---------------------------------------------------------------------------- - -init([]) -> {ok, state, hibernate}. - -handle_call(_Msg, _From, State) -> {stop, not_understood, State}. -handle_cast(_Msg, State) -> {stop, not_understood, State}. -handle_info(_Msg, State) -> {stop, not_understood, State}. - -terminate(_Arg, _State) -> ok. - -code_change(_OldVsn, State, _Extra) -> {ok, State}. -- cgit v1.2.1