summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Simon MacMullen <simon@rabbitmq.com> 2013-11-22 12:47:50 +0000
committer Simon MacMullen <simon@rabbitmq.com> 2013-11-22 12:47:50 +0000
commit 4afc5770d7fa20fb954c8a06594bb668aa1f4579 (patch)
tree a29d4106d02554d4f304eefb2d03d34a8cff9f5c
parent e1e8117349e22065da6b48434c5677d4a3199343 (diff)
parent b6a6cf56afb68c45a409e9879c7629663cf655ea (diff)
download rabbitmq-server-bug25873.tar.gz
Merge in stablebug25873
-rw-r--r-- src/rabbit_mirror_queue_master.erl 8
-rw-r--r-- src/rabbit_mirror_queue_misc.erl 48
-rw-r--r-- src/rabbit_mirror_queue_slave.erl 49
3 files changed, 65 insertions, 40 deletions
diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl
index 3abd81f5..d9cef642 100644
--- a/src/rabbit_mirror_queue_master.erl
+++ b/src/rabbit_mirror_queue_master.erl
@@ -110,7 +110,13 @@ init_with_existing_bq(Q = #amqqueue{name = QName}, BQ, BQS) ->
Q1#amqqueue{gm_pids = [{GM, Self} | GMPids]})
end),
{_MNode, SNodes} = rabbit_mirror_queue_misc:suggested_queue_nodes(Q),
- rabbit_mirror_queue_misc:add_mirrors(QName, SNodes),
+ %% We need synchronous add here (i.e. do not return until the
+ %% slave is running) so that when queue declaration is finished
+ %% all slaves are up; we don't want to end up with unsynced slaves
+ %% just by declaring a new queue. But add can't be synchronous all
+ %% the time as it can be called by slaves and that's
+ %% deadlock-prone.
+ rabbit_mirror_queue_misc:add_mirrors(QName, SNodes, sync),
#state { name = QName,
gm = GM,
coordinator = CPid,
diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl
index 9ebe2cfe..ca495733 100644
--- a/src/rabbit_mirror_queue_misc.erl
+++ b/src/rabbit_mirror_queue_misc.erl
@@ -17,9 +17,10 @@
-module(rabbit_mirror_queue_misc).
-behaviour(rabbit_policy_validator).
--export([remove_from_queue/3, on_node_up/0, add_mirrors/2, add_mirror/2,
+-export([remove_from_queue/3, on_node_up/0, add_mirrors/3,
report_deaths/4, store_updated_slaves/1, suggested_queue_nodes/1,
- is_mirrored/1, update_mirrors/2, validate_policy/1]).
+ is_mirrored/1, update_mirrors/2, validate_policy/1,
+ maybe_auto_sync/1]).
%% for testing only
-export([module/1]).
@@ -45,10 +46,8 @@
(rabbit_amqqueue:name(), pid(), [pid()])
-> {'ok', pid(), [pid()]} | {'error', 'not_found'}).
-spec(on_node_up/0 :: () -> 'ok').
--spec(add_mirrors/2 :: (rabbit_amqqueue:name(), [node()]) -> 'ok').
--spec(add_mirror/2 ::
- (rabbit_amqqueue:name(), node()) ->
- {'ok', atom()} | rabbit_types:error(any())).
+-spec(add_mirrors/3 :: (rabbit_amqqueue:name(), [node()], 'sync' | 'async')
+ -> 'ok').
-spec(store_updated_slaves/1 :: (rabbit_types:amqqueue()) ->
rabbit_types:amqqueue()).
-spec(suggested_queue_nodes/1 :: (rabbit_types:amqqueue()) ->
@@ -56,6 +55,7 @@
-spec(is_mirrored/1 :: (rabbit_types:amqqueue()) -> boolean()).
-spec(update_mirrors/2 ::
(rabbit_types:amqqueue(), rabbit_types:amqqueue()) -> 'ok').
+-spec(maybe_auto_sync/1 :: (rabbit_types:amqqueue()) -> 'ok').
-endif.
@@ -140,7 +140,7 @@ on_node_up() ->
end
end, [], rabbit_queue)
end),
- [add_mirror(QName, node()) || QName <- QNames],
+ [add_mirror(QName, node(), async) || QName <- QNames],
ok.
drop_mirrors(QName, Nodes) ->
@@ -165,45 +165,35 @@ drop_mirror(QName, MirrorNode) ->
end
end).
-add_mirrors(QName, Nodes) ->
- [add_mirror(QName, Node) || Node <- Nodes],
+add_mirrors(QName, Nodes, SyncMode) ->
+ [add_mirror(QName, Node, SyncMode) || Node <- Nodes],
ok.
-add_mirror(QName, MirrorNode) ->
+add_mirror(QName, MirrorNode, SyncMode) ->
rabbit_amqqueue:with(
QName,
fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids } = Q) ->
case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of
[] ->
- start_child(Name, MirrorNode, Q);
+ start_child(Name, MirrorNode, Q, SyncMode);
[SPid] ->
case rabbit_misc:is_process_alive(SPid) of
true -> {ok, already_mirrored};
- false -> start_child(Name, MirrorNode, Q)
+ false -> start_child(Name, MirrorNode, Q, SyncMode)
end
end
end).
-start_child(Name, MirrorNode, Q) ->
+start_child(Name, MirrorNode, Q, SyncMode) ->
case rabbit_misc:with_exit_handler(
- rabbit_misc:const({ok, down}),
+ rabbit_misc:const(down),
fun () ->
rabbit_mirror_queue_slave_sup:start_child(MirrorNode, [Q])
end) of
- {ok, SPid} when is_pid(SPid) ->
- maybe_auto_sync(Q),
- rabbit_log:info("Adding mirror of ~s on node ~p: ~p~n",
- [rabbit_misc:rs(Name), MirrorNode, SPid]),
- {ok, started};
- {error, {{stale_master_pid, StalePid}, _}} ->
- rabbit_log:warning("Detected stale HA master while adding "
- "mirror of ~s on node ~p: ~p~n",
- [rabbit_misc:rs(Name), MirrorNode, StalePid]),
- {ok, stale_master};
- {error, {{duplicate_live_master, _}=Err, _}} ->
- Err;
- Other ->
- Other
+ {ok, SPid} -> rabbit_log:info("Adding mirror of ~s on node ~p: ~p~n",
+ [rabbit_misc:rs(Name), MirrorNode, SPid]),
+ rabbit_mirror_queue_slave:go(SPid, SyncMode);
+ _ -> ok
end.
report_deaths(_MirrorPid, _IsMaster, _QueueName, []) ->
@@ -317,7 +307,7 @@ update_mirrors0(OldQ = #amqqueue{name = QName},
{NewMNode, NewSNodes} = suggested_queue_nodes(NewQ),
OldNodes = [OldMNode | OldSNodes],
NewNodes = [NewMNode | NewSNodes],
- add_mirrors (QName, NewNodes -- OldNodes),
+ add_mirrors (QName, NewNodes -- OldNodes, async),
drop_mirrors(QName, OldNodes -- NewNodes),
%% This is for the case where no extra nodes were added but we changed to
%% a policy requiring auto-sync.
diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl
index 6f78d1d2..96f89ecc 100644
--- a/src/rabbit_mirror_queue_slave.erl
+++ b/src/rabbit_mirror_queue_slave.erl
@@ -24,7 +24,7 @@
%% All instructions from the GM group must be processed in the order
%% in which they're received.
--export([start_link/1, set_maximum_since_use/2, info/1]).
+-export([start_link/1, set_maximum_since_use/2, info/1, go/2]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
code_change/3, handle_pre_hibernate/1, prioritise_call/4,
@@ -78,7 +78,15 @@ set_maximum_since_use(QPid, Age) ->
info(QPid) -> gen_server2:call(QPid, info, infinity).
-init(Q = #amqqueue { name = QName }) ->
+init(Q) ->
+ {ok, {not_started, Q}, hibernate,
+ {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN,
+ ?DESIRED_HIBERNATE}}.
+
+go(SPid, sync) -> gen_server2:call(SPid, go, infinity);
+go(SPid, async) -> gen_server2:cast(SPid, go).
+
+handle_go(Q = #amqqueue{name = QName}) ->
%% We join the GM group before we add ourselves to the amqqueue
%% record. As a result:
%% 1. We can receive msgs from GM that correspond to messages we will
@@ -124,22 +132,27 @@ init(Q = #amqqueue { name = QName }) ->
},
ok = gm:broadcast(GM, request_depth),
ok = gm:validate_members(GM, [GM | [G || {G, _} <- GMPids]]),
- {ok, State, hibernate,
- {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN,
- ?DESIRED_HIBERNATE}};
+ rabbit_mirror_queue_misc:maybe_auto_sync(Q1),
+ {ok, State};
{stale, StalePid} ->
- {stop, {stale_master_pid, StalePid}};
+ rabbit_log:warning("Detected stale HA master while adding "
+ "mirror of ~s: ~p~n",
+ [rabbit_misc:rs(QName), StalePid]),
+ gm:leave(GM),
+ {error, {stale_master_pid, StalePid}};
duplicate_live_master ->
- {stop, {duplicate_live_master, Node}};
+ gm:leave(GM),
+ {error, {duplicate_live_master, Node}};
existing ->
gm:leave(GM),
- ignore;
+ {error, normal};
master_in_recovery ->
+ gm:leave(GM),
%% The queue record vanished - we must have a master starting
%% concurrently with us. In that case we can safely decide to do
%% nothing here, and the master will start us in
%% master:init_with_existing_bq/3
- ignore
+ {error, normal}
end.
init_it(Self, GM, Node, QName) ->
@@ -173,6 +186,12 @@ add_slave(Q = #amqqueue { slave_pids = SPids, gm_pids = GMPids }, New, GM) ->
rabbit_mirror_queue_misc:store_updated_slaves(
Q#amqqueue{slave_pids = SPids ++ [New], gm_pids = [{GM, New} | GMPids]}).
+handle_call(go, _From, {not_started, Q} = NotStarted) ->
+ case handle_go(Q) of
+ {ok, State} -> {reply, ok, State};
+ {error, Error} -> {stop, Error, NotStarted}
+ end;
+
handle_call({deliver, Delivery, true}, From, State) ->
%% Synchronous, "mandatory" deliver mode.
gen_server2:reply(From, ok),
@@ -208,6 +227,12 @@ handle_call({gm_deaths, LiveGMPids}, From,
handle_call(info, _From, State) ->
reply(infos(?INFO_KEYS, State), State).
+handle_cast(go, {not_started, Q} = NotStarted) ->
+ case handle_go(Q) of
+ {ok, State} -> {noreply, State};
+ {error, Error} -> {stop, Error, NotStarted}
+ end;
+
handle_cast({run_backing_queue, Mod, Fun}, State) ->
noreply(run_backing_queue(Mod, Fun, State));
@@ -293,6 +318,8 @@ handle_info({bump_credit, Msg}, State) ->
handle_info(Msg, State) ->
{stop, {unexpected_info, Msg}, State}.
+terminate(normal, {not_started, _Q}) ->
+ ok;
terminate(_Reason, #state { backing_queue_state = undefined }) ->
%% We've received a delete_and_terminate from gm, thus nothing to
%% do here.
@@ -403,7 +430,9 @@ handle_msg([SPid], _From, Msg) ->
ok = gen_server2:cast(SPid, {gm, Msg}).
inform_deaths(SPid, Live) ->
- case gen_server2:call(SPid, {gm_deaths, Live}, infinity) of
+ case rabbit_misc:with_exit_handler(
+ rabbit_misc:const(ok),
+ fun() -> gen_server2:call(SPid, {gm_deaths, Live}, infinity) end) of
ok -> ok;
{promote, CPid} -> {become, rabbit_mirror_queue_coordinator, [CPid]}
end.