diff options
author | Simon MacMullen <simon@rabbitmq.com> | 2013-08-19 17:14:13 +0100 |
---|---|---|
committer | Simon MacMullen <simon@rabbitmq.com> | 2013-08-19 17:14:13 +0100 |
commit | 11049881a87eb51e9bf6efbb4d2ef1ee4be62bfe (patch) | |
tree | 2a3f21103e1d6050802ed32714d1e62763aeb0a5 /src | |
parent | bd1305279e255adcf583afdd55a7cee18a9fcddb (diff) | |
parent | af4ef7640e817141615298c504e9129d14be1d9d (diff) | |
download | rabbitmq-server-bug24969.tar.gz |
Merge defaultbug24969
Diffstat (limited to 'src')
132 files changed, 13090 insertions, 6785 deletions
diff --git a/src/app_utils.erl b/src/app_utils.erl new file mode 100644 index 00000000..5ae2d295 --- /dev/null +++ b/src/app_utils.erl @@ -0,0 +1,138 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. +%% +-module(app_utils). + +-export([load_applications/1, start_applications/1, start_applications/2, + stop_applications/1, stop_applications/2, app_dependency_order/2, + wait_for_applications/1]). + +-ifdef(use_specs). + +-type error_handler() :: fun((atom(), any()) -> 'ok'). + +-spec load_applications([atom()]) -> 'ok'. +-spec start_applications([atom()]) -> 'ok'. +-spec stop_applications([atom()]) -> 'ok'. +-spec start_applications([atom()], error_handler()) -> 'ok'. +-spec stop_applications([atom()], error_handler()) -> 'ok'. +-spec wait_for_applications([atom()]) -> 'ok'. +-spec app_dependency_order([atom()], boolean()) -> [digraph:vertex()]. + +-endif. + +%%--------------------------------------------------------------------------- +%% Public API + +load_applications(Apps) -> + load_applications(queue:from_list(Apps), sets:new()), + ok. + +start_applications(Apps) -> + start_applications( + Apps, fun (App, Reason) -> + throw({error, {cannot_start_application, App, Reason}}) + end). + +stop_applications(Apps) -> + stop_applications( + Apps, fun (App, Reason) -> + throw({error, {cannot_stop_application, App, Reason}}) + end). + +start_applications(Apps, ErrorHandler) -> + manage_applications(fun lists:foldl/3, + fun application:start/1, + fun application:stop/1, + already_started, + ErrorHandler, + Apps). + +stop_applications(Apps, ErrorHandler) -> + manage_applications(fun lists:foldr/3, + fun application:stop/1, + fun application:start/1, + not_started, + ErrorHandler, + Apps). + + +wait_for_applications(Apps) -> + [wait_for_application(App) || App <- Apps], ok. + +app_dependency_order(RootApps, StripUnreachable) -> + {ok, G} = rabbit_misc:build_acyclic_graph( + fun (App, _Deps) -> [{App, App}] end, + fun (App, Deps) -> [{Dep, App} || Dep <- Deps] end, + [{App, app_dependencies(App)} || + {App, _Desc, _Vsn} <- application:loaded_applications()]), + try + case StripUnreachable of + true -> digraph:del_vertices(G, digraph:vertices(G) -- + digraph_utils:reachable(RootApps, G)); + false -> ok + end, + digraph_utils:topsort(G) + after + true = digraph:delete(G) + end. + +%%--------------------------------------------------------------------------- +%% Private API + +wait_for_application(Application) -> + case lists:keymember(Application, 1, rabbit_misc:which_applications()) of + true -> ok; + false -> timer:sleep(1000), + wait_for_application(Application) + end. + +load_applications(Worklist, Loaded) -> + case queue:out(Worklist) of + {empty, _WorkList} -> + ok; + {{value, App}, Worklist1} -> + case sets:is_element(App, Loaded) of + true -> load_applications(Worklist1, Loaded); + false -> case application:load(App) of + ok -> ok; + {error, {already_loaded, App}} -> ok; + Error -> throw(Error) + end, + load_applications( + queue:join(Worklist1, + queue:from_list(app_dependencies(App))), + sets:add_element(App, Loaded)) + end + end. + +app_dependencies(App) -> + case application:get_key(App, applications) of + undefined -> []; + {ok, Lst} -> Lst + end. + +manage_applications(Iterate, Do, Undo, SkipError, ErrorHandler, Apps) -> + Iterate(fun (App, Acc) -> + case Do(App) of + ok -> [App | Acc]; + {error, {SkipError, _}} -> Acc; + {error, Reason} -> + lists:foreach(Undo, Acc), + ErrorHandler(App, Reason) + end + end, [], Apps), + ok. + diff --git a/src/background_gc.erl b/src/background_gc.erl new file mode 100644 index 00000000..fbd7ce23 --- /dev/null +++ b/src/background_gc.erl @@ -0,0 +1,81 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(background_gc). + +-behaviour(gen_server2). + +-export([start_link/0, run/0]). +-export([gc/0]). %% For run_interval only + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(MAX_RATIO, 0.01). +-define(IDEAL_INTERVAL, 60000). + +-record(state, {last_interval}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). +-spec(run/0 :: () -> 'ok'). +-spec(gc/0 :: () -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link() -> gen_server2:start_link({local, ?MODULE}, ?MODULE, [], + [{timeout, infinity}]). + +run() -> gen_server2:cast(?MODULE, run). + +%%---------------------------------------------------------------------------- + +init([]) -> {ok, interval_gc(#state{last_interval = ?IDEAL_INTERVAL})}. + +handle_call(Msg, _From, State) -> + {stop, {unexpected_call, Msg}, {unexpected_call, Msg}, State}. + +handle_cast(run, State) -> gc(), {noreply, State}; + +handle_cast(Msg, State) -> {stop, {unexpected_cast, Msg}, State}. + +handle_info(run, State) -> {noreply, interval_gc(State)}; + +handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}. + +code_change(_OldVsn, State, _Extra) -> {ok, State}. + +terminate(_Reason, State) -> State. + +%%---------------------------------------------------------------------------- + +interval_gc(State = #state{last_interval = LastInterval}) -> + {ok, Interval} = rabbit_misc:interval_operation( + {?MODULE, gc, []}, + ?MAX_RATIO, ?IDEAL_INTERVAL, LastInterval), + erlang:send_after(Interval, self(), run), + State#state{last_interval = Interval}. + +gc() -> + [garbage_collect(P) || P <- processes(), + {status, waiting} == process_info(P, status)], + garbage_collect(), %% since we will never be waiting... + ok. diff --git a/src/credit_flow.erl b/src/credit_flow.erl index ba99811f..d48d649e 100644 --- a/src/credit_flow.erl +++ b/src/credit_flow.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(credit_flow). @@ -52,6 +52,22 @@ %%---------------------------------------------------------------------------- +%% process dict update macro - eliminates the performance-hurting +%% closure creation a HOF would introduce +-define(UPDATE(Key, Default, Var, Expr), + begin + %% We deliberately allow Var to escape from the case here + %% to be used in Expr. Any temporary var we introduced + %% would also escape, and might conflict. + case get(Key) of + undefined -> Var = Default; + Var -> ok + end, + put(Key, Expr) + end). + +%%---------------------------------------------------------------------------- + %% There are two "flows" here; of messages and of credit, going in %% opposite directions. The variable names "From" and "To" refer to %% the flow of credit, but the function names refer to the flow of @@ -66,29 +82,33 @@ send(From) -> send(From, ?DEFAULT_CREDIT). send(From, {InitialCredit, _MoreCreditAfter}) -> - update({credit_from, From}, InitialCredit, - fun (1) -> block(From), - 0; - (C) -> C - 1 - end). + ?UPDATE({credit_from, From}, InitialCredit, C, + if C == 1 -> block(From), + 0; + true -> C - 1 + end). ack(To) -> ack(To, ?DEFAULT_CREDIT). ack(To, {_InitialCredit, MoreCreditAfter}) -> - update({credit_to, To}, MoreCreditAfter, - fun (1) -> grant(To, MoreCreditAfter), - MoreCreditAfter; - (C) -> C - 1 - end). + ?UPDATE({credit_to, To}, MoreCreditAfter, C, + if C == 1 -> grant(To, MoreCreditAfter), + MoreCreditAfter; + true -> C - 1 + end). handle_bump_msg({From, MoreCredit}) -> - update({credit_from, From}, 0, - fun (C) when C =< 0 andalso C + MoreCredit > 0 -> unblock(From), - C + MoreCredit; - (C) -> C + MoreCredit - end). - -blocked() -> get(credit_blocked, []) =/= []. + ?UPDATE({credit_from, From}, 0, C, + if C =< 0 andalso C + MoreCredit > 0 -> unblock(From), + C + MoreCredit; + true -> C + MoreCredit + end). + +blocked() -> case get(credit_blocked) of + undefined -> false; + [] -> false; + _ -> true + end. peer_down(Peer) -> %% In theory we could also remove it from credit_deferred here, but it @@ -105,24 +125,17 @@ grant(To, Quantity) -> Msg = {bump_credit, {self(), Quantity}}, case blocked() of false -> To ! Msg; - true -> update(credit_deferred, [], - fun (Deferred) -> [{To, Msg} | Deferred] end) + true -> ?UPDATE(credit_deferred, [], Deferred, [{To, Msg} | Deferred]) end. -block(From) -> update(credit_blocked, [], fun (Blocks) -> [From | Blocks] end). +block(From) -> ?UPDATE(credit_blocked, [], Blocks, [From | Blocks]). unblock(From) -> - update(credit_blocked, [], fun (Blocks) -> Blocks -- [From] end), + ?UPDATE(credit_blocked, [], Blocks, Blocks -- [From]), case blocked() of - false -> [To ! Msg || {To, Msg} <- get(credit_deferred, [])], - erase(credit_deferred); + false -> case erase(credit_deferred) of + undefined -> ok; + Credits -> [To ! Msg || {To, Msg} <- Credits] + end; true -> ok end. - -get(Key, Default) -> - case get(Key) of - undefined -> Default; - Value -> Value - end. - -update(Key, Default, Fun) -> put(Key, Fun(get(Key, Default))), ok. diff --git a/src/delegate.erl b/src/delegate.erl index d595e481..0331ca01 100644 --- a/src/delegate.erl +++ b/src/delegate.erl @@ -10,31 +10,44 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(delegate). -behaviour(gen_server2). --export([start_link/1, invoke_no_result/2, invoke/2]). +-export([start_link/1, invoke_no_result/2, invoke/2, + monitor/2, demonitor/1, call/2, cast/2]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). +-record(state, {node, monitors, name}). + %%---------------------------------------------------------------------------- -ifdef(use_specs). +-export_type([monitor_ref/0]). + +-type(monitor_ref() :: reference() | {atom(), pid()}). +-type(fun_or_mfa(A) :: fun ((pid()) -> A) | {atom(), atom(), [any()]}). + -spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()} | ignore | {'error', any()}). --spec(invoke/2 :: - ( pid(), fun ((pid()) -> A)) -> A; - ([pid()], fun ((pid()) -> A)) -> {[{pid(), A}], - [{pid(), term()}]}). --spec(invoke_no_result/2 :: - (pid() | [pid()], fun ((pid()) -> any())) -> 'ok'). +-spec(invoke/2 :: ( pid(), fun_or_mfa(A)) -> A; + ([pid()], fun_or_mfa(A)) -> {[{pid(), A}], + [{pid(), term()}]}). +-spec(invoke_no_result/2 :: (pid() | [pid()], fun_or_mfa(any())) -> 'ok'). +-spec(monitor/2 :: ('process', pid()) -> monitor_ref()). +-spec(demonitor/1 :: (monitor_ref()) -> 'true'). + +-spec(call/2 :: + ( pid(), any()) -> any(); + ([pid()], any()) -> {[{pid(), any()}], [{pid(), term()}]}). +-spec(cast/2 :: (pid() | [pid()], any()) -> 'ok'). -endif. @@ -46,19 +59,27 @@ %%---------------------------------------------------------------------------- start_link(Num) -> - gen_server2:start_link({local, delegate_name(Num)}, ?MODULE, [], []). + Name = delegate_name(Num), + gen_server2:start_link({local, Name}, ?MODULE, [Name], []). -invoke(Pid, Fun) when is_pid(Pid) andalso node(Pid) =:= node() -> - Fun(Pid); -invoke(Pid, Fun) when is_pid(Pid) -> - case invoke([Pid], Fun) of +invoke(Pid, FunOrMFA) when is_pid(Pid) andalso node(Pid) =:= node() -> + apply1(FunOrMFA, Pid); +invoke(Pid, FunOrMFA) when is_pid(Pid) -> + case invoke([Pid], FunOrMFA) of {[{Pid, Result}], []} -> Result; {[], [{Pid, {Class, Reason, StackTrace}}]} -> erlang:raise(Class, Reason, StackTrace) end; -invoke(Pids, Fun) when is_list(Pids) -> +invoke([], _FunOrMFA) -> %% optimisation + {[], []}; +invoke([Pid], FunOrMFA) when node(Pid) =:= node() -> %% optimisation + case safe_invoke(Pid, FunOrMFA) of + {ok, _, Result} -> {[{Pid, Result}], []}; + {error, _, Error} -> {[], [{Pid, Error}]} + end; +invoke(Pids, FunOrMFA) when is_list(Pids) -> {LocalPids, Grouped} = group_pids_by_node(Pids), %% The use of multi_call is only safe because the timeout is %% infinity, and thus there is no process spawned in order to do @@ -67,35 +88,59 @@ invoke(Pids, Fun) when is_list(Pids) -> case orddict:fetch_keys(Grouped) of [] -> {[], []}; RemoteNodes -> gen_server2:multi_call( - RemoteNodes, delegate(RemoteNodes), - {invoke, Fun, Grouped}, infinity) + RemoteNodes, delegate(self(), RemoteNodes), + {invoke, FunOrMFA, Grouped}, infinity) end, BadPids = [{Pid, {exit, {nodedown, BadNode}, []}} || BadNode <- BadNodes, Pid <- orddict:fetch(BadNode, Grouped)], - ResultsNoNode = lists:append([safe_invoke(LocalPids, Fun) | + ResultsNoNode = lists:append([safe_invoke(LocalPids, FunOrMFA) | [Results || {_Node, Results} <- Replies]]), lists:foldl( fun ({ok, Pid, Result}, {Good, Bad}) -> {[{Pid, Result} | Good], Bad}; ({error, Pid, Error}, {Good, Bad}) -> {Good, [{Pid, Error} | Bad]} end, {[], BadPids}, ResultsNoNode). -invoke_no_result(Pid, Fun) when is_pid(Pid) andalso node(Pid) =:= node() -> - safe_invoke(Pid, Fun), %% we don't care about any error +invoke_no_result(Pid, FunOrMFA) when is_pid(Pid) andalso node(Pid) =:= node() -> + safe_invoke(Pid, FunOrMFA), %% we don't care about any error ok; -invoke_no_result(Pid, Fun) when is_pid(Pid) -> - invoke_no_result([Pid], Fun); +invoke_no_result(Pid, FunOrMFA) when is_pid(Pid) -> + invoke_no_result([Pid], FunOrMFA); -invoke_no_result(Pids, Fun) when is_list(Pids) -> +invoke_no_result([], _FunOrMFA) -> %% optimisation + ok; +invoke_no_result([Pid], FunOrMFA) when node(Pid) =:= node() -> %% optimisation + safe_invoke(Pid, FunOrMFA), %% must not die + ok; +invoke_no_result(Pids, FunOrMFA) when is_list(Pids) -> {LocalPids, Grouped} = group_pids_by_node(Pids), case orddict:fetch_keys(Grouped) of [] -> ok; - RemoteNodes -> gen_server2:abcast(RemoteNodes, delegate(RemoteNodes), - {invoke, Fun, Grouped}) + RemoteNodes -> gen_server2:abcast( + RemoteNodes, delegate(self(), RemoteNodes), + {invoke, FunOrMFA, Grouped}) end, - safe_invoke(LocalPids, Fun), %% must not die + safe_invoke(LocalPids, FunOrMFA), %% must not die ok. +monitor(process, Pid) when node(Pid) =:= node() -> + erlang:monitor(process, Pid); +monitor(process, Pid) -> + Name = delegate(Pid, [node(Pid)]), + gen_server2:cast(Name, {monitor, self(), Pid}), + {Name, Pid}. + +demonitor(Ref) when is_reference(Ref) -> + erlang:demonitor(Ref); +demonitor({Name, Pid}) -> + gen_server2:cast(Name, {demonitor, self(), Pid}). + +call(PidOrPids, Msg) -> + invoke(PidOrPids, {gen_server2, call, [Msg, infinity]}). + +cast(PidOrPids, Msg) -> + invoke_no_result(PidOrPids, {gen_server2, cast, [Msg]}). + %%---------------------------------------------------------------------------- group_pids_by_node(Pids) -> @@ -112,43 +157,88 @@ group_pids_by_node(Pids) -> delegate_name(Hash) -> list_to_atom("delegate_" ++ integer_to_list(Hash)). -delegate(RemoteNodes) -> +delegate(Pid, RemoteNodes) -> case get(delegate) of undefined -> Name = delegate_name( - erlang:phash2(self(), + erlang:phash2(Pid, delegate_sup:count(RemoteNodes))), put(delegate, Name), Name; Name -> Name end. -safe_invoke(Pids, Fun) when is_list(Pids) -> - [safe_invoke(Pid, Fun) || Pid <- Pids]; -safe_invoke(Pid, Fun) when is_pid(Pid) -> +safe_invoke(Pids, FunOrMFA) when is_list(Pids) -> + [safe_invoke(Pid, FunOrMFA) || Pid <- Pids]; +safe_invoke(Pid, FunOrMFA) when is_pid(Pid) -> try - {ok, Pid, Fun(Pid)} + {ok, Pid, apply1(FunOrMFA, Pid)} catch Class:Reason -> {error, Pid, {Class, Reason, erlang:get_stacktrace()}} end. +apply1({M, F, A}, Arg) -> apply(M, F, [Arg | A]); +apply1(Fun, Arg) -> Fun(Arg). + %%---------------------------------------------------------------------------- -init([]) -> - {ok, node(), hibernate, +init([Name]) -> + {ok, #state{node = node(), monitors = dict:new(), name = Name}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -handle_call({invoke, Fun, Grouped}, _From, Node) -> - {reply, safe_invoke(orddict:fetch(Node, Grouped), Fun), Node, hibernate}. - -handle_cast({invoke, Fun, Grouped}, Node) -> - safe_invoke(orddict:fetch(Node, Grouped), Fun), - {noreply, Node, hibernate}. - -handle_info(_Info, Node) -> - {noreply, Node, hibernate}. +handle_call({invoke, FunOrMFA, Grouped}, _From, State = #state{node = Node}) -> + {reply, safe_invoke(orddict:fetch(Node, Grouped), FunOrMFA), State, + hibernate}. + +handle_cast({monitor, MonitoringPid, Pid}, + State = #state{monitors = Monitors}) -> + Monitors1 = case dict:find(Pid, Monitors) of + {ok, {Ref, Pids}} -> + Pids1 = gb_sets:add_element(MonitoringPid, Pids), + dict:store(Pid, {Ref, Pids1}, Monitors); + error -> + Ref = erlang:monitor(process, Pid), + Pids = gb_sets:singleton(MonitoringPid), + dict:store(Pid, {Ref, Pids}, Monitors) + end, + {noreply, State#state{monitors = Monitors1}, hibernate}; + +handle_cast({demonitor, MonitoringPid, Pid}, + State = #state{monitors = Monitors}) -> + Monitors1 = case dict:find(Pid, Monitors) of + {ok, {Ref, Pids}} -> + Pids1 = gb_sets:del_element(MonitoringPid, Pids), + case gb_sets:is_empty(Pids1) of + true -> erlang:demonitor(Ref), + dict:erase(Pid, Monitors); + false -> dict:store(Pid, {Ref, Pids1}, Monitors) + end; + error -> + Monitors + end, + {noreply, State#state{monitors = Monitors1}, hibernate}; + +handle_cast({invoke, FunOrMFA, Grouped}, State = #state{node = Node}) -> + safe_invoke(orddict:fetch(Node, Grouped), FunOrMFA), + {noreply, State, hibernate}. + +handle_info({'DOWN', Ref, process, Pid, Info}, + State = #state{monitors = Monitors, name = Name}) -> + {noreply, + case dict:find(Pid, Monitors) of + {ok, {Ref, Pids}} -> + Msg = {'DOWN', {Name, Pid}, process, Pid, Info}, + gb_sets:fold(fun (MonitoringPid, _) -> MonitoringPid ! Msg end, + none, Pids), + State#state{monitors = dict:erase(Pid, Monitors)}; + error -> + State + end, hibernate}; + +handle_info(_Info, State) -> + {noreply, State, hibernate}. terminate(_Reason, _State) -> ok. -code_change(_OldVsn, Node, _Extra) -> - {ok, Node}. +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/src/delegate_sup.erl b/src/delegate_sup.erl index 2a8b915b..e31d6d38 100644 --- a/src/delegate_sup.erl +++ b/src/delegate_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(delegate_sup). diff --git a/src/dtree.erl b/src/dtree.erl index ca2d30cf..5ff36bd9 100644 --- a/src/dtree.erl +++ b/src/dtree.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% %% A dual-index tree. diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index f3b4dbaf..bac7c2c1 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(file_handle_cache). @@ -120,12 +120,12 @@ %% do not need to worry about their handles being closed by the server %% - reopening them when necessary is handled transparently. %% -%% The server also supports obtain, release and transfer. obtain/0 +%% The server also supports obtain, release and transfer. obtain/{0,1} %% blocks until a file descriptor is available, at which point the -%% requesting process is considered to 'own' one more -%% descriptor. release/0 is the inverse operation and releases a -%% previously obtained descriptor. transfer/1 transfers ownership of a -%% file descriptor between processes. It is non-blocking. Obtain has a +%% requesting process is considered to 'own' more descriptor(s). +%% release/{0,1} is the inverse operation and releases previously obtained +%% descriptor(s). transfer/{1,2} transfers ownership of file descriptor(s) +%% between processes. It is non-blocking. Obtain has a %% lower limit, set by the ?OBTAIN_LIMIT/1 macro. File handles can use %% the entire limit, but will be evicted by obtain calls up to the %% point at which no more obtain calls can be satisfied by the obtains @@ -136,8 +136,8 @@ %% as sockets can do so in such a way that the overall number of open %% file descriptors is managed. %% -%% The callers of register_callback/3, obtain/0, and the argument of -%% transfer/1 are monitored, reducing the count of handles in use +%% The callers of register_callback/3, obtain, and the argument of +%% transfer are monitored, reducing the count of handles in use %% appropriately when the processes terminate. -behaviour(gen_server2). @@ -146,12 +146,13 @@ -export([open/3, close/1, read/2, append/2, needs_sync/1, sync/1, position/2, truncate/1, current_virtual_offset/1, current_raw_offset/1, flush/1, copy/3, set_maximum_since_use/1, delete/1, clear/1]). --export([obtain/0, release/0, transfer/1, set_limit/1, get_limit/0, info_keys/0, +-export([obtain/0, obtain/1, release/0, release/1, transfer/1, transfer/2, + set_limit/1, get_limit/0, info_keys/0, info/0, info/1]). -export([ulimit/0]). --export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3, prioritise_cast/2]). +-export([start_link/0, start_link/2, init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2, code_change/3, prioritise_cast/3]). -define(SERVER, ?MODULE). -define(RESERVED_FOR_OTHERS, 100). @@ -195,7 +196,9 @@ obtain_count, obtain_pending, clients, - timer_ref + timer_ref, + alarm_set, + alarm_clear }). -record(cstate, @@ -249,8 +252,11 @@ -spec(clear/1 :: (ref()) -> ok_or_error()). -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -spec(obtain/0 :: () -> 'ok'). +-spec(obtain/1 :: (non_neg_integer()) -> 'ok'). -spec(release/0 :: () -> 'ok'). +-spec(release/1 :: (non_neg_integer()) -> 'ok'). -spec(transfer/1 :: (pid()) -> 'ok'). +-spec(transfer/2 :: (pid(), non_neg_integer()) -> 'ok'). -spec(set_limit/1 :: (non_neg_integer()) -> 'ok'). -spec(get_limit/0 :: () -> non_neg_integer()). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). @@ -268,7 +274,11 @@ %%---------------------------------------------------------------------------- start_link() -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, [], [{timeout, infinity}]). + start_link(fun alarm_handler:set_alarm/1, fun alarm_handler:clear_alarm/1). + +start_link(AlarmSet, AlarmClear) -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, [AlarmSet, AlarmClear], + [{timeout, infinity}]). register_callback(M, F, A) when is_atom(M) andalso is_atom(F) andalso is_list(A) -> @@ -374,11 +384,11 @@ sync(Ref) -> end). needs_sync(Ref) -> - with_handles( - [Ref], - fun ([#handle { is_dirty = false, write_buffer = [] }]) -> false; - ([_Handle]) -> true - end). + %% This must *not* use with_handles/2; see bug 25052 + case get({Ref, fhc_handle}) of + #handle { is_dirty = false, write_buffer = [] } -> false; + #handle {} -> true + end. position(Ref, NewOffset) -> with_flushed_handles( @@ -479,18 +489,22 @@ set_maximum_since_use(MaximumAge) -> true -> ok end. -obtain() -> +obtain() -> obtain(1). +release() -> release(1). +transfer(Pid) -> transfer(Pid, 1). + +obtain(Count) when Count > 0 -> %% If the FHC isn't running, obtains succeed immediately. case whereis(?SERVER) of undefined -> ok; - _ -> gen_server2:call(?SERVER, {obtain, self()}, infinity) + _ -> gen_server2:call(?SERVER, {obtain, Count, self()}, infinity) end. -release() -> - gen_server2:cast(?SERVER, {release, self()}). +release(Count) when Count > 0 -> + gen_server2:cast(?SERVER, {release, Count, self()}). -transfer(Pid) -> - gen_server2:cast(?SERVER, {transfer, self(), Pid}). +transfer(Pid, Count) when Count > 0 -> + gen_server2:cast(?SERVER, {transfer, Count, self(), Pid}). set_limit(Limit) -> gen_server2:call(?SERVER, {set_limit, Limit}, infinity). @@ -806,7 +820,7 @@ i(Item, _) -> throw({bad_argument, Item}). %% gen_server2 callbacks %%---------------------------------------------------------------------------- -init([]) -> +init([AlarmSet, AlarmClear]) -> Limit = case application:get_env(file_handles_high_watermark) of {ok, Watermark} when (is_integer(Watermark) andalso Watermark > 0) -> @@ -830,11 +844,13 @@ init([]) -> obtain_count = 0, obtain_pending = pending_new(), clients = Clients, - timer_ref = undefined }}. + timer_ref = undefined, + alarm_set = AlarmSet, + alarm_clear = AlarmClear }}. -prioritise_cast(Msg, _State) -> +prioritise_cast(Msg, _Len, _State) -> case Msg of - {release, _} -> 5; + {release, _, _} -> 5; _ -> 0 end. @@ -867,11 +883,12 @@ handle_call({open, Pid, Requested, EldestUnusedSince}, From, false -> {noreply, run_pending_item(Item, State)} end; -handle_call({obtain, Pid}, From, State = #fhc_state { obtain_count = Count, - obtain_pending = Pending, - clients = Clients }) -> +handle_call({obtain, N, Pid}, From, State = #fhc_state { + obtain_count = Count, + obtain_pending = Pending, + clients = Clients }) -> ok = track_client(Pid, Clients), - Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From }, + Item = #pending { kind = obtain, pid = Pid, requested = N, from = From }, Enqueue = fun () -> true = ets:update_element(Clients, Pid, {#cstate.blocked, true}), @@ -882,7 +899,7 @@ handle_call({obtain, Pid}, From, State = #fhc_state { obtain_count = Count, case obtain_limit_reached(State) of true -> Enqueue(); false -> case needs_reduce(State #fhc_state { - obtain_count = Count + 1 }) of + obtain_count = Count + N }) of true -> reduce(Enqueue()); false -> adjust_alarm( State, run_pending_item(Item, State)) @@ -917,9 +934,9 @@ handle_cast({update, Pid, EldestUnusedSince}, %% storm of messages {noreply, State}; -handle_cast({release, Pid}, State) -> +handle_cast({release, N, Pid}, State) -> {noreply, adjust_alarm(State, process_pending( - update_counts(obtain, Pid, -1, State)))}; + update_counts(obtain, Pid, -N, State)))}; handle_cast({close, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, clients = Clients }) -> @@ -931,11 +948,11 @@ handle_cast({close, Pid, EldestUnusedSince}, {noreply, adjust_alarm(State, process_pending( update_counts(open, Pid, -1, State)))}; -handle_cast({transfer, FromPid, ToPid}, State) -> +handle_cast({transfer, N, FromPid, ToPid}, State) -> ok = track_client(ToPid, State#fhc_state.clients), {noreply, process_pending( - update_counts(obtain, ToPid, +1, - update_counts(obtain, FromPid, -1, State)))}. + update_counts(obtain, ToPid, +N, + update_counts(obtain, FromPid, -N, State)))}. handle_info(check_counts, State) -> {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}; @@ -1026,10 +1043,11 @@ obtain_limit_reached(#fhc_state { obtain_limit = Limit, obtain_count = Count}) -> Limit =/= infinity andalso Count >= Limit. -adjust_alarm(OldState, NewState) -> +adjust_alarm(OldState = #fhc_state { alarm_set = AlarmSet, + alarm_clear = AlarmClear }, NewState) -> case {obtain_limit_reached(OldState), obtain_limit_reached(NewState)} of - {false, true} -> alarm_handler:set_alarm({file_descriptor_limit, []}); - {true, false} -> alarm_handler:clear_alarm(file_descriptor_limit); + {false, true} -> AlarmSet({file_descriptor_limit, []}); + {true, false} -> AlarmClear(file_descriptor_limit); _ -> ok end, NewState. diff --git a/src/gatherer.erl b/src/gatherer.erl index 98b36038..c13298ca 100644 --- a/src/gatherer.erl +++ b/src/gatherer.erl @@ -10,15 +10,15 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(gatherer). -behaviour(gen_server2). --export([start_link/0, stop/1, fork/1, finish/1, in/2, out/1]). +-export([start_link/0, stop/1, fork/1, finish/1, in/2, sync_in/2, out/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -32,6 +32,7 @@ -spec(fork/1 :: (pid()) -> 'ok'). -spec(finish/1 :: (pid()) -> 'ok'). -spec(in/2 :: (pid(), any()) -> 'ok'). +-spec(sync_in/2 :: (pid(), any()) -> 'ok'). -spec(out/1 :: (pid()) -> {'value', any()} | 'empty'). -endif. @@ -62,6 +63,9 @@ finish(Pid) -> in(Pid, Value) -> gen_server2:cast(Pid, {in, Value}). +sync_in(Pid, Value) -> + gen_server2:call(Pid, {in, Value}, infinity). + out(Pid) -> gen_server2:call(Pid, out, infinity). @@ -78,19 +82,22 @@ handle_call(stop, _From, State) -> handle_call(fork, _From, State = #gstate { forks = Forks }) -> {reply, ok, State #gstate { forks = Forks + 1 }, hibernate}; +handle_call({in, Value}, From, State) -> + {noreply, in(Value, From, State), hibernate}; + handle_call(out, From, State = #gstate { forks = Forks, values = Values, blocked = Blocked }) -> case queue:out(Values) of + {empty, _} when Forks == 0 -> + {reply, empty, State, hibernate}; {empty, _} -> - case Forks of - 0 -> {reply, empty, State, hibernate}; - _ -> {noreply, - State #gstate { blocked = queue:in(From, Blocked) }, - hibernate} - end; - {{value, _Value} = V, NewValues} -> - {reply, V, State #gstate { values = NewValues }, hibernate} + {noreply, State #gstate { blocked = queue:in(From, Blocked) }, + hibernate}; + {{value, {PendingIn, Value}}, NewValues} -> + reply(PendingIn, ok), + {reply, {value, Value}, State #gstate { values = NewValues }, + hibernate} end; handle_call(Msg, _From, State) -> @@ -107,15 +114,8 @@ handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) -> {noreply, State #gstate { forks = NewForks, blocked = NewBlocked }, hibernate}; -handle_cast({in, Value}, State = #gstate { values = Values, - blocked = Blocked }) -> - {noreply, case queue:out(Blocked) of - {empty, _} -> - State #gstate { values = queue:in(Value, Values) }; - {{value, From}, NewBlocked} -> - gen_server2:reply(From, {value, Value}), - State #gstate { blocked = NewBlocked } - end, hibernate}; +handle_cast({in, Value}, State) -> + {noreply, in(Value, undefined, State), hibernate}; handle_cast(Msg, State) -> {stop, {unexpected_cast, Msg}, State}. @@ -128,3 +128,18 @@ code_change(_OldVsn, State, _Extra) -> terminate(_Reason, State) -> State. + +%%---------------------------------------------------------------------------- + +in(Value, From, State = #gstate { values = Values, blocked = Blocked }) -> + case queue:out(Blocked) of + {empty, _} -> + State #gstate { values = queue:in({From, Value}, Values) }; + {{value, PendingOut}, NewBlocked} -> + reply(From, ok), + gen_server2:reply(PendingOut, {value, Value}), + State #gstate { blocked = NewBlocked } + end. + +reply(undefined, _Reply) -> ok; +reply(From, Reply) -> gen_server2:reply(From, Reply). diff --git a/src/gen_server2.erl b/src/gen_server2.erl index 78bbbe06..6690d181 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -16,12 +16,15 @@ %% The original code could reorder messages when communicating with a %% process on a remote node that was not currently connected. %% -%% 4) The callback module can optionally implement prioritise_call/3, -%% prioritise_cast/2 and prioritise_info/2. These functions take -%% Message, From and State or just Message and State and return a -%% single integer representing the priority attached to the message. -%% Messages with higher priorities are processed before requests with -%% lower priorities. The default priority is 0. +%% 4) The callback module can optionally implement prioritise_call/4, +%% prioritise_cast/3 and prioritise_info/3. These functions take +%% Message, From, Length and State or just Message, Length and State +%% (where Length is the current number of messages waiting to be +%% processed) and return a single integer representing the priority +%% attached to the message, or 'drop' to ignore it (for +%% prioritise_cast/3 and prioritise_info/3 only). Messages with +%% higher priorities are processed before requests with lower +%% priorities. The default priority is 0. %% %% 5) The callback module can optionally implement %% handle_pre_hibernate/1 and handle_post_hibernate/1. These will be @@ -72,8 +75,14 @@ %% format_message_queue/2 which is the equivalent of format_status/2 %% but where the second argument is specifically the priority_queue %% which contains the prioritised message_queue. +%% +%% 9) The function with_state/2 can be used to debug a process with +%% heavyweight state (without needing to copy the entire state out of +%% process as sys:get_status/1 would). Pass through a function which +%% can be invoked on the state, get back the result. The state is not +%% modified. -%% All modifications are (C) 2009-2012 VMware, Inc. +%% All modifications are (C) 2009-2013 GoPivotal, Inc. %% ``The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -181,6 +190,7 @@ cast/2, reply/2, abcast/2, abcast/3, multi_call/2, multi_call/3, multi_call/4, + with_state/2, enter_loop/3, enter_loop/4, enter_loop/5, enter_loop/6, wake_hib/1]). %% System exports @@ -196,8 +206,7 @@ %% State record -record(gs2_state, {parent, name, state, mod, time, - timeout_state, queue, debug, prioritise_call, - prioritise_cast, prioritise_info}). + timeout_state, queue, debug, prioritisers}). -ifdef(use_specs). @@ -380,6 +389,16 @@ multi_call(Nodes, Name, Req, Timeout) when is_list(Nodes), is_atom(Name), is_integer(Timeout), Timeout >= 0 -> do_multi_call(Nodes, Name, Req, Timeout). +%% ----------------------------------------------------------------- +%% Apply a function to a generic server's state. +%% ----------------------------------------------------------------- +with_state(Name, Fun) -> + case catch gen:call(Name, '$with_state', Fun, infinity) of + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, with_state, [Name, Fun]}}) + end. %%----------------------------------------------------------------- %% enter_loop(Mod, Options, State, <ServerName>, <TimeOut>, <Backoff>) ->_ @@ -638,17 +657,22 @@ adjust_timeout_state(SleptAt, AwokeAt, {backoff, CurrentTO, MinimumTO, {backoff, CurrentTO1, MinimumTO, DesiredHibPeriod, RandomState1}. in({'$gen_cast', Msg} = Input, - GS2State = #gs2_state { prioritise_cast = PC }) -> - in(Input, PC(Msg, GS2State), GS2State); + GS2State = #gs2_state { prioritisers = {_, F, _} }) -> + in(Input, F(Msg, GS2State), GS2State); in({'$gen_call', From, Msg} = Input, - GS2State = #gs2_state { prioritise_call = PC }) -> - in(Input, PC(Msg, From, GS2State), GS2State); + GS2State = #gs2_state { prioritisers = {F, _, _} }) -> + in(Input, F(Msg, From, GS2State), GS2State); +in({'$with_state', _From, _Fun} = Input, GS2State) -> + in(Input, 0, GS2State); in({'EXIT', Parent, _R} = Input, GS2State = #gs2_state { parent = Parent }) -> in(Input, infinity, GS2State); in({system, _From, _Req} = Input, GS2State) -> in(Input, infinity, GS2State); -in(Input, GS2State = #gs2_state { prioritise_info = PI }) -> - in(Input, PI(Input, GS2State), GS2State). +in(Input, GS2State = #gs2_state { prioritisers = {_, _, F} }) -> + in(Input, F(Input, GS2State), GS2State). + +in(_Input, drop, GS2State) -> + GS2State; in(Input, Priority, GS2State = #gs2_state { queue = Queue }) -> GS2State # gs2_state { queue = priority_queue:in(Input, Priority, Queue) }. @@ -658,6 +682,10 @@ process_msg({system, From, Req}, %% gen_server puts Hib on the end as the 7th arg, but that version %% of the fun seems not to be documented so leaving out for now. sys:handle_system_msg(Req, From, Parent, ?MODULE, Debug, GS2State); +process_msg({'$with_state', From, Fun}, + GS2State = #gs2_state{state = State}) -> + reply(From, catch Fun(State)), + loop(GS2State); process_msg({'EXIT', Parent, Reason} = Msg, GS2State = #gs2_state { parent = Parent }) -> terminate(Reason, Msg, GS2State); @@ -864,13 +892,19 @@ dispatch(Info, Mod, State) -> common_reply(_Name, From, Reply, _NState, [] = _Debug) -> reply(From, Reply), []; -common_reply(Name, From, Reply, NState, Debug) -> - reply(Name, From, Reply, NState, Debug). +common_reply(Name, {To, _Tag} = From, Reply, NState, Debug) -> + reply(From, Reply), + sys:handle_debug(Debug, fun print_event/3, Name, {out, Reply, To, NState}). -common_debug([] = _Debug, _Func, _Info, _Event) -> +common_noreply(_Name, _NState, [] = _Debug) -> []; -common_debug(Debug, Func, Info, Event) -> - sys:handle_debug(Debug, Func, Info, Event). +common_noreply(Name, NState, Debug) -> + sys:handle_debug(Debug, fun print_event/3, Name, {noreply, NState}). + +common_become(_Name, _Mod, _NState, [] = _Debug) -> + []; +common_become(Name, Mod, NState, Debug) -> + sys:handle_debug(Debug, fun print_event/3, Name, {become, Mod, NState}). handle_msg({'$gen_call', From, Msg}, GS2State = #gs2_state { mod = Mod, state = State, @@ -887,23 +921,11 @@ handle_msg({'$gen_call', From, Msg}, GS2State = #gs2_state { mod = Mod, loop(GS2State #gs2_state { state = NState, time = Time1, debug = Debug1}); - {noreply, NState} -> - Debug1 = common_debug(Debug, fun print_event/3, Name, - {noreply, NState}), - loop(GS2State #gs2_state {state = NState, - time = infinity, - debug = Debug1}); - {noreply, NState, Time1} -> - Debug1 = common_debug(Debug, fun print_event/3, Name, - {noreply, NState}), - loop(GS2State #gs2_state {state = NState, - time = Time1, - debug = Debug1}); {stop, Reason, Reply, NState} -> {'EXIT', R} = (catch terminate(Reason, Msg, GS2State #gs2_state { state = NState })), - reply(Name, From, Reply, NState, Debug), + common_reply(Name, From, Reply, NState, Debug), exit(R); Other -> handle_common_reply(Other, Msg, GS2State) @@ -916,28 +938,24 @@ handle_common_reply(Reply, Msg, GS2State = #gs2_state { name = Name, debug = Debug}) -> case Reply of {noreply, NState} -> - Debug1 = common_debug(Debug, fun print_event/3, Name, - {noreply, NState}), - loop(GS2State #gs2_state { state = NState, - time = infinity, - debug = Debug1 }); + Debug1 = common_noreply(Name, NState, Debug), + loop(GS2State #gs2_state {state = NState, + time = infinity, + debug = Debug1}); {noreply, NState, Time1} -> - Debug1 = common_debug(Debug, fun print_event/3, Name, - {noreply, NState}), - loop(GS2State #gs2_state { state = NState, - time = Time1, - debug = Debug1 }); + Debug1 = common_noreply(Name, NState, Debug), + loop(GS2State #gs2_state {state = NState, + time = Time1, + debug = Debug1}); {become, Mod, NState} -> - Debug1 = common_debug(Debug, fun print_event/3, Name, - {become, Mod, NState}), + Debug1 = common_become(Name, Mod, NState, Debug), loop(find_prioritisers( GS2State #gs2_state { mod = Mod, state = NState, time = infinity, debug = Debug1 })); {become, Mod, NState, Time1} -> - Debug1 = common_debug(Debug, fun print_event/3, Name, - {become, Mod, NState}), + Debug1 = common_become(Name, Mod, NState, Debug), loop(find_prioritisers( GS2State #gs2_state { mod = Mod, state = NState, @@ -957,12 +975,6 @@ handle_common_termination(Reply, Msg, GS2State) -> terminate({bad_return_value, Reply}, Msg, GS2State) end. -reply(Name, {To, Tag}, Reply, State, Debug) -> - reply({To, Tag}, Reply), - sys:handle_debug( - Debug, fun print_event/3, Name, {out, Reply, To, State}). - - %%----------------------------------------------------------------- %% Callback functions for system messages handling. %%----------------------------------------------------------------- @@ -1165,30 +1177,33 @@ whereis_name(Name) -> end. find_prioritisers(GS2State = #gs2_state { mod = Mod }) -> - PrioriCall = function_exported_or_default( - Mod, 'prioritise_call', 3, - fun (_Msg, _From, _State) -> 0 end), - PrioriCast = function_exported_or_default(Mod, 'prioritise_cast', 2, - fun (_Msg, _State) -> 0 end), - PrioriInfo = function_exported_or_default(Mod, 'prioritise_info', 2, - fun (_Msg, _State) -> 0 end), - GS2State #gs2_state { prioritise_call = PrioriCall, - prioritise_cast = PrioriCast, - prioritise_info = PrioriInfo }. + PCall = function_exported_or_default(Mod, 'prioritise_call', 4, + fun (_Msg, _From, _State) -> 0 end), + PCast = function_exported_or_default(Mod, 'prioritise_cast', 3, + fun (_Msg, _State) -> 0 end), + PInfo = function_exported_or_default(Mod, 'prioritise_info', 3, + fun (_Msg, _State) -> 0 end), + GS2State #gs2_state { prioritisers = {PCall, PCast, PInfo} }. function_exported_or_default(Mod, Fun, Arity, Default) -> case erlang:function_exported(Mod, Fun, Arity) of true -> case Arity of - 2 -> fun (Msg, GS2State = #gs2_state { state = State }) -> - case catch Mod:Fun(Msg, State) of + 3 -> fun (Msg, GS2State = #gs2_state { queue = Queue, + state = State }) -> + Length = priority_queue:len(Queue), + case catch Mod:Fun(Msg, Length, State) of + drop -> + drop; Res when is_integer(Res) -> Res; Err -> handle_common_termination(Err, Msg, GS2State) end end; - 3 -> fun (Msg, From, GS2State = #gs2_state { state = State }) -> - case catch Mod:Fun(Msg, From, State) of + 4 -> fun (Msg, From, GS2State = #gs2_state { queue = Queue, + state = State }) -> + Length = priority_queue:len(Queue), + case catch Mod:Fun(Msg, From, Length, State) of Res when is_integer(Res) -> Res; Err -> @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(gm). @@ -77,9 +77,19 @@ %% confirmed_broadcast/2 directly from the callback module otherwise %% you will deadlock the entire group. %% -%% group_members/1 -%% Provide the Pid. Returns a list of the current group members. +%% info/1 +%% Provide the Pid. Returns a proplist with various facts, including +%% the group name and the current group members. %% +%% validate_members/2 +%% Check whether a given member list agrees with the chosen member's +%% view. Any differences will be communicated via the members_changed +%% callback. If there are no differences then there will be no reply. +%% Note that members will not necessarily share the same view. +%% +%% forget_group/1 +%% Provide the group name. Removes its mnesia record. Makes no attempt +%% to ensure the group is empty. %% %% Implementation Overview %% ----------------------- @@ -372,11 +382,11 @@ -behaviour(gen_server2). --export([create_tables/0, start_link/3, leave/1, broadcast/2, - confirmed_broadcast/2, group_members/1]). +-export([create_tables/0, start_link/4, leave/1, broadcast/2, + confirmed_broadcast/2, info/1, validate_members/2, forget_group/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3, prioritise_info/2]). + code_change/3, prioritise_info/3]). -ifndef(use_specs). -export([behaviour_info/1]). @@ -404,7 +414,8 @@ callback_args, confirms, broadcast_buffer, - broadcast_timer + broadcast_timer, + txn_executor }). -record(gm_group, { name, version, members }). @@ -424,14 +435,17 @@ -export_type([group_name/0]). -type(group_name() :: any()). +-type(txn_fun() :: fun((fun(() -> any())) -> any())). -spec(create_tables/0 :: () -> 'ok' | {'aborted', any()}). --spec(start_link/3 :: (group_name(), atom(), any()) -> +-spec(start_link/4 :: (group_name(), atom(), any(), txn_fun()) -> rabbit_types:ok_pid_or_error()). -spec(leave/1 :: (pid()) -> 'ok'). -spec(broadcast/2 :: (pid(), any()) -> 'ok'). -spec(confirmed_broadcast/2 :: (pid(), any()) -> 'ok'). --spec(group_members/1 :: (pid()) -> [pid()]). +-spec(info/1 :: (pid()) -> rabbit_types:infos()). +-spec(validate_members/2 :: (pid(), [pid()]) -> 'ok'). +-spec(forget_group/1 :: (group_name()) -> 'ok'). %% The joined, members_changed and handle_msg callbacks can all return %% any of the following terms: @@ -502,8 +516,8 @@ table_definitions() -> {Name, Attributes} = ?TABLE, [{Name, [?TABLE_MATCH | Attributes]}]. -start_link(GroupName, Module, Args) -> - gen_server2:start_link(?MODULE, [GroupName, Module, Args], []). +start_link(GroupName, Module, Args, TxnFun) -> + gen_server2:start_link(?MODULE, [GroupName, Module, Args, TxnFun], []). leave(Server) -> gen_server2:cast(Server, leave). @@ -514,11 +528,20 @@ broadcast(Server, Msg) -> confirmed_broadcast(Server, Msg) -> gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity). -group_members(Server) -> - gen_server2:call(Server, group_members, infinity). +info(Server) -> + gen_server2:call(Server, info, infinity). + +validate_members(Server, Members) -> + gen_server2:cast(Server, {validate_members, Members}). +forget_group(GroupName) -> + {atomic, ok} = mnesia:sync_transaction( + fun () -> + mnesia:delete({?GROUP_TABLE, GroupName}) + end), + ok. -init([GroupName, Module, Args]) -> +init([GroupName, Module, Args, TxnFun]) -> {MegaSecs, Secs, MicroSecs} = now(), random:seed(MegaSecs, Secs, MicroSecs), Self = make_member(GroupName), @@ -534,7 +557,8 @@ init([GroupName, Module, Args]) -> callback_args = Args, confirms = queue:new(), broadcast_buffer = [], - broadcast_timer = undefined }, hibernate, + broadcast_timer = undefined, + txn_executor = TxnFun }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -553,12 +577,16 @@ handle_call({confirmed_broadcast, Msg}, _From, handle_call({confirmed_broadcast, Msg}, From, State) -> internal_broadcast(Msg, From, State); -handle_call(group_members, _From, +handle_call(info, _From, State = #state { members_state = undefined }) -> reply(not_joined, State); -handle_call(group_members, _From, State = #state { view = View }) -> - reply(alive_view_members(View), State); +handle_call(info, _From, State = #state { group_name = GroupName, + module = Module, + view = View }) -> + reply([{group_name, GroupName}, + {module, Module}, + {group_members, get_pids(alive_view_members(View))}], State); handle_call({add_on_right, _NewMember}, _From, State = #state { members_state = undefined }) -> @@ -570,7 +598,8 @@ handle_call({add_on_right, NewMember}, _From, view = View, members_state = MembersState, module = Module, - callback_args = Args }) -> + callback_args = Args, + txn_executor = TxnFun }) -> {MembersState1, Group} = record_new_member_in_group( GroupName, Self, NewMember, @@ -581,7 +610,7 @@ handle_call({add_on_right, NewMember}, _From, {catchup, Self, prepare_members_state(MembersState1)}), MembersState1 - end), + end, TxnFun), View2 = group_to_view(Group), State1 = check_neighbours(State #state { view = View2, members_state = MembersState1 }), @@ -627,8 +656,9 @@ handle_cast(join, State = #state { self = Self, group_name = GroupName, members_state = undefined, module = Module, - callback_args = Args }) -> - View = join_group(Self, GroupName), + callback_args = Args, + txn_executor = TxnFun }) -> + View = join_group(Self, GroupName, TxnFun), MembersState = case alive_view_members(View) of [Self] -> blank_member_state(); @@ -639,6 +669,19 @@ handle_cast(join, State = #state { self = Self, handle_callback_result( {Module:joined(Args, get_pids(all_known_members(View))), State1}); +handle_cast({validate_members, OldMembers}, + State = #state { view = View, + module = Module, + callback_args = Args }) -> + NewMembers = get_pids(all_known_members(View)), + Births = NewMembers -- OldMembers, + Deaths = OldMembers -- NewMembers, + case {Births, Deaths} of + {[], []} -> noreply(State); + _ -> Result = Module:members_changed(Args, Births, Deaths), + handle_callback_result({Result, State}) + end; + handle_cast(leave, State) -> {stop, normal, State}. @@ -647,7 +690,10 @@ handle_info(flush, State) -> noreply( flush_broadcast_buffer(State #state { broadcast_timer = undefined })); -handle_info({'DOWN', MRef, process, _Pid, _Reason}, +handle_info(timeout, State) -> + noreply(flush_broadcast_buffer(State)); + +handle_info({'DOWN', MRef, process, _Pid, Reason}, State = #state { self = Self, left = Left, right = Right, @@ -655,18 +701,22 @@ handle_info({'DOWN', MRef, process, _Pid, _Reason}, view = View, module = Module, callback_args = Args, - confirms = Confirms }) -> + confirms = Confirms, + txn_executor = TxnFun }) -> Member = case {Left, Right} of {{Member1, MRef}, _} -> Member1; {_, {Member1, MRef}} -> Member1; _ -> undefined end, - case Member of - undefined -> + case {Member, Reason} of + {undefined, _} -> + noreply(State); + {_, {shutdown, ring_shutdown}} -> noreply(State); _ -> View1 = - group_to_view(record_dead_member_in_group(Member, GroupName)), + group_to_view(record_dead_member_in_group(Member, + GroupName, TxnFun)), {Result, State2} = case alive_view_members(View1) of [Self] -> @@ -694,12 +744,12 @@ terminate(Reason, State = #state { module = Module, code_change(_OldVsn, State, _Extra) -> {ok, State}. -prioritise_info(flush, _State) -> +prioritise_info(flush, _Len, _State) -> 1; -prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, +prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _Len, #state { members_state = MS }) when MS /= undefined -> 1; -prioritise_info(_, _State) -> +prioritise_info(_, _Len, _State) -> 0. @@ -810,10 +860,13 @@ handle_msg({activity, _NotLeft, _Activity}, State) -> noreply(State) -> - {noreply, ensure_broadcast_timer(State), hibernate}. + {noreply, ensure_broadcast_timer(State), flush_timeout(State)}. reply(Reply, State) -> - {reply, Reply, ensure_broadcast_timer(State), hibernate}. + {reply, Reply, ensure_broadcast_timer(State), flush_timeout(State)}. + +flush_timeout(#state{broadcast_buffer = []}) -> hibernate; +flush_timeout(_) -> 0. ensure_broadcast_timer(State = #state { broadcast_buffer = [], broadcast_timer = undefined }) -> @@ -876,11 +929,9 @@ flush_broadcast_buffer(State = #state { self = Self, %% View construction and inspection %% --------------------------------------------------------------------------- -needs_view_update(ReqVer, {Ver, _View}) -> - Ver < ReqVer. +needs_view_update(ReqVer, {Ver, _View}) -> Ver < ReqVer. -view_version({Ver, _View}) -> - Ver. +view_version({Ver, _View}) -> Ver. is_member_alive({dead, _Member}) -> false; is_member_alive(_) -> true. @@ -899,17 +950,13 @@ store_view_member(VMember = #view_member { id = Id }, {Ver, View}) -> with_view_member(Fun, View, Id) -> store_view_member(Fun(fetch_view_member(Id, View)), View). -fetch_view_member(Id, {_Ver, View}) -> - ?DICT:fetch(Id, View). +fetch_view_member(Id, {_Ver, View}) -> ?DICT:fetch(Id, View). -find_view_member(Id, {_Ver, View}) -> - ?DICT:find(Id, View). +find_view_member(Id, {_Ver, View}) -> ?DICT:find(Id, View). -blank_view(Ver) -> - {Ver, ?DICT:new()}. +blank_view(Ver) -> {Ver, ?DICT:new()}. -alive_view_members({_Ver, View}) -> - ?DICT:fetch_keys(View). +alive_view_members({_Ver, View}) -> ?DICT:fetch_keys(View). all_known_members({_Ver, View}) -> ?DICT:fold( @@ -974,14 +1021,15 @@ ensure_alive_suffix1(MembersQ) -> %% View modification %% --------------------------------------------------------------------------- -join_group(Self, GroupName) -> - join_group(Self, GroupName, read_group(GroupName)). +join_group(Self, GroupName, TxnFun) -> + join_group(Self, GroupName, read_group(GroupName), TxnFun). -join_group(Self, GroupName, {error, not_found}) -> - join_group(Self, GroupName, prune_or_create_group(Self, GroupName)); -join_group(Self, _GroupName, #gm_group { members = [Self] } = Group) -> +join_group(Self, GroupName, {error, not_found}, TxnFun) -> + join_group(Self, GroupName, + prune_or_create_group(Self, GroupName, TxnFun), TxnFun); +join_group(Self, _GroupName, #gm_group { members = [Self] } = Group, _TxnFun) -> group_to_view(Group); -join_group(Self, GroupName, #gm_group { members = Members } = Group) -> +join_group(Self, GroupName, #gm_group { members = Members } = Group, TxnFun) -> case lists:member(Self, Members) of true -> group_to_view(Group); @@ -989,20 +1037,22 @@ join_group(Self, GroupName, #gm_group { members = Members } = Group) -> case lists:filter(fun is_member_alive/1, Members) of [] -> join_group(Self, GroupName, - prune_or_create_group(Self, GroupName)); + prune_or_create_group(Self, GroupName, TxnFun)); Alive -> Left = lists:nth(random:uniform(length(Alive)), Alive), Handler = fun () -> join_group( Self, GroupName, - record_dead_member_in_group(Left, GroupName)) + record_dead_member_in_group( + Left, GroupName, TxnFun), + TxnFun) end, try case gen_server2:call( get_pid(Left), {add_on_right, Self}, infinity) of {ok, Group1} -> group_to_view(Group1); - not_ready -> join_group(Self, GroupName) + not_ready -> join_group(Self, GroupName, TxnFun) end catch exit:{R, _} @@ -1021,29 +1071,29 @@ read_group(GroupName) -> [Group] -> Group end. -prune_or_create_group(Self, GroupName) -> - {atomic, Group} = - mnesia:sync_transaction( - fun () -> GroupNew = #gm_group { name = GroupName, - members = [Self], - version = ?VERSION_START }, - case mnesia:read({?GROUP_TABLE, GroupName}) of - [] -> - mnesia:write(GroupNew), - GroupNew; - [Group1 = #gm_group { members = Members }] -> - case lists:any(fun is_member_alive/1, Members) of - true -> Group1; - false -> mnesia:write(GroupNew), - GroupNew - end - end - end), +prune_or_create_group(Self, GroupName, TxnFun) -> + Group = TxnFun( + fun () -> + GroupNew = #gm_group { name = GroupName, + members = [Self], + version = get_version(Self) }, + case mnesia:read({?GROUP_TABLE, GroupName}) of + [] -> + mnesia:write(GroupNew), + GroupNew; + [Group1 = #gm_group { members = Members }] -> + case lists:any(fun is_member_alive/1, Members) of + true -> Group1; + false -> mnesia:write(GroupNew), + GroupNew + end + end + end), Group. -record_dead_member_in_group(Member, GroupName) -> - {atomic, Group} = - mnesia:sync_transaction( +record_dead_member_in_group(Member, GroupName, TxnFun) -> + Group = + TxnFun( fun () -> [Group1 = #gm_group { members = Members, version = Ver }] = mnesia:read({?GROUP_TABLE, GroupName}), case lists:splitwith( @@ -1060,9 +1110,9 @@ record_dead_member_in_group(Member, GroupName) -> end), Group. -record_new_member_in_group(GroupName, Left, NewMember, Fun) -> - {atomic, {Result, Group}} = - mnesia:sync_transaction( +record_new_member_in_group(GroupName, Left, NewMember, Fun, TxnFun) -> + {Result, Group} = + TxnFun( fun () -> [#gm_group { members = Members, version = Ver } = Group1] = mnesia:read({?GROUP_TABLE, GroupName}), @@ -1077,10 +1127,10 @@ record_new_member_in_group(GroupName, Left, NewMember, Fun) -> end), {Result, Group}. -erase_members_in_group(Members, GroupName) -> +erase_members_in_group(Members, GroupName, TxnFun) -> DeadMembers = [{dead, Id} || Id <- Members], - {atomic, Group} = - mnesia:sync_transaction( + Group = + TxnFun( fun () -> [Group1 = #gm_group { members = [_|_] = Members1, version = Ver }] = @@ -1101,7 +1151,8 @@ maybe_erase_aliases(State = #state { self = Self, view = View0, members_state = MembersState, module = Module, - callback_args = Args }, View) -> + callback_args = Args, + txn_executor = TxnFun }, View) -> #view_member { aliases = Aliases } = fetch_view_member(Self, View), {Erasable, MembersState1} = ?SETS:fold( @@ -1118,7 +1169,7 @@ maybe_erase_aliases(State = #state { self = Self, case Erasable of [] -> {ok, State1 #state { view = View }}; _ -> View1 = group_to_view( - erase_members_in_group(Erasable, GroupName)), + erase_members_in_group(Erasable, GroupName, TxnFun)), {callback_view_changed(Args, Module, View0, View1), check_neighbours(State1 #state { view = View1 })} end. @@ -1150,10 +1201,8 @@ ensure_neighbour(Ver, Self, {RealNeighbour, MRef}, Neighbour) -> end, {Neighbour, maybe_monitor(Neighbour, Self)}. -maybe_monitor(Self, Self) -> - undefined; -maybe_monitor(Other, _Self) -> - erlang:monitor(process, get_pid(Other)). +maybe_monitor( Self, Self) -> undefined; +maybe_monitor(Other, _Self) -> erlang:monitor(process, get_pid(Other)). check_neighbours(State = #state { self = Self, left = Left, @@ -1242,23 +1291,19 @@ find_member_or_blank(Id, MembersState) -> error -> blank_member() end. -erase_member(Id, MembersState) -> - ?DICT:erase(Id, MembersState). +erase_member(Id, MembersState) -> ?DICT:erase(Id, MembersState). blank_member() -> #member { pending_ack = queue:new(), last_pub = -1, last_ack = -1 }. -blank_member_state() -> - ?DICT:new(). +blank_member_state() -> ?DICT:new(). store_member(Id, MemberState, MembersState) -> ?DICT:store(Id, MemberState, MembersState). -prepare_members_state(MembersState) -> - ?DICT:to_list(MembersState). +prepare_members_state(MembersState) -> ?DICT:to_list(MembersState). -build_members_state(MembersStateList) -> - ?DICT:from_list(MembersStateList). +build_members_state(MembersStateList) -> ?DICT:from_list(MembersStateList). make_member(GroupName) -> {case read_group(GroupName) of @@ -1272,6 +1317,8 @@ remove_erased_members(MembersState, View) -> MembersState1) end, blank_member_state(), all_known_members(View)). +get_version({Version, _Pid}) -> Version. + get_pid({_Version, Pid}) -> Pid. get_pids(Ids) -> [Pid || {_Version, Pid} <- Ids]. @@ -1280,16 +1327,12 @@ get_pids(Ids) -> [Pid || {_Version, Pid} <- Ids]. %% Activity assembly %% --------------------------------------------------------------------------- -activity_nil() -> - queue:new(). +activity_nil() -> queue:new(). -activity_cons(_Id, [], [], Tail) -> - Tail; -activity_cons(Sender, Pubs, Acks, Tail) -> - queue:in({Sender, Pubs, Acks}, Tail). +activity_cons( _Id, [], [], Tail) -> Tail; +activity_cons(Sender, Pubs, Acks, Tail) -> queue:in({Sender, Pubs, Acks}, Tail). -activity_finalise(Activity) -> - queue:to_list(Activity). +activity_finalise(Activity) -> queue:to_list(Activity). maybe_send_activity([], _State) -> ok; @@ -1393,34 +1436,25 @@ purge_confirms(Confirms) -> %% Msg transformation %% --------------------------------------------------------------------------- -acks_from_queue(Q) -> - [PubNum || {PubNum, _Msg} <- queue:to_list(Q)]. +acks_from_queue(Q) -> [PubNum || {PubNum, _Msg} <- queue:to_list(Q)]. -pubs_from_queue(Q) -> - queue:to_list(Q). +pubs_from_queue(Q) -> queue:to_list(Q). -queue_from_pubs(Pubs) -> - queue:from_list(Pubs). +queue_from_pubs(Pubs) -> queue:from_list(Pubs). -apply_acks([], Pubs) -> - Pubs; -apply_acks(List, Pubs) -> - {_, Pubs1} = queue:split(length(List), Pubs), - Pubs1. +apply_acks( [], Pubs) -> Pubs; +apply_acks(List, Pubs) -> {_, Pubs1} = queue:split(length(List), Pubs), + Pubs1. join_pubs(Q, []) -> Q; join_pubs(Q, Pubs) -> queue:join(Q, queue_from_pubs(Pubs)). -last_ack([], LA) -> - LA; -last_ack(List, LA) -> - LA1 = lists:last(List), - true = LA1 > LA, %% ASSERTION - LA1. - -last_pub([], LP) -> - LP; -last_pub(List, LP) -> - {PubNum, _Msg} = lists:last(List), - true = PubNum > LP, %% ASSERTION - PubNum. +last_ack( [], LA) -> LA; +last_ack(List, LA) -> LA1 = lists:last(List), + true = LA1 > LA, %% ASSERTION + LA1. + +last_pub( [], LP) -> LP; +last_pub(List, LP) -> {PubNum, _Msg} = lists:last(List), + true = PubNum > LP, %% ASSERTION + PubNum. diff --git a/src/gm_soak_test.erl b/src/gm_soak_test.erl index 57217541..b379d218 100644 --- a/src/gm_soak_test.erl +++ b/src/gm_soak_test.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(gm_soak_test). @@ -105,7 +105,9 @@ spawn_member() -> random:seed(MegaSecs, Secs, MicroSecs), %% start up delay of no more than 10 seconds timer:sleep(random:uniform(10000)), - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, []), + {ok, Pid} = gm:start_link( + ?MODULE, ?MODULE, [], + fun rabbit_misc:execute_mnesia_transaction/1), Start = random:uniform(10000), send_loop(Pid, Start, Start + random:uniform(10000)), gm:leave(Pid), diff --git a/src/gm_speed_test.erl b/src/gm_speed_test.erl index dad75bd4..768cc462 100644 --- a/src/gm_speed_test.erl +++ b/src/gm_speed_test.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(gm_speed_test). @@ -44,7 +44,8 @@ terminate(Owner, _Reason) -> %% other wile_e_coyote(Time, WriteUnit) -> - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), receive joined -> ok end, timer:sleep(1000), %% wait for all to join timer:send_after(Time, stop), diff --git a/src/gm_tests.erl b/src/gm_tests.erl index 0a2d4204..233702ad 100644 --- a/src/gm_tests.erl +++ b/src/gm_tests.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(gm_tests). @@ -76,7 +76,9 @@ test_confirmed_broadcast() -> test_member_death() -> with_two_members( fun (Pid, Pid2) -> - {ok, Pid3} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid3} = gm:start_link( + ?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), passed = receive_joined(Pid3, [Pid, Pid2, Pid3], timeout_joining_gm_group_3), passed = receive_birth(Pid, Pid3, timeout_waiting_for_birth_3_1), @@ -128,10 +130,12 @@ test_broadcast_fun(Fun) -> with_two_members(Fun) -> ok = gm:create_tables(), - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), passed = receive_joined(Pid, [Pid], timeout_joining_gm_group_1), - {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), passed = receive_joined(Pid2, [Pid, Pid2], timeout_joining_gm_group_2), passed = receive_birth(Pid, Pid2, timeout_waiting_for_birth_2), diff --git a/src/lqueue.erl b/src/lqueue.erl index c4e046b5..4ff7cc0b 100644 --- a/src/lqueue.erl +++ b/src/lqueue.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2011-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2011-2013 GoPivotal, Inc. All rights reserved. %% -module(lqueue). diff --git a/src/mirrored_supervisor.erl b/src/mirrored_supervisor.erl index 4fc488b8..d5f51db0 100644 --- a/src/mirrored_supervisor.erl +++ b/src/mirrored_supervisor.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2011-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2011-2013 GoPivotal, Inc. All rights reserved. %% -module(mirrored_supervisor). @@ -174,7 +174,7 @@ -spec start_internal(Group, ChildSpecs) -> Result when Group :: group_name(), ChildSpecs :: [supervisor2:child_spec()], - Result :: supervisor2:startlink_ret(). + Result :: {'ok', pid()} | {'error', term()}. -spec create_tables() -> Result when Result :: 'ok'. @@ -212,9 +212,8 @@ start_link0(Prefix, Group, Init) -> init(Mod, Args) -> case Mod:init(Args) of {ok, {{Bad, _, _}, _ChildSpecs}} when - Bad =:= simple_one_for_one orelse - Bad =:= simple_one_for_one_terminate -> erlang:error(badarg); - Init -> Init + Bad =:= simple_one_for_one -> erlang:error(badarg); + Init -> Init end. start_child(Sup, ChildSpec) -> call(Sup, {start_child, ChildSpec}). diff --git a/src/mirrored_supervisor_tests.erl b/src/mirrored_supervisor_tests.erl index f8cbd853..780ef11d 100644 --- a/src/mirrored_supervisor_tests.erl +++ b/src/mirrored_supervisor_tests.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2011-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2011-2013 GoPivotal, Inc. All rights reserved. %% -module(mirrored_supervisor_tests). diff --git a/src/mnesia_sync.erl b/src/mnesia_sync.erl index a3773d90..78c566e1 100644 --- a/src/mnesia_sync.erl +++ b/src/mnesia_sync.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(mnesia_sync). diff --git a/src/mochijson2.erl b/src/mochijson2.erl new file mode 100644 index 00000000..bddb52cc --- /dev/null +++ b/src/mochijson2.erl @@ -0,0 +1,893 @@ +%% This file is a copy of `mochijson2.erl' from mochiweb, revision +%% d541e9a0f36c00dcadc2e589f20e47fbf46fc76f. For the license, see +%% `LICENSE-MIT-Mochi'. + +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works +%% with binaries as strings, arrays as lists (without an {array, _}) +%% wrapper and it only knows how to decode UTF-8 (and ASCII). +%% +%% JSON terms are decoded as follows (javascript -> erlang): +%% <ul> +%% <li>{"key": "value"} -> +%% {struct, [{<<"key">>, <<"value">>}]}</li> +%% <li>["array", 123, 12.34, true, false, null] -> +%% [<<"array">>, 123, 12.34, true, false, null] +%% </li> +%% </ul> +%% <ul> +%% <li>Strings in JSON decode to UTF-8 binaries in Erlang</li> +%% <li>Objects decode to {struct, PropList}</li> +%% <li>Numbers decode to integer or float</li> +%% <li>true, false, null decode to their respective terms.</li> +%% </ul> +%% The encoder will accept the same format that the decoder will produce, +%% but will also allow additional cases for leniency: +%% <ul> +%% <li>atoms other than true, false, null will be considered UTF-8 +%% strings (even as a proplist key) +%% </li> +%% <li>{json, IoList} will insert IoList directly into the output +%% with no validation +%% </li> +%% <li>{array, Array} will be encoded as Array +%% (legacy mochijson style) +%% </li> +%% <li>A non-empty raw proplist will be encoded as an object as long +%% as the first pair does not have an atom key of json, struct, +%% or array +%% </li> +%% </ul> + +-module(mochijson2). +-author('bob@mochimedia.com'). +-export([encoder/1, encode/1]). +-export([decoder/1, decode/1, decode/2]). + +%% This is a macro to placate syntax highlighters.. +-define(Q, $\"). +-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset, + column=N+S#decoder.column}). +-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset, + column=1+S#decoder.column}). +-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset, + column=1, + line=1+S#decoder.line}). +-define(INC_CHAR(S, C), + case C of + $\n -> + S#decoder{column=1, + line=1+S#decoder.line, + offset=1+S#decoder.offset}; + _ -> + S#decoder{column=1+S#decoder.column, + offset=1+S#decoder.offset} + end). +-define(IS_WHITESPACE(C), + (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). + +%% @type json_string() = atom | binary() +%% @type json_number() = integer() | float() +%% @type json_array() = [json_term()] +%% @type json_object() = {struct, [{json_string(), json_term()}]} +%% @type json_eep18_object() = {[{json_string(), json_term()}]} +%% @type json_iolist() = {json, iolist()} +%% @type json_term() = json_string() | json_number() | json_array() | +%% json_object() | json_eep18_object() | json_iolist() + +-record(encoder, {handler=null, + utf8=false}). + +-record(decoder, {object_hook=null, + offset=0, + line=1, + column=1, + state=null}). + +%% @spec encoder([encoder_option()]) -> function() +%% @doc Create an encoder/1 with the given options. +%% @type encoder_option() = handler_option() | utf8_option() +%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false) +encoder(Options) -> + State = parse_encoder_options(Options, #encoder{}), + fun (O) -> json_encode(O, State) end. + +%% @spec encode(json_term()) -> iolist() +%% @doc Encode the given as JSON to an iolist. +encode(Any) -> + json_encode(Any, #encoder{}). + +%% @spec decoder([decoder_option()]) -> function() +%% @doc Create a decoder/1 with the given options. +decoder(Options) -> + State = parse_decoder_options(Options, #decoder{}), + fun (O) -> json_decode(O, State) end. + +%% @spec decode(iolist(), [{format, proplist | eep18 | struct}]) -> json_term() +%% @doc Decode the given iolist to Erlang terms using the given object format +%% for decoding, where proplist returns JSON objects as [{binary(), json_term()}] +%% proplists, eep18 returns JSON objects as {[binary(), json_term()]}, and struct +%% returns them as-is. +decode(S, Options) -> + json_decode(S, parse_decoder_options(Options, #decoder{})). + +%% @spec decode(iolist()) -> json_term() +%% @doc Decode the given iolist to Erlang terms. +decode(S) -> + json_decode(S, #decoder{}). + +%% Internal API + +parse_encoder_options([], State) -> + State; +parse_encoder_options([{handler, Handler} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{handler=Handler}); +parse_encoder_options([{utf8, Switch} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{utf8=Switch}). + +parse_decoder_options([], State) -> + State; +parse_decoder_options([{object_hook, Hook} | Rest], State) -> + parse_decoder_options(Rest, State#decoder{object_hook=Hook}); +parse_decoder_options([{format, Format} | Rest], State) + when Format =:= struct orelse Format =:= eep18 orelse Format =:= proplist -> + parse_decoder_options(Rest, State#decoder{object_hook=Format}). + +json_encode(true, _State) -> + <<"true">>; +json_encode(false, _State) -> + <<"false">>; +json_encode(null, _State) -> + <<"null">>; +json_encode(I, _State) when is_integer(I) -> + integer_to_list(I); +json_encode(F, _State) when is_float(F) -> + mochinum:digits(F); +json_encode(S, State) when is_binary(S); is_atom(S) -> + json_encode_string(S, State); +json_encode([{K, _}|_] = Props, State) when (K =/= struct andalso + K =/= array andalso + K =/= json) -> + json_encode_proplist(Props, State); +json_encode({struct, Props}, State) when is_list(Props) -> + json_encode_proplist(Props, State); +json_encode({Props}, State) when is_list(Props) -> + json_encode_proplist(Props, State); +json_encode({}, State) -> + json_encode_proplist([], State); +json_encode(Array, State) when is_list(Array) -> + json_encode_array(Array, State); +json_encode({array, Array}, State) when is_list(Array) -> + json_encode_array(Array, State); +json_encode({json, IoList}, _State) -> + IoList; +json_encode(Bad, #encoder{handler=null}) -> + exit({json_encode, {bad_term, Bad}}); +json_encode(Bad, State=#encoder{handler=Handler}) -> + json_encode(Handler(Bad), State). + +json_encode_array([], _State) -> + <<"[]">>; +json_encode_array(L, State) -> + F = fun (O, Acc) -> + [$,, json_encode(O, State) | Acc] + end, + [$, | Acc1] = lists:foldl(F, "[", L), + lists:reverse([$\] | Acc1]). + +json_encode_proplist([], _State) -> + <<"{}">>; +json_encode_proplist(Props, State) -> + F = fun ({K, V}, Acc) -> + KS = json_encode_string(K, State), + VS = json_encode(V, State), + [$,, VS, $:, KS | Acc] + end, + [$, | Acc1] = lists:foldl(F, "{", Props), + lists:reverse([$\} | Acc1]). + +json_encode_string(A, State) when is_atom(A) -> + L = atom_to_list(A), + case json_string_is_safe(L) of + true -> + [?Q, L, ?Q]; + false -> + json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q]) + end; +json_encode_string(B, State) when is_binary(B) -> + case json_bin_is_safe(B) of + true -> + [?Q, B, ?Q]; + false -> + json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q]) + end; +json_encode_string(I, _State) when is_integer(I) -> + [?Q, integer_to_list(I), ?Q]; +json_encode_string(L, State) when is_list(L) -> + case json_string_is_safe(L) of + true -> + [?Q, L, ?Q]; + false -> + json_encode_string_unicode(L, State, [?Q]) + end. + +json_string_is_safe([]) -> + true; +json_string_is_safe([C | Rest]) -> + case C of + ?Q -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> + false; + C when C < 16#7f -> + json_string_is_safe(Rest); + _ -> + false + end. + +json_bin_is_safe(<<>>) -> + true; +json_bin_is_safe(<<C, Rest/binary>>) -> + case C of + ?Q -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f -> + false; + C when C < 16#7f -> + json_bin_is_safe(Rest) + end. + +json_encode_string_unicode([], _State, Acc) -> + lists:reverse([$\" | Acc]); +json_encode_string_unicode([C | Cs], State, Acc) -> + Acc1 = case C of + ?Q -> + [?Q, $\\ | Acc]; + %% Escaping solidus is only useful when trying to protect + %% against "</script>" injection attacks which are only + %% possible when JSON is inserted into a HTML document + %% in-line. mochijson2 does not protect you from this, so + %% if you do insert directly into HTML then you need to + %% uncomment the following case or escape the output of encode. + %% + %% $/ -> + %% [$/, $\\ | Acc]; + %% + $\\ -> + [$\\, $\\ | Acc]; + $\b -> + [$b, $\\ | Acc]; + $\f -> + [$f, $\\ | Acc]; + $\n -> + [$n, $\\ | Acc]; + $\r -> + [$r, $\\ | Acc]; + $\t -> + [$t, $\\ | Acc]; + C when C >= 0, C < $\s -> + [unihex(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 -> + [xmerl_ucs:to_utf8(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 -> + [unihex(C) | Acc]; + C when C < 16#7f -> + [C | Acc]; + _ -> + exit({json_encode, {bad_char, C}}) + end, + json_encode_string_unicode(Cs, State, Acc1). + +hexdigit(C) when C >= 0, C =< 9 -> + C + $0; +hexdigit(C) when C =< 15 -> + C + $a - 10. + +unihex(C) when C < 16#10000 -> + <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>, + Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]], + [$\\, $u | Digits]; +unihex(C) when C =< 16#10FFFF -> + N = C - 16#10000, + S1 = 16#d800 bor ((N bsr 10) band 16#3ff), + S2 = 16#dc00 bor (N band 16#3ff), + [unihex(S1), unihex(S2)]. + +json_decode(L, S) when is_list(L) -> + json_decode(iolist_to_binary(L), S); +json_decode(B, S) -> + {Res, S1} = decode1(B, S), + {eof, _} = tokenize(B, S1#decoder{state=trim}), + Res. + +decode1(B, S=#decoder{state=null}) -> + case tokenize(B, S#decoder{state=any}) of + {{const, C}, S1} -> + {C, S1}; + {start_array, S1} -> + decode_array(B, S1); + {start_object, S1} -> + decode_object(B, S1) + end. + +make_object(V, #decoder{object_hook=N}) when N =:= null orelse N =:= struct -> + V; +make_object({struct, P}, #decoder{object_hook=eep18}) -> + {P}; +make_object({struct, P}, #decoder{object_hook=proplist}) -> + P; +make_object(V, #decoder{object_hook=Hook}) -> + Hook(V). + +decode_object(B, S) -> + decode_object(B, S#decoder{state=key}, []). + +decode_object(B, S=#decoder{state=key}, Acc) -> + case tokenize(B, S) of + {end_object, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, S1#decoder{state=null}}; + {{const, K}, S1} -> + {colon, S2} = tokenize(B, S1), + {V, S3} = decode1(B, S2#decoder{state=null}), + decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc]) + end; +decode_object(B, S=#decoder{state=comma}, Acc) -> + case tokenize(B, S) of + {end_object, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, S1#decoder{state=null}}; + {comma, S1} -> + decode_object(B, S1#decoder{state=key}, Acc) + end. + +decode_array(B, S) -> + decode_array(B, S#decoder{state=any}, []). + +decode_array(B, S=#decoder{state=any}, Acc) -> + case tokenize(B, S) of + {end_array, S1} -> + {lists:reverse(Acc), S1#decoder{state=null}}; + {start_array, S1} -> + {Array, S2} = decode_array(B, S1), + decode_array(B, S2#decoder{state=comma}, [Array | Acc]); + {start_object, S1} -> + {Array, S2} = decode_object(B, S1), + decode_array(B, S2#decoder{state=comma}, [Array | Acc]); + {{const, Const}, S1} -> + decode_array(B, S1#decoder{state=comma}, [Const | Acc]) + end; +decode_array(B, S=#decoder{state=comma}, Acc) -> + case tokenize(B, S) of + {end_array, S1} -> + {lists:reverse(Acc), S1#decoder{state=null}}; + {comma, S1} -> + decode_array(B, S1#decoder{state=any}, Acc) + end. + +tokenize_string(B, S=#decoder{offset=O}) -> + case tokenize_string_fast(B, O) of + {escape, O1} -> + Length = O1 - O, + S1 = ?ADV_COL(S, Length), + <<_:O/binary, Head:Length/binary, _/binary>> = B, + tokenize_string(B, S1, lists:reverse(binary_to_list(Head))); + O1 -> + Length = O1 - O, + <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B, + {{const, String}, ?ADV_COL(S, Length + 1)} + end. + +tokenize_string_fast(B, O) -> + case B of + <<_:O/binary, ?Q, _/binary>> -> + O; + <<_:O/binary, $\\, _/binary>> -> + {escape, O}; + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string_fast(B, 1 + O); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string_fast(B, 2 + O); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string_fast(B, 3 + O); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string_fast(B, 4 + O); + _ -> + throw(invalid_utf8) + end. + +tokenize_string(B, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, ?Q, _/binary>> -> + {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)}; + <<_:O/binary, "\\\"", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]); + <<_:O/binary, "\\\\", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]); + <<_:O/binary, "\\/", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]); + <<_:O/binary, "\\b", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]); + <<_:O/binary, "\\f", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]); + <<_:O/binary, "\\n", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]); + <<_:O/binary, "\\r", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]); + <<_:O/binary, "\\t", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]); + <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> -> + C = erlang:list_to_integer([C3, C2, C1, C0], 16), + if C > 16#D7FF, C < 16#DC00 -> + %% coalesce UTF-16 surrogate pair + <<"\\u", D3, D2, D1, D0, _/binary>> = Rest, + D = erlang:list_to_integer([D3,D2,D1,D0], 16), + [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer, + D:16/big-unsigned-integer>>), + Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc), + tokenize_string(B, ?ADV_COL(S, 12), Acc1); + true -> + Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc), + tokenize_string(B, ?ADV_COL(S, 6), Acc1) + end; + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]); + _ -> + throw(invalid_utf8) + end. + +tokenize_number(B, S) -> + case tokenize_number(B, sign, S, []) of + {{int, Int}, S1} -> + {{const, list_to_integer(Int)}, S1}; + {{float, Float}, S1} -> + {{const, list_to_float(Float)}, S1} + end. + +tokenize_number(B, sign, S=#decoder{offset=O}, []) -> + case B of + <<_:O/binary, $-, _/binary>> -> + tokenize_number(B, int, ?INC_COL(S), [$-]); + _ -> + tokenize_number(B, int, S, []) + end; +tokenize_number(B, int, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, $0, _/binary>> -> + tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]); + <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 -> + tokenize_number(B, int1, ?INC_COL(S), [C | Acc]) + end; +tokenize_number(B, int1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, int1, ?INC_COL(S), [C | Acc]); + _ -> + tokenize_number(B, frac, S, Acc) + end; +tokenize_number(B, frac, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 -> + tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]); + <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> + tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]); + _ -> + {{int, lists:reverse(Acc)}, S} + end; +tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]); + <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> + tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]); + _ -> + {{float, lists:reverse(Acc)}, S} + end; +tokenize_number(B, esign, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ -> + tokenize_number(B, eint, ?INC_COL(S), [C | Acc]); + _ -> + tokenize_number(B, eint, S, Acc) + end; +tokenize_number(B, eint, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]) + end; +tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]); + _ -> + {{float, lists:reverse(Acc)}, S} + end. + +tokenize(B, S=#decoder{offset=O}) -> + case B of + <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> + tokenize(B, ?INC_CHAR(S, C)); + <<_:O/binary, "{", _/binary>> -> + {start_object, ?INC_COL(S)}; + <<_:O/binary, "}", _/binary>> -> + {end_object, ?INC_COL(S)}; + <<_:O/binary, "[", _/binary>> -> + {start_array, ?INC_COL(S)}; + <<_:O/binary, "]", _/binary>> -> + {end_array, ?INC_COL(S)}; + <<_:O/binary, ",", _/binary>> -> + {comma, ?INC_COL(S)}; + <<_:O/binary, ":", _/binary>> -> + {colon, ?INC_COL(S)}; + <<_:O/binary, "null", _/binary>> -> + {{const, null}, ?ADV_COL(S, 4)}; + <<_:O/binary, "true", _/binary>> -> + {{const, true}, ?ADV_COL(S, 4)}; + <<_:O/binary, "false", _/binary>> -> + {{const, false}, ?ADV_COL(S, 5)}; + <<_:O/binary, "\"", _/binary>> -> + tokenize_string(B, ?INC_COL(S)); + <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9) + orelse C =:= $- -> + tokenize_number(B, S); + <<_:O/binary>> -> + trim = S#decoder.state, + {eof, S} + end. +%% +%% Tests +%% +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + + +%% testing constructs borrowed from the Yaws JSON implementation. + +%% Create an object from a list of Key/Value pairs. + +obj_new() -> + {struct, []}. + +is_obj({struct, Props}) -> + F = fun ({K, _}) when is_binary(K) -> true end, + lists:all(F, Props). + +obj_from_list(Props) -> + Obj = {struct, Props}, + ?assert(is_obj(Obj)), + Obj. + +%% Test for equivalence of Erlang terms. +%% Due to arbitrary order of construction, equivalent objects might +%% compare unequal as erlang terms, so we need to carefully recurse +%% through aggregates (tuples and objects). + +equiv({struct, Props1}, {struct, Props2}) -> + equiv_object(Props1, Props2); +equiv(L1, L2) when is_list(L1), is_list(L2) -> + equiv_list(L1, L2); +equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; +equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2; +equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true. + +%% Object representation and traversal order is unknown. +%% Use the sledgehammer and sort property lists. + +equiv_object(Props1, Props2) -> + L1 = lists:keysort(1, Props1), + L2 = lists:keysort(1, Props2), + Pairs = lists:zip(L1, L2), + true = lists:all(fun({{K1, V1}, {K2, V2}}) -> + equiv(K1, K2) and equiv(V1, V2) + end, Pairs). + +%% Recursively compare tuple elements for equivalence. + +equiv_list([], []) -> + true; +equiv_list([V1 | L1], [V2 | L2]) -> + equiv(V1, V2) andalso equiv_list(L1, L2). + +decode_test() -> + [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), + <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]). + +e2j_vec_test() -> + test_one(e2j_test_vec(utf8), 1). + +test_one([], _N) -> + %% io:format("~p tests passed~n", [N-1]), + ok; +test_one([{E, J} | Rest], N) -> + %% io:format("[~p] ~p ~p~n", [N, E, J]), + true = equiv(E, decode(J)), + true = equiv(E, decode(encode(E))), + test_one(Rest, 1+N). + +e2j_test_vec(utf8) -> + [ + {1, "1"}, + {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes + {-1, "-1"}, + {-3.1416, "-3.14160"}, + {12.0e10, "1.20000e+11"}, + {1.234E+10, "1.23400e+10"}, + {-1.234E-10, "-1.23400e-10"}, + {10.0, "1.0e+01"}, + {123.456, "1.23456E+2"}, + {10.0, "1e1"}, + {<<"foo">>, "\"foo\""}, + {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""}, + {<<"">>, "\"\""}, + {<<"\n\n\n">>, "\"\\n\\n\\n\""}, + {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}, + {obj_new(), "{}"}, + {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"}, + {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]), + "{\"foo\":\"bar\",\"baz\":123}"}, + {[], "[]"}, + {[[]], "[[]]"}, + {[1, <<"foo">>], "[1,\"foo\"]"}, + + %% json array in a json object + {obj_from_list([{<<"foo">>, [123]}]), + "{\"foo\":[123]}"}, + + %% json object in a json object + {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]), + "{\"foo\":{\"bar\":true}}"}, + + %% fold evaluation order + {obj_from_list([{<<"foo">>, []}, + {<<"bar">>, obj_from_list([{<<"baz">>, true}])}, + {<<"alice">>, <<"bob">>}]), + "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"}, + + %% json object in a json array + {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null], + "[-123,\"foo\",{\"bar\":[]},null]"} + ]. + +%% test utf8 encoding +encoder_utf8_test() -> + %% safe conversion case (default) + [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = + encode(<<1,"\321\202\320\265\321\201\321\202">>), + + %% raw utf8 output (optional) + Enc = mochijson2:encoder([{utf8, true}]), + [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] = + Enc(<<1,"\321\202\320\265\321\201\321\202">>). + +input_validation_test() -> + Good = [ + {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound + {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro + {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius + ], + lists:foreach(fun({CodePoint, UTF8}) -> + Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)), + Expect = decode(UTF8) + end, Good), + + Bad = [ + %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte + <<?Q, 16#80, ?Q>>, + %% missing continuations, last byte in each should be 80-BF + <<?Q, 16#C2, 16#7F, ?Q>>, + <<?Q, 16#E0, 16#80,16#7F, ?Q>>, + <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>, + %% we don't support code points > 10FFFF per RFC 3629 + <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>, + %% escape characters trigger a different code path + <<?Q, $\\, $\n, 16#80, ?Q>> + ], + lists:foreach( + fun(X) -> + ok = try decode(X) catch invalid_utf8 -> ok end, + %% could be {ucs,{bad_utf8_character_code}} or + %% {json_encode,{bad_char,_}} + {'EXIT', _} = (catch encode(X)) + end, Bad). + +inline_json_test() -> + ?assertEqual(<<"\"iodata iodata\"">>, + iolist_to_binary( + encode({json, [<<"\"iodata">>, " iodata\""]}))), + ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]}, + decode( + encode({struct, + [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))), + ok. + +big_unicode_test() -> + UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(UTF8Seq))), + ?assertEqual( + UTF8Seq, + decode(iolist_to_binary(encode(UTF8Seq)))), + ok. + +custom_decoder_test() -> + ?assertEqual( + {struct, [{<<"key">>, <<"value">>}]}, + (decoder([]))("{\"key\": \"value\"}")), + F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end, + ?assertEqual( + win, + (decoder([{object_hook, F}]))("{\"key\": \"value\"}")), + ok. + +atom_test() -> + %% JSON native atoms + [begin + ?assertEqual(A, decode(atom_to_list(A))), + ?assertEqual(iolist_to_binary(atom_to_list(A)), + iolist_to_binary(encode(A))) + end || A <- [true, false, null]], + %% Atom to string + ?assertEqual( + <<"\"foo\"">>, + iolist_to_binary(encode(foo))), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))), + ok. + +key_encode_test() -> + %% Some forms are accepted as keys that would not be strings in other + %% cases + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{foo, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{"foo", 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{foo, 1}]))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{<<"foo">>, 1}]))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{"foo", 1}]))), + ?assertEqual( + <<"{\"\\ud834\\udd20\":1}">>, + iolist_to_binary( + encode({struct, [{[16#0001d120], 1}]}))), + ?assertEqual( + <<"{\"1\":1}">>, + iolist_to_binary(encode({struct, [{1, 1}]}))), + ok. + +unsafe_chars_test() -> + Chars = "\"\\\b\f\n\r\t", + [begin + ?assertEqual(false, json_string_is_safe([C])), + ?assertEqual(false, json_bin_is_safe(<<C>>)), + ?assertEqual(<<C>>, decode(encode(<<C>>))) + end || C <- Chars], + ?assertEqual( + false, + json_string_is_safe([16#0001d120])), + ?assertEqual( + false, + json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#0001d120)))), + ?assertEqual( + [16#0001d120], + xmerl_ucs:from_utf8( + binary_to_list( + decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))))), + ?assertEqual( + false, + json_string_is_safe([16#110000])), + ?assertEqual( + false, + json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))), + %% solidus can be escaped but isn't unsafe by default + ?assertEqual( + <<"/">>, + decode(<<"\"\\/\"">>)), + ok. + +int_test() -> + ?assertEqual(0, decode("0")), + ?assertEqual(1, decode("1")), + ?assertEqual(11, decode("11")), + ok. + +large_int_test() -> + ?assertEqual(<<"-2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(-2147483649214748364921474836492147483649))), + ?assertEqual(<<"2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(2147483649214748364921474836492147483649))), + ok. + +float_test() -> + ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))), + ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))), + ok. + +handler_test() -> + ?assertEqual( + {'EXIT',{json_encode,{bad_term,{x,y}}}}, + catch encode({x,y})), + F = fun ({x,y}) -> [] end, + ?assertEqual( + <<"[]">>, + iolist_to_binary((encoder([{handler, F}]))({x, y}))), + ok. + +encode_empty_test_() -> + [{A, ?_assertEqual(<<"{}">>, iolist_to_binary(encode(B)))} + || {A, B} <- [{"eep18 {}", {}}, + {"eep18 {[]}", {[]}}, + {"{struct, []}", {struct, []}}]]. + +encode_test_() -> + P = [{<<"k">>, <<"v">>}], + JSON = iolist_to_binary(encode({struct, P})), + [{atom_to_list(F), + ?_assertEqual(JSON, iolist_to_binary(encode(decode(JSON, [{format, F}]))))} + || F <- [struct, eep18, proplist]]. + +format_test_() -> + P = [{<<"k">>, <<"v">>}], + JSON = iolist_to_binary(encode({struct, P})), + [{atom_to_list(F), + ?_assertEqual(A, decode(JSON, [{format, F}]))} + || {F, A} <- [{struct, {struct, P}}, + {eep18, {P}}, + {proplist, P}]]. + +-endif. diff --git a/src/mochinum.erl b/src/mochinum.erl new file mode 100644 index 00000000..4ea7a22a --- /dev/null +++ b/src/mochinum.erl @@ -0,0 +1,358 @@ +%% This file is a copy of `mochijson2.erl' from mochiweb, revision +%% d541e9a0f36c00dcadc2e589f20e47fbf46fc76f. For the license, see +%% `LICENSE-MIT-Mochi'. + +%% @copyright 2007 Mochi Media, Inc. +%% @author Bob Ippolito <bob@mochimedia.com> + +%% @doc Useful numeric algorithms for floats that cover some deficiencies +%% in the math module. More interesting is digits/1, which implements +%% the algorithm from: +%% http://www.cs.indiana.edu/~burger/fp/index.html +%% See also "Printing Floating-Point Numbers Quickly and Accurately" +%% in Proceedings of the SIGPLAN '96 Conference on Programming Language +%% Design and Implementation. + +-module(mochinum). +-author("Bob Ippolito <bob@mochimedia.com>"). +-export([digits/1, frexp/1, int_pow/2, int_ceil/1]). + +%% IEEE 754 Float exponent bias +-define(FLOAT_BIAS, 1022). +-define(MIN_EXP, -1074). +-define(BIG_POW, 4503599627370496). + +%% External API + +%% @spec digits(number()) -> string() +%% @doc Returns a string that accurately represents the given integer or float +%% using a conservative amount of digits. Great for generating +%% human-readable output, or compact ASCII serializations for floats. +digits(N) when is_integer(N) -> + integer_to_list(N); +digits(0.0) -> + "0.0"; +digits(Float) -> + {Frac1, Exp1} = frexp_int(Float), + [Place0 | Digits0] = digits1(Float, Exp1, Frac1), + {Place, Digits} = transform_digits(Place0, Digits0), + R = insert_decimal(Place, Digits), + case Float < 0 of + true -> + [$- | R]; + _ -> + R + end. + +%% @spec frexp(F::float()) -> {Frac::float(), Exp::float()} +%% @doc Return the fractional and exponent part of an IEEE 754 double, +%% equivalent to the libc function of the same name. +%% F = Frac * pow(2, Exp). +frexp(F) -> + frexp1(unpack(F)). + +%% @spec int_pow(X::integer(), N::integer()) -> Y::integer() +%% @doc Moderately efficient way to exponentiate integers. +%% int_pow(10, 2) = 100. +int_pow(_X, 0) -> + 1; +int_pow(X, N) when N > 0 -> + int_pow(X, N, 1). + +%% @spec int_ceil(F::float()) -> integer() +%% @doc Return the ceiling of F as an integer. The ceiling is defined as +%% F when F == trunc(F); +%% trunc(F) when F < 0; +%% trunc(F) + 1 when F > 0. +int_ceil(X) -> + T = trunc(X), + case (X - T) of + Pos when Pos > 0 -> T + 1; + _ -> T + end. + + +%% Internal API + +int_pow(X, N, R) when N < 2 -> + R * X; +int_pow(X, N, R) -> + int_pow(X * X, N bsr 1, case N band 1 of 1 -> R * X; 0 -> R end). + +insert_decimal(0, S) -> + "0." ++ S; +insert_decimal(Place, S) when Place > 0 -> + L = length(S), + case Place - L of + 0 -> + S ++ ".0"; + N when N < 0 -> + {S0, S1} = lists:split(L + N, S), + S0 ++ "." ++ S1; + N when N < 6 -> + %% More places than digits + S ++ lists:duplicate(N, $0) ++ ".0"; + _ -> + insert_decimal_exp(Place, S) + end; +insert_decimal(Place, S) when Place > -6 -> + "0." ++ lists:duplicate(abs(Place), $0) ++ S; +insert_decimal(Place, S) -> + insert_decimal_exp(Place, S). + +insert_decimal_exp(Place, S) -> + [C | S0] = S, + S1 = case S0 of + [] -> + "0"; + _ -> + S0 + end, + Exp = case Place < 0 of + true -> + "e-"; + false -> + "e+" + end, + [C] ++ "." ++ S1 ++ Exp ++ integer_to_list(abs(Place - 1)). + + +digits1(Float, Exp, Frac) -> + Round = ((Frac band 1) =:= 0), + case Exp >= 0 of + true -> + BExp = 1 bsl Exp, + case (Frac =/= ?BIG_POW) of + true -> + scale((Frac * BExp * 2), 2, BExp, BExp, + Round, Round, Float); + false -> + scale((Frac * BExp * 4), 4, (BExp * 2), BExp, + Round, Round, Float) + end; + false -> + case (Exp =:= ?MIN_EXP) orelse (Frac =/= ?BIG_POW) of + true -> + scale((Frac * 2), 1 bsl (1 - Exp), 1, 1, + Round, Round, Float); + false -> + scale((Frac * 4), 1 bsl (2 - Exp), 2, 1, + Round, Round, Float) + end + end. + +scale(R, S, MPlus, MMinus, LowOk, HighOk, Float) -> + Est = int_ceil(math:log10(abs(Float)) - 1.0e-10), + %% Note that the scheme implementation uses a 326 element look-up table + %% for int_pow(10, N) where we do not. + case Est >= 0 of + true -> + fixup(R, S * int_pow(10, Est), MPlus, MMinus, Est, + LowOk, HighOk); + false -> + Scale = int_pow(10, -Est), + fixup(R * Scale, S, MPlus * Scale, MMinus * Scale, Est, + LowOk, HighOk) + end. + +fixup(R, S, MPlus, MMinus, K, LowOk, HighOk) -> + TooLow = case HighOk of + true -> + (R + MPlus) >= S; + false -> + (R + MPlus) > S + end, + case TooLow of + true -> + [(K + 1) | generate(R, S, MPlus, MMinus, LowOk, HighOk)]; + false -> + [K | generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)] + end. + +generate(R0, S, MPlus, MMinus, LowOk, HighOk) -> + D = R0 div S, + R = R0 rem S, + TC1 = case LowOk of + true -> + R =< MMinus; + false -> + R < MMinus + end, + TC2 = case HighOk of + true -> + (R + MPlus) >= S; + false -> + (R + MPlus) > S + end, + case TC1 of + false -> + case TC2 of + false -> + [D | generate(R * 10, S, MPlus * 10, MMinus * 10, + LowOk, HighOk)]; + true -> + [D + 1] + end; + true -> + case TC2 of + false -> + [D]; + true -> + case R * 2 < S of + true -> + [D]; + false -> + [D + 1] + end + end + end. + +unpack(Float) -> + <<Sign:1, Exp:11, Frac:52>> = <<Float:64/float>>, + {Sign, Exp, Frac}. + +frexp1({_Sign, 0, 0}) -> + {0.0, 0}; +frexp1({Sign, 0, Frac}) -> + Exp = log2floor(Frac), + <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, (Frac-1):52>>, + {Frac1, -(?FLOAT_BIAS) - 52 + Exp}; +frexp1({Sign, Exp, Frac}) -> + <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, Frac:52>>, + {Frac1, Exp - ?FLOAT_BIAS}. + +log2floor(Int) -> + log2floor(Int, 0). + +log2floor(0, N) -> + N; +log2floor(Int, N) -> + log2floor(Int bsr 1, 1 + N). + + +transform_digits(Place, [0 | Rest]) -> + transform_digits(Place, Rest); +transform_digits(Place, Digits) -> + {Place, [$0 + D || D <- Digits]}. + + +frexp_int(F) -> + case unpack(F) of + {_Sign, 0, Frac} -> + {Frac, ?MIN_EXP}; + {_Sign, Exp, Frac} -> + {Frac + (1 bsl 52), Exp - 53 - ?FLOAT_BIAS} + end. + +%% +%% Tests +%% +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +int_ceil_test() -> + ?assertEqual(1, int_ceil(0.0001)), + ?assertEqual(0, int_ceil(0.0)), + ?assertEqual(1, int_ceil(0.99)), + ?assertEqual(1, int_ceil(1.0)), + ?assertEqual(-1, int_ceil(-1.5)), + ?assertEqual(-2, int_ceil(-2.0)), + ok. + +int_pow_test() -> + ?assertEqual(1, int_pow(1, 1)), + ?assertEqual(1, int_pow(1, 0)), + ?assertEqual(1, int_pow(10, 0)), + ?assertEqual(10, int_pow(10, 1)), + ?assertEqual(100, int_pow(10, 2)), + ?assertEqual(1000, int_pow(10, 3)), + ok. + +digits_test() -> + ?assertEqual("0", + digits(0)), + ?assertEqual("0.0", + digits(0.0)), + ?assertEqual("1.0", + digits(1.0)), + ?assertEqual("-1.0", + digits(-1.0)), + ?assertEqual("0.1", + digits(0.1)), + ?assertEqual("0.01", + digits(0.01)), + ?assertEqual("0.001", + digits(0.001)), + ?assertEqual("1.0e+6", + digits(1000000.0)), + ?assertEqual("0.5", + digits(0.5)), + ?assertEqual("4503599627370496.0", + digits(4503599627370496.0)), + %% small denormalized number + %% 4.94065645841246544177e-324 =:= 5.0e-324 + <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>, + ?assertEqual("5.0e-324", + digits(SmallDenorm)), + ?assertEqual(SmallDenorm, + list_to_float(digits(SmallDenorm))), + %% large denormalized number + %% 2.22507385850720088902e-308 + <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>, + ?assertEqual("2.225073858507201e-308", + digits(BigDenorm)), + ?assertEqual(BigDenorm, + list_to_float(digits(BigDenorm))), + %% small normalized number + %% 2.22507385850720138309e-308 + <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>, + ?assertEqual("2.2250738585072014e-308", + digits(SmallNorm)), + ?assertEqual(SmallNorm, + list_to_float(digits(SmallNorm))), + %% large normalized number + %% 1.79769313486231570815e+308 + <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>, + ?assertEqual("1.7976931348623157e+308", + digits(LargeNorm)), + ?assertEqual(LargeNorm, + list_to_float(digits(LargeNorm))), + %% issue #10 - mochinum:frexp(math:pow(2, -1074)). + ?assertEqual("5.0e-324", + digits(math:pow(2, -1074))), + ok. + +frexp_test() -> + %% zero + ?assertEqual({0.0, 0}, frexp(0.0)), + %% one + ?assertEqual({0.5, 1}, frexp(1.0)), + %% negative one + ?assertEqual({-0.5, 1}, frexp(-1.0)), + %% small denormalized number + %% 4.94065645841246544177e-324 + <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>, + ?assertEqual({0.5, -1073}, frexp(SmallDenorm)), + %% large denormalized number + %% 2.22507385850720088902e-308 + <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>, + ?assertEqual( + {0.99999999999999978, -1022}, + frexp(BigDenorm)), + %% small normalized number + %% 2.22507385850720138309e-308 + <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>, + ?assertEqual({0.5, -1021}, frexp(SmallNorm)), + %% large normalized number + %% 1.79769313486231570815e+308 + <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>, + ?assertEqual( + {0.99999999999999989, 1024}, + frexp(LargeNorm)), + %% issue #10 - mochinum:frexp(math:pow(2, -1074)). + ?assertEqual( + {0.5, -1073}, + frexp(math:pow(2, -1074))), + ok. + +-endif. diff --git a/src/pg_local.erl b/src/pg_local.erl index e2e82f1f..f535b136 100644 --- a/src/pg_local.erl +++ b/src/pg_local.erl @@ -13,7 +13,7 @@ %% versions of Erlang/OTP. The remaining type specs have been %% removed. -%% All modifications are (C) 2010-2012 VMware, Inc. +%% All modifications are (C) 2010-2013 GoPivotal, Inc. %% %CopyrightBegin% %% diff --git a/src/pmon.erl b/src/pmon.erl index 45786577..86308167 100644 --- a/src/pmon.erl +++ b/src/pmon.erl @@ -10,14 +10,18 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2011-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2011-2013 GoPivotal, Inc. All rights reserved. %% -module(pmon). --export([new/0, monitor/2, monitor_all/2, demonitor/2, is_monitored/2, erase/2, - monitored/1, is_empty/1]). +-export([new/0, new/1, monitor/2, monitor_all/2, demonitor/2, + is_monitored/2, erase/2, monitored/1, is_empty/1]). + +-compile({no_auto_import, [monitor/2]}). + +-record(state, {dict, module}). -ifdef(use_specs). @@ -25,40 +29,50 @@ -export_type([?MODULE/0]). --opaque(?MODULE() :: dict()). +-opaque(?MODULE() :: #state{dict :: dict(), + module :: atom()}). + +-type(item() :: pid() | {atom(), node()}). -spec(new/0 :: () -> ?MODULE()). --spec(monitor/2 :: (pid(), ?MODULE()) -> ?MODULE()). --spec(monitor_all/2 :: ([pid()], ?MODULE()) -> ?MODULE()). --spec(demonitor/2 :: (pid(), ?MODULE()) -> ?MODULE()). --spec(is_monitored/2 :: (pid(), ?MODULE()) -> boolean()). --spec(erase/2 :: (pid(), ?MODULE()) -> ?MODULE()). --spec(monitored/1 :: (?MODULE()) -> [pid()]). +-spec(new/1 :: ('erlang' | 'delegate') -> ?MODULE()). +-spec(monitor/2 :: (item(), ?MODULE()) -> ?MODULE()). +-spec(monitor_all/2 :: ([item()], ?MODULE()) -> ?MODULE()). +-spec(demonitor/2 :: (item(), ?MODULE()) -> ?MODULE()). +-spec(is_monitored/2 :: (item(), ?MODULE()) -> boolean()). +-spec(erase/2 :: (item(), ?MODULE()) -> ?MODULE()). +-spec(monitored/1 :: (?MODULE()) -> [item()]). -spec(is_empty/1 :: (?MODULE()) -> boolean()). -endif. -new() -> dict:new(). +new() -> new(erlang). + +new(Module) -> #state{dict = dict:new(), + module = Module}. -monitor(Pid, M) -> - case dict:is_key(Pid, M) of - true -> M; - false -> dict:store(Pid, erlang:monitor(process, Pid), M) +monitor(Item, S = #state{dict = M, module = Module}) -> + case dict:is_key(Item, M) of + true -> S; + false -> S#state{dict = dict:store( + Item, Module:monitor(process, Item), M)} end. -monitor_all(Pids, M) -> lists:foldl(fun monitor/2, M, Pids). +monitor_all([], S) -> S; %% optimisation +monitor_all([Item], S) -> monitor(Item, S); %% optimisation +monitor_all(Items, S) -> lists:foldl(fun monitor/2, S, Items). -demonitor(Pid, M) -> - case dict:find(Pid, M) of - {ok, MRef} -> erlang:demonitor(MRef), - dict:erase(Pid, M); +demonitor(Item, S = #state{dict = M, module = Module}) -> + case dict:find(Item, M) of + {ok, MRef} -> Module:demonitor(MRef), + S#state{dict = dict:erase(Item, M)}; error -> M end. -is_monitored(Pid, M) -> dict:is_key(Pid, M). +is_monitored(Item, #state{dict = M}) -> dict:is_key(Item, M). -erase(Pid, M) -> dict:erase(Pid, M). +erase(Item, S = #state{dict = M}) -> S#state{dict = dict:erase(Item, M)}. -monitored(M) -> dict:fetch_keys(M). +monitored(#state{dict = M}) -> dict:fetch_keys(M). -is_empty(M) -> dict:size(M) == 0. +is_empty(#state{dict = M}) -> dict:size(M) == 0. diff --git a/src/priority_queue.erl b/src/priority_queue.erl index 780fa2e9..6995c3be 100644 --- a/src/priority_queue.erl +++ b/src/priority_queue.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% %% Priority queues have essentially the same interface as ordinary @@ -51,7 +51,7 @@ -type(q() :: pqueue()). -type(priority() :: integer() | 'infinity'). --type(squeue() :: {queue, [any()], [any()]}). +-type(squeue() :: {queue, [any()], [any()], non_neg_integer()}). -type(pqueue() :: squeue() | {pqueue, [{priority(), squeue()}]}). -spec(new/0 :: () -> pqueue()). @@ -69,9 +69,9 @@ %%---------------------------------------------------------------------------- new() -> - {queue, [], []}. + {queue, [], [], 0}. -is_queue({queue, R, F}) when is_list(R), is_list(F) -> +is_queue({queue, R, F, L}) when is_list(R), is_list(F), is_integer(L) -> true; is_queue({pqueue, Queues}) when is_list(Queues) -> lists:all(fun ({infinity, Q}) -> is_queue(Q); @@ -80,17 +80,17 @@ is_queue({pqueue, Queues}) when is_list(Queues) -> is_queue(_) -> false. -is_empty({queue, [], []}) -> +is_empty({queue, [], [], 0}) -> true; is_empty(_) -> false. -len({queue, R, F}) when is_list(R), is_list(F) -> - length(R) + length(F); +len({queue, _R, _F, L}) -> + L; len({pqueue, Queues}) -> lists:sum([len(Q) || {_, Q} <- Queues]). -to_list({queue, In, Out}) when is_list(In), is_list(Out) -> +to_list({queue, In, Out, _Len}) when is_list(In), is_list(Out) -> [{0, V} || V <- Out ++ lists:reverse(In, [])]; to_list({pqueue, Queues}) -> [{maybe_negate_priority(P), V} || {P, Q} <- Queues, @@ -99,13 +99,13 @@ to_list({pqueue, Queues}) -> in(Item, Q) -> in(Item, 0, Q). -in(X, 0, {queue, [_] = In, []}) -> - {queue, [X], In}; -in(X, 0, {queue, In, Out}) when is_list(In), is_list(Out) -> - {queue, [X|In], Out}; -in(X, Priority, _Q = {queue, [], []}) -> +in(X, 0, {queue, [_] = In, [], 1}) -> + {queue, [X], In, 2}; +in(X, 0, {queue, In, Out, Len}) when is_list(In), is_list(Out) -> + {queue, [X|In], Out, Len + 1}; +in(X, Priority, _Q = {queue, [], [], 0}) -> in(X, Priority, {pqueue, []}); -in(X, Priority, Q = {queue, _, _}) -> +in(X, Priority, Q = {queue, _, _, _}) -> in(X, Priority, {pqueue, [{0, Q}]}); in(X, Priority, {pqueue, Queues}) -> P = maybe_negate_priority(Priority), @@ -113,33 +113,33 @@ in(X, Priority, {pqueue, Queues}) -> {value, {_, Q}} -> lists:keyreplace(P, 1, Queues, {P, in(X, Q)}); false when P == infinity -> - [{P, {queue, [X], []}} | Queues]; + [{P, {queue, [X], [], 1}} | Queues]; false -> case Queues of [{infinity, InfQueue} | Queues1] -> [{infinity, InfQueue} | - lists:keysort(1, [{P, {queue, [X], []}} | Queues1])]; + lists:keysort(1, [{P, {queue, [X], [], 1}} | Queues1])]; _ -> - lists:keysort(1, [{P, {queue, [X], []}} | Queues]) + lists:keysort(1, [{P, {queue, [X], [], 1}} | Queues]) end end}. -out({queue, [], []} = Q) -> +out({queue, [], [], 0} = Q) -> {empty, Q}; -out({queue, [V], []}) -> - {{value, V}, {queue, [], []}}; -out({queue, [Y|In], []}) -> +out({queue, [V], [], 1}) -> + {{value, V}, {queue, [], [], 0}}; +out({queue, [Y|In], [], Len}) -> [V|Out] = lists:reverse(In, []), - {{value, V}, {queue, [Y], Out}}; -out({queue, In, [V]}) when is_list(In) -> - {{value,V}, r2f(In)}; -out({queue, In,[V|Out]}) when is_list(In) -> - {{value, V}, {queue, In, Out}}; + {{value, V}, {queue, [Y], Out}, Len - 1}; +out({queue, In, [V], Len}) when is_list(In) -> + {{value,V}, r2f(In, Len - 1)}; +out({queue, In,[V|Out], Len}) when is_list(In) -> + {{value, V}, {queue, In, Out, Len - 1}}; out({pqueue, [{P, Q} | Queues]}) -> {R, Q1} = out(Q), NewQ = case is_empty(Q1) of true -> case Queues of - [] -> {queue, [], []}; + [] -> {queue, [], [], 0}; [{0, OnlyQ}] -> OnlyQ; [_|_] -> {pqueue, Queues} end; @@ -147,13 +147,13 @@ out({pqueue, [{P, Q} | Queues]}) -> end, {R, NewQ}. -join(A, {queue, [], []}) -> +join(A, {queue, [], [], 0}) -> A; -join({queue, [], []}, B) -> +join({queue, [], [], 0}, B) -> B; -join({queue, AIn, AOut}, {queue, BIn, BOut}) -> - {queue, BIn, AOut ++ lists:reverse(AIn, BOut)}; -join(A = {queue, _, _}, {pqueue, BPQ}) -> +join({queue, AIn, AOut, ALen}, {queue, BIn, BOut, BLen}) -> + {queue, BIn, AOut ++ lists:reverse(AIn, BOut), ALen + BLen}; +join(A = {queue, _, _, _}, {pqueue, BPQ}) -> {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 orelse P == infinity end, BPQ), Post1 = case Post of @@ -162,7 +162,7 @@ join(A = {queue, _, _}, {pqueue, BPQ}) -> _ -> [ {0, A} | Post ] end, {pqueue, Pre ++ Post1}; -join({pqueue, APQ}, B = {queue, _, _}) -> +join({pqueue, APQ}, B = {queue, _, _, _}) -> {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 orelse P == infinity end, APQ), Post1 = case Post of @@ -185,10 +185,10 @@ merge([{PA, A}|As], Bs = [{PB, _}|_], Acc) when PA < PB orelse PA == infinity -> merge(As = [{_, _}|_], [{PB, B}|Bs], Acc) -> merge(As, Bs, [ {PB, B} | Acc ]). -r2f([]) -> {queue, [], []}; -r2f([_] = R) -> {queue, [], R}; -r2f([X,Y]) -> {queue, [X], [Y]}; -r2f([X,Y|R]) -> {queue, [X,Y], lists:reverse(R, [])}. +r2f([], 0) -> {queue, [], [], 0}; +r2f([_] = R, 1) -> {queue, [], R, 1}; +r2f([X,Y], 2) -> {queue, [X], [Y], 2}; +r2f([X,Y|R], L) -> {queue, [X,Y], lists:reverse(R, []), L}. maybe_negate_priority(infinity) -> infinity; maybe_negate_priority(P) -> -P. diff --git a/src/rabbit.erl b/src/rabbit.erl index df009529..a9974711 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -10,17 +10,18 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit). -behaviour(application). --export([maybe_hipe_compile/0, prepare/0, start/0, stop/0, stop_and_halt/0, - status/0, is_running/0, is_running/1, environment/0, - rotate_logs/1, force_event_refresh/0]). +-export([start/0, boot/0, stop/0, + stop_and_halt/0, await_startup/0, status/0, is_running/0, + is_running/1, environment/0, rotate_logs/1, force_event_refresh/0, + start_fhc/0]). -export([start/2, stop/1]). @@ -35,7 +36,7 @@ -rabbit_boot_step({codec_correctness_check, [{description, "codec correctness check"}, {mfa, {rabbit_binary_generator, - check_empty_content_body_frame_size, + check_empty_frame_size, []}}, {requires, pre_boot}, {enables, external_infrastructure}]}). @@ -53,8 +54,7 @@ -rabbit_boot_step({file_handle_cache, [{description, "file handle cache server"}, - {mfa, {rabbit_sup, start_restartable_child, - [file_handle_cache]}}, + {mfa, {rabbit, start_fhc, []}}, {requires, pre_boot}, {enables, worker_pool}]}). @@ -123,7 +123,7 @@ [{description, "node monitor"}, {mfa, {rabbit_sup, start_restartable_child, [rabbit_node_monitor]}}, - {requires, kernel_ready}, + {requires, rabbit_alarm}, {enables, core_initialized}]}). -rabbit_boot_step({core_initialized, @@ -161,7 +161,12 @@ -rabbit_boot_step({log_relay, [{description, "error log relay"}, - {mfa, {rabbit_error_logger, boot, []}}, + {mfa, {rabbit_sup, start_child, + [rabbit_error_logger_lifecycle, + supervised_lifecycle, + [rabbit_error_logger_lifecycle, + {rabbit_error_logger, start, []}, + {rabbit_error_logger, stop, []}]]}}, {requires, routing_ready}, {enables, networking}]}). @@ -176,9 +181,15 @@ -rabbit_boot_step({notify_cluster, [{description, "notify cluster nodes"}, - {mfa, {rabbit_node_monitor, notify_cluster, []}}, + {mfa, {rabbit_node_monitor, notify_node_up, []}}, {requires, networking}]}). +-rabbit_boot_step({background_gc, + [{description, "background garbage collection"}, + {mfa, {rabbit_sup, start_restartable_child, + [background_gc]}}, + {enables, networking}]}). + %%--------------------------------------------------------------------------- -include("rabbit_framing.hrl"). @@ -186,21 +197,6 @@ -define(APPS, [os_mon, mnesia, rabbit]). -%% see bug 24513 for how this list was created --define(HIPE_WORTHY, - [rabbit_reader, rabbit_channel, gen_server2, - rabbit_exchange, rabbit_command_assembler, rabbit_framing_amqp_0_9_1, - rabbit_basic, rabbit_event, lists, queue, priority_queue, - rabbit_router, rabbit_trace, rabbit_misc, rabbit_binary_parser, - rabbit_exchange_type_direct, rabbit_guid, rabbit_net, - rabbit_amqqueue_process, rabbit_variable_queue, - rabbit_binary_generator, rabbit_writer, delegate, gb_sets, lqueue, - sets, orddict, rabbit_amqqueue, rabbit_limiter, gb_trees, - rabbit_queue_index, gen, dict, ordsets, file_handle_cache, - rabbit_msg_store, array, rabbit_msg_store_ets_index, rabbit_msg_file, - rabbit_exchange_type_fanout, rabbit_exchange_type_topic, mnesia, - mnesia_lib, rpc, mnesia_tm, qlc, sofs, proplists, credit_flow, pmon]). - %% HiPE compilation uses multiple cores anyway, but some bits are %% IO-bound so we can go faster if we parallelise a bit more. In %% practice 2 processes seems just as fast as any other number > 1, @@ -216,11 +212,11 @@ -type(log_location() :: 'tty' | 'undefined' | file:filename()). -type(param() :: atom()). --spec(maybe_hipe_compile/0 :: () -> 'ok'). --spec(prepare/0 :: () -> 'ok'). -spec(start/0 :: () -> 'ok'). +-spec(boot/0 :: () -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_halt/0 :: () -> no_return()). +-spec(await_startup/0 :: () -> 'ok'). -spec(status/0 :: () -> [{pid, integer()} | {running_applications, [{atom(), string(), string()}]} | @@ -229,7 +225,7 @@ {memory, any()}]). -spec(is_running/0 :: () -> boolean()). -spec(is_running/1 :: (node()) -> boolean()). --spec(environment/0 :: () -> [{param() | term()}]). +-spec(environment/0 :: () -> [{param(), term()}]). -spec(rotate_logs/1 :: (file_suffix()) -> rabbit_types:ok_or_error(any())). -spec(force_event_refresh/0 :: () -> 'ok'). @@ -251,19 +247,33 @@ %%---------------------------------------------------------------------------- +%% HiPE compilation happens before we have log handlers - so we have +%% to io:format/2, it's all we can do. + maybe_hipe_compile() -> {ok, Want} = application:get_env(rabbit, hipe_compile), Can = code:which(hipe) =/= non_existing, case {Want, Can} of - {true, true} -> hipe_compile(); - {true, false} -> io:format("Not HiPE compiling: HiPE not found in " - "this Erlang installation.~n"); - {false, _} -> ok + {true, true} -> hipe_compile(), + true; + {true, false} -> false; + {false, _} -> true end. +warn_if_hipe_compilation_failed(true) -> + ok; +warn_if_hipe_compilation_failed(false) -> + error_logger:warning_msg( + "Not HiPE compiling: HiPE not found in this Erlang installation.~n"). + +%% HiPE compilation happens before we have log handlers and can take a +%% long time, so make an exception to our no-stdout policy and display +%% progress via stdout. hipe_compile() -> - Count = length(?HIPE_WORTHY), - io:format("HiPE compiling: |~s|~n |", + {ok, HipeModulesAll} = application:get_env(rabbit, hipe_modules), + HipeModules = [HM || HM <- HipeModulesAll, code:which(HM) =/= non_existing], + Count = length(HipeModules), + io:format("~nHiPE compiling: |~s|~n |", [string:copies("-", Count)]), T1 = erlang:now(), PidMRefs = [spawn_monitor(fun () -> [begin @@ -271,7 +281,7 @@ hipe_compile() -> io:format("#") end || M <- Ms] end) || - Ms <- split(?HIPE_WORTHY, ?HIPE_PROCESSES)], + Ms <- split(HipeModules, ?HIPE_PROCESSES)], [receive {'DOWN', MRef, process, _, normal} -> ok; {'DOWN', MRef, process, _, Reason} -> exit(Reason) @@ -285,29 +295,80 @@ split(L, N) -> split0(L, [[] || _ <- lists:seq(1, N)]). split0([], Ls) -> Ls; split0([I | Is], [L | Ls]) -> split0(Is, Ls ++ [[I | L]]). -prepare() -> - ok = ensure_working_log_handlers(), - ok = rabbit_upgrade:maybe_upgrade_mnesia(). +ensure_application_loaded() -> + %% We end up looking at the rabbit app's env for HiPE and log + %% handling, so it needs to be loaded. But during the tests, it + %% may end up getting loaded twice, so guard against that. + case application:load(rabbit) of + ok -> ok; + {error, {already_loaded, rabbit}} -> ok + end. start() -> + start_it(fun() -> + %% We do not want to HiPE compile or upgrade + %% mnesia after just restarting the app + ok = ensure_application_loaded(), + ok = ensure_working_log_handlers(), + rabbit_node_monitor:prepare_cluster_status_files(), + rabbit_mnesia:check_cluster_consistency(), + ok = app_utils:start_applications( + app_startup_order(), fun handle_app_error/2), + ok = log_broker_started(rabbit_plugins:active()) + end). + +boot() -> + start_it(fun() -> + ok = ensure_application_loaded(), + Success = maybe_hipe_compile(), + ok = ensure_working_log_handlers(), + warn_if_hipe_compilation_failed(Success), + rabbit_node_monitor:prepare_cluster_status_files(), + ok = rabbit_upgrade:maybe_upgrade_mnesia(), + %% It's important that the consistency check happens after + %% the upgrade, since if we are a secondary node the + %% primary node will have forgotten us + rabbit_mnesia:check_cluster_consistency(), + Plugins = rabbit_plugins:setup(), + ToBeLoaded = Plugins ++ ?APPS, + ok = app_utils:load_applications(ToBeLoaded), + StartupApps = app_utils:app_dependency_order(ToBeLoaded, + false), + ok = app_utils:start_applications( + StartupApps, fun handle_app_error/2), + ok = log_broker_started(Plugins) + end). + +handle_app_error(App, {bad_return, {_MFA, {'EXIT', {Reason, _}}}}) -> + throw({could_not_start, App, Reason}); + +handle_app_error(App, Reason) -> + throw({could_not_start, App, Reason}). + +start_it(StartFun) -> + Marker = spawn_link(fun() -> receive stop -> ok end end), + register(rabbit_boot, Marker), try - %% prepare/1 ends up looking at the rabbit app's env, so it - %% needs to be loaded, but during the tests, it may end up - %% getting loaded twice, so guard against that - case application:load(rabbit) of - ok -> ok; - {error, {already_loaded, rabbit}} -> ok - end, - ok = prepare(), - ok = rabbit_misc:start_applications(application_load_order()) + StartFun() + catch + throw:{could_not_start, _App, _Reason}=Err -> + boot_error(Err, not_available); + _:Reason -> + boot_error(Reason, erlang:get_stacktrace()) after - %%give the error loggers some time to catch up + unlink(Marker), + Marker ! stop, + %% give the error loggers some time to catch up timer:sleep(100) end. stop() -> - rabbit_log:info("Stopping Rabbit~n"), - ok = rabbit_misc:stop_applications(application_load_order()). + case whereis(rabbit_boot) of + undefined -> ok; + _ -> await_startup() + end, + rabbit_log:info("Stopping RabbitMQ~n"), + ok = app_utils:stop_applications(app_shutdown_order()). stop_and_halt() -> try @@ -318,12 +379,15 @@ stop_and_halt() -> end, ok. +await_startup() -> + app_utils:wait_for_applications(app_startup_order()). + status() -> S1 = [{pid, list_to_integer(os:getpid())}, - {running_applications, application:which_applications(infinity)}, + {running_applications, rabbit_misc:which_applications()}, {os, os:type()}, {erlang_version, erlang:system_info(system_version)}, - {memory, erlang:memory()}], + {memory, rabbit_vm:memory()}], S2 = rabbit_misc:filter_exit_map( fun ({Key, {M, F, A}}) -> {Key, erlang:apply(M, F, A)} end, [{vm_memory_high_watermark, {vm_memory_monitor, @@ -348,13 +412,11 @@ status() -> is_running() -> is_running(node()). -is_running(Node) -> - rabbit_nodes:is_running(Node, rabbit). +is_running(Node) -> rabbit_nodes:is_process_running(Node, rabbit). environment() -> - lists:keysort( - 1, [P || P = {K, _} <- application:get_all_env(rabbit), - K =/= default_pass]). + lists:keysort(1, [P || P = {K, _} <- application:get_all_env(rabbit), + K =/= default_pass]). rotate_logs(BinarySuffix) -> Suffix = binary_to_list(BinarySuffix), @@ -371,89 +433,55 @@ rotate_logs(BinarySuffix) -> start(normal, []) -> case erts_version_check() of ok -> + {ok, Vsn} = application:get_key(rabbit, vsn), + error_logger:info_msg("Starting RabbitMQ ~s on Erlang ~s~n~s~n~s~n", + [Vsn, erlang:system_info(otp_release), + ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]), {ok, SupPid} = rabbit_sup:start_link(), true = register(rabbit, self()), print_banner(), + log_banner(), [ok = run_boot_step(Step) || Step <- boot_steps()], - io:format("~nbroker running~n"), {ok, SupPid}; Error -> Error end. stop(_State) -> - ok = rabbit_mnesia:record_running_nodes(), - terminated_ok = error_logger:delete_report_handler(rabbit_error_logger), ok = rabbit_alarm:stop(), ok = case rabbit_mnesia:is_clustered() of true -> rabbit_amqqueue:on_node_down(node()); - false -> rabbit_mnesia:empty_ram_only_tables() + false -> rabbit_table:clear_ram_only_tables() end, ok. %%--------------------------------------------------------------------------- %% application life cycle -application_load_order() -> - ok = load_applications(), - {ok, G} = rabbit_misc:build_acyclic_graph( - fun (App, _Deps) -> [{App, App}] end, - fun (App, Deps) -> [{Dep, App} || Dep <- Deps] end, - [{App, app_dependencies(App)} || - {App, _Desc, _Vsn} <- application:loaded_applications()]), - true = digraph:del_vertices( - G, digraph:vertices(G) -- digraph_utils:reachable(?APPS, G)), - Result = digraph_utils:topsort(G), - true = digraph:delete(G), - Result. - -load_applications() -> - load_applications(queue:from_list(?APPS), sets:new()). - -load_applications(Worklist, Loaded) -> - case queue:out(Worklist) of - {empty, _WorkList} -> - ok; - {{value, App}, Worklist1} -> - case sets:is_element(App, Loaded) of - true -> load_applications(Worklist1, Loaded); - false -> case application:load(App) of - ok -> ok; - {error, {already_loaded, App}} -> ok; - Error -> throw(Error) - end, - load_applications( - queue:join(Worklist1, - queue:from_list(app_dependencies(App))), - sets:add_element(App, Loaded)) - end - end. +app_startup_order() -> + ok = app_utils:load_applications(?APPS), + app_utils:app_dependency_order(?APPS, false). -app_dependencies(App) -> - case application:get_key(App, applications) of - undefined -> []; - {ok, Lst} -> Lst - end. +app_shutdown_order() -> + Apps = ?APPS ++ rabbit_plugins:active(), + app_utils:app_dependency_order(Apps, true). %%--------------------------------------------------------------------------- %% boot step logic -run_boot_step({StepName, Attributes}) -> - Description = case lists:keysearch(description, 1, Attributes) of - {value, {_, D}} -> D; - false -> StepName - end, +run_boot_step({_StepName, Attributes}) -> case [MFA || {mfa, MFA} <- Attributes] of [] -> - io:format("-- ~s~n", [Description]); + ok; MFAs -> - io:format("starting ~-60s ...", [Description]), [try apply(M,F,A) + of + ok -> ok; + {error, Reason} -> boot_error(Reason, not_available) catch - _:Reason -> boot_step_error(Reason, erlang:get_stacktrace()) + _:Reason -> boot_error(Reason, erlang:get_stacktrace()) end || {M,F,A} <- MFAs], - io:format("done~n"), ok end. @@ -479,7 +507,8 @@ sort_boot_steps(UnsortedSteps) -> %% there is one, otherwise fail). SortedSteps = lists:reverse( [begin - {StepName, Step} = digraph:vertex(G, StepName), + {StepName, Step} = digraph:vertex(G, + StepName), Step end || StepName <- digraph_utils:topsort(G)]), digraph:delete(G), @@ -489,14 +518,17 @@ sort_boot_steps(UnsortedSteps) -> {mfa, {M,F,A}} <- Attributes, not erlang:function_exported(M, F, length(A))] of [] -> SortedSteps; - MissingFunctions -> boot_error( + MissingFunctions -> basic_boot_error( + {missing_functions, MissingFunctions}, "Boot step functions not exported: ~p~n", [MissingFunctions]) end; {error, {vertex, duplicate, StepName}} -> - boot_error("Duplicate boot step name: ~w~n", [StepName]); + basic_boot_error({duplicate_boot_step, StepName}, + "Duplicate boot step name: ~w~n", [StepName]); {error, {edge, Reason, From, To}} -> - boot_error( + basic_boot_error( + {invalid_boot_step_dependency, From, To}, "Could not add boot step dependency of ~w on ~w:~n~s", [To, From, case Reason of @@ -510,30 +542,43 @@ sort_boot_steps(UnsortedSteps) -> end]) end. -boot_step_error({error, {timeout_waiting_for_tables, _}}, _Stacktrace) -> +-ifdef(use_specs). +-spec(boot_error/2 :: (term(), not_available | [tuple()]) -> no_return()). +-endif. +boot_error(Term={error, {timeout_waiting_for_tables, _}}, _Stacktrace) -> + AllNodes = rabbit_mnesia:cluster_nodes(all), {Err, Nodes} = - case rabbit_mnesia:read_previously_running_nodes() of + case AllNodes -- [node()] of [] -> {"Timeout contacting cluster nodes. Since RabbitMQ was" " shut down forcefully~nit cannot determine which nodes" - " are timing out. Details on all nodes will~nfollow.~n", - rabbit_mnesia:all_clustered_nodes() -- [node()]}; + " are timing out.~n", []}; Ns -> {rabbit_misc:format( "Timeout contacting cluster nodes: ~p.~n", [Ns]), Ns} end, - boot_error(Err ++ rabbit_nodes:diagnostics(Nodes) ++ "~n~n", []); + basic_boot_error(Term, + Err ++ rabbit_nodes:diagnostics(Nodes) ++ "~n~n", []); +boot_error(Reason, Stacktrace) -> + Fmt = "Error description:~n ~p~n~n" ++ + "Log files (may contain more information):~n ~s~n ~s~n~n", + Args = [Reason, log_location(kernel), log_location(sasl)], + boot_error(Reason, Fmt, Args, Stacktrace). -boot_step_error(Reason, Stacktrace) -> - boot_error("Error description:~n ~p~n~n" - "Log files (may contain more information):~n ~s~n ~s~n~n" - "Stack trace:~n ~p~n~n", - [Reason, log_location(kernel), log_location(sasl), Stacktrace]). +-ifdef(use_specs). +-spec(boot_error/4 :: (term(), string(), [any()], not_available | [tuple()]) + -> no_return()). +-endif. +boot_error(Reason, Fmt, Args, not_available) -> + basic_boot_error(Reason, Fmt, Args); +boot_error(Reason, Fmt, Args, Stacktrace) -> + basic_boot_error(Reason, Fmt ++ "Stack trace:~n ~p~n~n", + Args ++ [Stacktrace]). -boot_error(Format, Args) -> +basic_boot_error(Reason, Format, Args) -> io:format("~n~nBOOT FAILED~n===========~n~n" ++ Format, Args), - error_logger:error_msg(Format, Args), + rabbit_misc:local_info_msg(Format, Args), timer:sleep(1000), - exit({?MODULE, failure_during_boot}). + exit({?MODULE, failure_during_boot, Reason}). %%--------------------------------------------------------------------------- %% boot step functions @@ -543,10 +588,14 @@ boot_delegate() -> rabbit_sup:start_supervisor_child(delegate_sup, [Count]). recover() -> - rabbit_binding:recover(rabbit_exchange:recover(), rabbit_amqqueue:start()). + rabbit_policy:recover(), + Qs = rabbit_amqqueue:recover(), + ok = rabbit_binding:recover(rabbit_exchange:recover(), + [QName || #amqqueue{name = QName} <- Qs]), + rabbit_amqqueue:start(Qs). maybe_insert_default_data() -> - case rabbit_mnesia:is_db_empty() of + case rabbit_table:is_empty() of true -> insert_default_data(); false -> ok end. @@ -561,7 +610,8 @@ insert_default_data() -> ok = rabbit_vhost:add(DefaultVHost), ok = rabbit_auth_backend_internal:add_user(DefaultUser, DefaultPass), ok = rabbit_auth_backend_internal:set_tags(DefaultUser, DefaultTags), - ok = rabbit_auth_backend_internal:set_permissions(DefaultUser, DefaultVHost, + ok = rabbit_auth_backend_internal:set_permissions(DefaultUser, + DefaultVHost, DefaultConfigurePerm, DefaultWritePerm, DefaultReadPerm), @@ -649,6 +699,17 @@ force_event_refresh() -> %%--------------------------------------------------------------------------- %% misc +log_broker_started(Plugins) -> + rabbit_misc:with_local_io( + fun() -> + PluginList = iolist_to_binary([rabbit_misc:format(" * ~s~n", [P]) + || P <- Plugins]), + error_logger:info_msg( + "Server startup complete; ~b plugins started.~n~s", + [length(Plugins), PluginList]), + io:format(" completed with ~p plugins.~n", [length(Plugins)]) + end). + erts_version_check() -> FoundVer = erlang:system_info(version), case rabbit_misc:version_compare(?ERTS_MINIMUM, FoundVer, lte) of @@ -660,49 +721,39 @@ erts_version_check() -> print_banner() -> {ok, Product} = application:get_key(id), {ok, Version} = application:get_key(vsn), - ProductLen = string:len(Product), - io:format("~n" - "+---+ +---+~n" - "| | | |~n" - "| | | |~n" - "| | | |~n" - "| +---+ +-------+~n" - "| |~n" - "| ~s +---+ |~n" - "| | | |~n" - "| ~s +---+ |~n" - "| |~n" - "+-------------------+~n" - "~s~n~s~n~s~n~n", - [Product, string:right([$v|Version], ProductLen), - ?PROTOCOL_VERSION, - ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]), + io:format("~n ~s ~s. ~s" + "~n ## ## ~s" + "~n ## ##" + "~n ########## Logs: ~s" + "~n ###### ## ~s" + "~n ##########" + "~n Starting broker...", + [Product, Version, ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE, + log_location(kernel), log_location(sasl)]). + +log_banner() -> Settings = [{"node", node()}, - {"app descriptor", app_location()}, {"home dir", home_dir()}, {"config file(s)", config_files()}, {"cookie hash", rabbit_nodes:cookie_hash()}, {"log", log_location(kernel)}, {"sasl log", log_location(sasl)}, - {"database dir", rabbit_mnesia:dir()}, - {"erlang version", erlang:system_info(version)}], + {"database dir", rabbit_mnesia:dir()}], DescrLen = 1 + lists:max([length(K) || {K, _V} <- Settings]), Format = fun (K, V) -> - io:format("~-" ++ integer_to_list(DescrLen) ++ "s: ~s~n", - [K, V]) + rabbit_misc:format( + "~-" ++ integer_to_list(DescrLen) ++ "s: ~s~n", [K, V]) end, - lists:foreach(fun ({"config file(s)" = K, []}) -> - Format(K, "(none)"); - ({"config file(s)" = K, [V0 | Vs]}) -> - Format(K, V0), [Format("", V) || V <- Vs]; - ({K, V}) -> - Format(K, V) - end, Settings), - io:nl(). - -app_location() -> - {ok, Application} = application:get_application(), - filename:absname(code:where_is_file(atom_to_list(Application) ++ ".app")). + Banner = iolist_to_binary( + [case S of + {"config file(s)" = K, []} -> + Format(K, "(none)"); + {"config file(s)" = K, [V0 | Vs]} -> + Format(K, V0), [Format("", V) || V <- Vs]; + {K, V} -> + Format(K, V) + end || S <- Settings]), + error_logger:info_msg("~s", [Banner]). home_dir() -> case init:get_argument(home) of @@ -717,3 +768,10 @@ config_files() -> [File] <- Files]; error -> [] end. + +%% We don't want this in fhc since it references rabbit stuff. And we can't put +%% this in the bootstep directly. +start_fhc() -> + rabbit_sup:start_restartable_child( + file_handle_cache, + [fun rabbit_alarm:set_alarm/1, fun rabbit_alarm:clear_alarm/1]). diff --git a/src/rabbit_access_control.erl b/src/rabbit_access_control.erl index 75c53511..d54c2a8d 100644 --- a/src/rabbit_access_control.erl +++ b/src/rabbit_access_control.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_access_control). @@ -68,12 +68,13 @@ check_vhost_access(User = #user{ username = Username, auth_backend = Module }, VHostPath) -> check_access( fun() -> - rabbit_vhost:exists(VHostPath) andalso - Module:check_vhost_access(User, VHostPath) + %% TODO this could be an andalso shortcut under >R13A + case rabbit_vhost:exists(VHostPath) of + false -> false; + true -> Module:check_vhost_access(User, VHostPath) + end end, - "~s failed checking vhost access to ~s for ~s: ~p~n", - [Module, VHostPath, Username], - "access to vhost '~s' refused for user '~s'", + Module, "access to vhost '~s' refused for user '~s'", [VHostPath, Username]). check_resource_access(User, R = #resource{kind = exchange, name = <<"">>}, @@ -84,15 +85,14 @@ check_resource_access(User = #user{username = Username, auth_backend = Module}, Resource, Permission) -> check_access( fun() -> Module:check_resource_access(User, Resource, Permission) end, - "~s failed checking resource access to ~p for ~s: ~p~n", - [Module, Resource, Username], - "access to ~s refused for user '~s'", + Module, "access to ~s refused for user '~s'", [rabbit_misc:rs(Resource), Username]). -check_access(Fun, ErrStr, ErrArgs, RefStr, RefArgs) -> +check_access(Fun, Module, ErrStr, ErrArgs) -> Allow = case Fun() of - {error, _} = E -> - rabbit_log:error(ErrStr, ErrArgs ++ [E]), + {error, E} -> + rabbit_log:error(ErrStr ++ " by ~s: ~p~n", + ErrArgs ++ [Module, E]), false; Else -> Else @@ -101,5 +101,5 @@ check_access(Fun, ErrStr, ErrArgs, RefStr, RefArgs) -> true -> ok; false -> - rabbit_misc:protocol_error(access_refused, RefStr, RefArgs) + rabbit_misc:protocol_error(access_refused, ErrStr, ErrArgs) end. diff --git a/src/rabbit_alarm.erl b/src/rabbit_alarm.erl index d16d90a4..cd1d125b 100644 --- a/src/rabbit_alarm.erl +++ b/src/rabbit_alarm.erl @@ -10,30 +10,36 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_alarm). -behaviour(gen_event). --export([start/0, stop/0, register/2, on_node_up/1, on_node_down/1]). +-export([start_link/0, start/0, stop/0, register/2, set_alarm/1, + clear_alarm/1, get_alarms/0, on_node_up/1, on_node_down/1]). -export([init/1, handle_call/2, handle_event/2, handle_info/2, terminate/2, code_change/3]). -export([remote_conserve_resources/3]). %% Internal use only --record(alarms, {alertees, alarmed_nodes}). +-define(SERVER, ?MODULE). + +-record(alarms, {alertees, alarmed_nodes, alarms}). %%---------------------------------------------------------------------------- -ifdef(use_specs). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(start/0 :: () -> 'ok'). -spec(stop/0 :: () -> 'ok'). --spec(register/2 :: (pid(), rabbit_types:mfargs()) -> boolean()). +-spec(register/2 :: (pid(), rabbit_types:mfargs()) -> [atom()]). +-spec(set_alarm/1 :: (any()) -> 'ok'). +-spec(clear_alarm/1 :: (any()) -> 'ok'). -spec(on_node_up/1 :: (node()) -> 'ok'). -spec(on_node_down/1 :: (node()) -> 'ok'). @@ -41,67 +47,90 @@ %%---------------------------------------------------------------------------- +start_link() -> + gen_event:start_link({local, ?SERVER}). + start() -> - ok = alarm_handler:add_alarm_handler(?MODULE, []), + ok = rabbit_sup:start_restartable_child(?MODULE), + ok = gen_event:add_handler(?SERVER, ?MODULE, []), {ok, MemoryWatermark} = application:get_env(vm_memory_high_watermark), - rabbit_sup:start_restartable_child(vm_memory_monitor, [MemoryWatermark]), - + rabbit_sup:start_restartable_child( + vm_memory_monitor, [MemoryWatermark, + fun (Alarm) -> + background_gc:run(), + set_alarm(Alarm) + end, + fun clear_alarm/1]), {ok, DiskLimit} = application:get_env(disk_free_limit), rabbit_sup:start_restartable_child(rabbit_disk_monitor, [DiskLimit]), ok. -stop() -> - ok = alarm_handler:delete_alarm_handler(?MODULE). +stop() -> ok. -register(Pid, HighMemMFA) -> - gen_event:call(alarm_handler, ?MODULE, - {register, Pid, HighMemMFA}, - infinity). +register(Pid, AlertMFA) -> + gen_event:call(?SERVER, ?MODULE, {register, Pid, AlertMFA}, infinity). -on_node_up(Node) -> gen_event:notify(alarm_handler, {node_up, Node}). +set_alarm(Alarm) -> gen_event:notify(?SERVER, {set_alarm, Alarm}). +clear_alarm(Alarm) -> gen_event:notify(?SERVER, {clear_alarm, Alarm}). -on_node_down(Node) -> gen_event:notify(alarm_handler, {node_down, Node}). +get_alarms() -> gen_event:call(?SERVER, ?MODULE, get_alarms, infinity). + +on_node_up(Node) -> gen_event:notify(?SERVER, {node_up, Node}). +on_node_down(Node) -> gen_event:notify(?SERVER, {node_down, Node}). -%% Can't use alarm_handler:{set,clear}_alarm because that doesn't -%% permit notifying a remote node. remote_conserve_resources(Pid, Source, true) -> - gen_event:notify({alarm_handler, node(Pid)}, + gen_event:notify({?SERVER, node(Pid)}, {set_alarm, {{resource_limit, Source, node()}, []}}); remote_conserve_resources(Pid, Source, false) -> - gen_event:notify({alarm_handler, node(Pid)}, + gen_event:notify({?SERVER, node(Pid)}, {clear_alarm, {resource_limit, Source, node()}}). + %%---------------------------------------------------------------------------- init([]) -> {ok, #alarms{alertees = dict:new(), - alarmed_nodes = dict:new()}}. + alarmed_nodes = dict:new(), + alarms = []}}. -handle_call({register, Pid, HighMemMFA}, State) -> - {ok, 0 < dict:size(State#alarms.alarmed_nodes), - internal_register(Pid, HighMemMFA, State)}; +handle_call({register, Pid, AlertMFA}, State = #alarms{alarmed_nodes = AN}) -> + {ok, lists:usort(lists:append([V || {_, V} <- dict:to_list(AN)])), + internal_register(Pid, AlertMFA, State)}; + +handle_call(get_alarms, State = #alarms{alarms = Alarms}) -> + {ok, Alarms, State}; handle_call(_Request, State) -> {ok, not_understood, State}. -handle_event({set_alarm, {{resource_limit, Source, Node}, []}}, State) -> - {ok, maybe_alert(fun dict:append/3, Node, Source, State)}; +handle_event({set_alarm, Alarm}, State = #alarms{alarms = Alarms}) -> + case lists:member(Alarm, Alarms) of + true -> {ok, State}; + false -> UpdatedAlarms = lists:usort([Alarm|Alarms]), + handle_set_alarm(Alarm, State#alarms{alarms = UpdatedAlarms}) + end; + +handle_event({clear_alarm, Alarm}, State = #alarms{alarms = Alarms}) -> + case lists:keymember(Alarm, 1, Alarms) of + true -> handle_clear_alarm( + Alarm, State#alarms{alarms = lists:keydelete( + Alarm, 1, Alarms)}); + false -> {ok, State} -handle_event({clear_alarm, {resource_limit, Source, Node}}, State) -> - {ok, maybe_alert(fun dict_unappend/3, Node, Source, State)}; + end; handle_event({node_up, Node}, State) -> %% Must do this via notify and not call to avoid possible deadlock. ok = gen_event:notify( - {alarm_handler, Node}, + {?SERVER, Node}, {register, self(), {?MODULE, remote_conserve_resources, []}}), {ok, State}; handle_event({node_down, Node}, State) -> - {ok, maybe_alert(fun dict_unappend_all/3, Node, [], State)}; + {ok, maybe_alert(fun dict_unappend_all/3, Node, [], false, State)}; -handle_event({register, Pid, HighMemMFA}, State) -> - {ok, internal_register(Pid, HighMemMFA, State)}; +handle_event({register, Pid, AlertMFA}, State) -> + {ok, internal_register(Pid, AlertMFA, State)}; handle_event(_Event, State) -> {ok, State}. @@ -121,45 +150,36 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- +dict_append(Key, Val, Dict) -> + L = case dict:find(Key, Dict) of + {ok, V} -> V; + error -> [] + end, + dict:store(Key, lists:usort([Val|L]), Dict). + dict_unappend_all(Key, _Val, Dict) -> dict:erase(Key, Dict). dict_unappend(Key, Val, Dict) -> - case lists:delete(Val, dict:fetch(Key, Dict)) of + L = case dict:find(Key, Dict) of + {ok, V} -> V; + error -> [] + end, + + case lists:delete(Val, L) of [] -> dict:erase(Key, Dict); X -> dict:store(Key, X, Dict) end. -count_dict_values(Val, Dict) -> - dict:fold(fun (_Node, List, Count) -> - Count + case lists:member(Val, List) of - true -> 1; - false -> 0 - end - end, 0, Dict). - -maybe_alert(UpdateFun, Node, Source, +maybe_alert(UpdateFun, Node, Source, Alert, State = #alarms{alarmed_nodes = AN, alertees = Alertees}) -> AN1 = UpdateFun(Node, Source, AN), - BeforeSz = count_dict_values(Source, AN), - AfterSz = count_dict_values(Source, AN1), - - %% If we have changed our alarm state, inform the remotes. - IsLocal = Node =:= node(), - if IsLocal andalso BeforeSz < AfterSz -> - ok = alert_remote(true, Alertees, Source); - IsLocal andalso BeforeSz > AfterSz -> - ok = alert_remote(false, Alertees, Source); - true -> - ok - end, - %% If the overall alarm state has changed, inform the locals. - case {dict:size(AN), dict:size(AN1)} of - {0, 1} -> ok = alert_local(true, Alertees, Source); - {1, 0} -> ok = alert_local(false, Alertees, Source); - {_, _} -> ok + case node() of + Node -> ok = alert_remote(Alert, Alertees, Source); + _ -> ok end, + ok = alert_local(Alert, Alertees, Source), State#alarms{alarmed_nodes = AN1}. alert_local(Alert, Alertees, Source) -> @@ -177,12 +197,42 @@ alert(Alertees, Source, Alert, NodeComparator) -> end end, ok, Alertees). -internal_register(Pid, {M, F, A} = HighMemMFA, +internal_register(Pid, {M, F, A} = AlertMFA, State = #alarms{alertees = Alertees}) -> _MRef = erlang:monitor(process, Pid), case dict:find(node(), State#alarms.alarmed_nodes) of {ok, Sources} -> [apply(M, F, A ++ [Pid, R, true]) || R <- Sources]; error -> ok end, - NewAlertees = dict:store(Pid, HighMemMFA, Alertees), + NewAlertees = dict:store(Pid, AlertMFA, Alertees), State#alarms{alertees = NewAlertees}. + +handle_set_alarm({{resource_limit, Source, Node}, []}, State) -> + rabbit_log:warning( + "~s resource limit alarm set on node ~p.~n~n" + "**********************************************************~n" + "*** Publishers will be blocked until this alarm clears ***~n" + "**********************************************************~n", + [Source, Node]), + {ok, maybe_alert(fun dict_append/3, Node, Source, true, State)}; +handle_set_alarm({file_descriptor_limit, []}, State) -> + rabbit_log:warning( + "file descriptor limit alarm set.~n~n" + "********************************************************************~n" + "*** New connections will not be accepted until this alarm clears ***~n" + "********************************************************************~n"), + {ok, State}; +handle_set_alarm(Alarm, State) -> + rabbit_log:warning("alarm '~p' set~n", [Alarm]), + {ok, State}. + +handle_clear_alarm({resource_limit, Source, Node}, State) -> + rabbit_log:warning("~s resource limit alarm cleared on node ~p~n", + [Source, Node]), + {ok, maybe_alert(fun dict_unappend/3, Node, Source, false, State)}; +handle_clear_alarm(file_descriptor_limit, State) -> + rabbit_log:warning("file descriptor limit alarm cleared~n"), + {ok, State}; +handle_clear_alarm(Alarm, State) -> + rabbit_log:warning("alarm '~p' cleared~n", [Alarm]), + {ok, State}. diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index c1673504..a1efaf65 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -10,29 +10,32 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_amqqueue). --export([start/0, stop/0, declare/5, delete_immediately/1, delete/3, purge/1]). +-export([recover/0, stop/0, start/1, declare/5, + delete_immediately/1, delete/3, purge/1, forget_all_durable/1]). -export([pseudo_queue/2]). --export([lookup/1, with/2, with_or_die/2, assert_equivalence/5, +-export([lookup/1, not_found_or_absent/1, with/2, with/3, with_or_die/2, + assert_equivalence/5, check_exclusive_access/2, with_exclusive_access_or_die/3, stat/1, deliver/2, deliver_flow/2, requeue/3, ack/3, reject/4]). -export([list/0, list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). --export([force_event_refresh/0]). +-export([force_event_refresh/0, wake_up/1]). -export([consumers/1, consumers_all/1, consumer_info_keys/0]). --export([basic_get/3, basic_consume/7, basic_cancel/4]). --export([notify_sent/2, notify_sent_queue_down/1, unblock/2, flush_all/2]). --export([notify_down_all/2, limit_all/3]). +-export([basic_get/4, basic_consume/9, basic_cancel/4]). +-export([notify_sent/2, notify_sent_queue_down/1, resume/2, flush_all/2]). +-export([notify_down_all/2, activate_limit_all/2, credit/5]). -export([on_node_down/1]). --export([store_queue/1]). - +-export([update/2, store_queue/1, policy_changed/2]). +-export([start_mirroring/1, stop_mirroring/1, sync_mirrors/1, + cancel_sync_mirrors/1]). %% internal --export([internal_declare/2, internal_delete/2, run_backing_queue/3, +-export([internal_declare/2, internal_delete/1, run_backing_queue/3, set_ram_duration_target/2, set_maximum_since_use/2]). -include("rabbit.hrl"). @@ -58,24 +61,33 @@ -type(msg_id() :: non_neg_integer()). -type(ok_or_errors() :: 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). --type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered'). --type(queue_or_not_found() :: rabbit_types:amqqueue() | 'not_found'). - --spec(start/0 :: () -> [name()]). +-type(routing_result() :: 'routed' | 'unroutable'). +-type(queue_or_absent() :: rabbit_types:amqqueue() | + {'absent', rabbit_types:amqqueue()}). +-type(not_found_or_absent() :: 'not_found' | + {'absent', rabbit_types:amqqueue()}). +-spec(recover/0 :: () -> [rabbit_types:amqqueue()]). -spec(stop/0 :: () -> 'ok'). +-spec(start/1 :: ([rabbit_types:amqqueue()]) -> 'ok'). -spec(declare/5 :: (name(), boolean(), boolean(), rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) - -> {'new' | 'existing', rabbit_types:amqqueue()} | + -> {'new' | 'existing' | 'absent', rabbit_types:amqqueue()} | rabbit_types:channel_exit()). -spec(internal_declare/2 :: (rabbit_types:amqqueue(), boolean()) - -> queue_or_not_found() | rabbit_misc:thunk(queue_or_not_found())). + -> queue_or_absent() | rabbit_misc:thunk(queue_or_absent())). +-spec(update/2 :: + (name(), + fun((rabbit_types:amqqueue()) -> rabbit_types:amqqueue())) -> 'ok'). -spec(lookup/1 :: (name()) -> rabbit_types:ok(rabbit_types:amqqueue()) | rabbit_types:error('not_found'); ([name()]) -> [rabbit_types:amqqueue()]). --spec(with/2 :: (name(), qfun(A)) -> A | rabbit_types:error('not_found')). +-spec(not_found_or_absent/1 :: (name()) -> not_found_or_absent()). +-spec(with/2 :: (name(), qfun(A)) -> + A | rabbit_types:error(not_found_or_absent())). +-spec(with/3 :: (name(), qfun(A), fun((not_found_or_absent()) -> B)) -> A | B). -spec(with_or_die/2 :: (name(), qfun(A)) -> A | rabbit_types:channel_exit()). -spec(assert_equivalence/5 :: @@ -99,6 +111,7 @@ -spec(info_all/2 :: (rabbit_types:vhost(), rabbit_types:info_keys()) -> [rabbit_types:infos()]). -spec(force_event_refresh/0 :: () -> 'ok'). +-spec(wake_up/1 :: (rabbit_types:amqqueue()) -> 'ok'). -spec(consumers/1 :: (rabbit_types:amqqueue()) -> [{pid(), rabbit_types:ctag(), boolean()}]). @@ -122,6 +135,7 @@ rabbit_types:error('in_use') | rabbit_types:error('not_empty')). -spec(purge/1 :: (rabbit_types:amqqueue()) -> qlen()). +-spec(forget_all_durable/1 :: (node()) -> 'ok'). -spec(deliver/2 :: ([rabbit_types:amqqueue()], rabbit_types:delivery()) -> {routing_result(), qpids()}). -spec(deliver_flow/2 :: ([rabbit_types:amqqueue()], rabbit_types:delivery()) -> @@ -130,25 +144,26 @@ -spec(ack/3 :: (pid(), [msg_id()], pid()) -> 'ok'). -spec(reject/4 :: (pid(), [msg_id()], boolean(), pid()) -> 'ok'). -spec(notify_down_all/2 :: (qpids(), pid()) -> ok_or_errors()). --spec(limit_all/3 :: (qpids(), pid(), rabbit_limiter:token()) -> - ok_or_errors()). --spec(basic_get/3 :: (rabbit_types:amqqueue(), pid(), boolean()) -> +-spec(activate_limit_all/2 :: (qpids(), pid()) -> ok_or_errors()). +-spec(basic_get/4 :: (rabbit_types:amqqueue(), pid(), boolean(), pid()) -> {'ok', non_neg_integer(), qmsg()} | 'empty'). --spec(basic_consume/7 :: - (rabbit_types:amqqueue(), boolean(), pid(), - rabbit_limiter:token(), rabbit_types:ctag(), boolean(), any()) +-spec(credit/5 :: (rabbit_types:amqqueue(), pid(), rabbit_types:ctag(), + non_neg_integer(), boolean()) -> 'ok'). +-spec(basic_consume/9 :: + (rabbit_types:amqqueue(), boolean(), pid(), pid(), boolean(), + rabbit_types:ctag(), boolean(), {non_neg_integer(), boolean()} | 'none', any()) -> rabbit_types:ok_or_error('exclusive_consume_unavailable')). -spec(basic_cancel/4 :: (rabbit_types:amqqueue(), pid(), rabbit_types:ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(notify_sent_queue_down/1 :: (pid()) -> 'ok'). --spec(unblock/2 :: (pid(), pid()) -> 'ok'). +-spec(resume/2 :: (pid(), pid()) -> 'ok'). -spec(flush_all/2 :: (qpids(), pid()) -> 'ok'). --spec(internal_delete/2 :: - (name(), pid()) -> rabbit_types:ok_or_error('not_found') | - rabbit_types:connection_exit() | - fun (() -> rabbit_types:ok_or_error('not_found') | - rabbit_types:connection_exit())). +-spec(internal_delete/1 :: + (name()) -> rabbit_types:ok_or_error('not_found') | + rabbit_types:connection_exit() | + fun (() -> rabbit_types:ok_or_error('not_found') | + rabbit_types:connection_exit())). -spec(run_backing_queue/3 :: (pid(), atom(), (fun ((atom(), A) -> {[rabbit_types:msg_id()], A}))) -> 'ok'). @@ -157,6 +172,12 @@ -spec(on_node_down/1 :: (node()) -> 'ok'). -spec(pseudo_queue/2 :: (name(), pid()) -> rabbit_types:amqqueue()). -spec(store_queue/1 :: (rabbit_types:amqqueue()) -> 'ok'). +-spec(policy_changed/2 :: + (rabbit_types:amqqueue(), rabbit_types:amqqueue()) -> 'ok'). +-spec(start_mirroring/1 :: (pid()) -> 'ok'). +-spec(stop_mirroring/1 :: (pid()) -> 'ok'). +-spec(sync_mirrors/1 :: (pid()) -> 'ok' | rabbit_types:error('not_mirrored')). +-spec(cancel_sync_mirrors/1 :: (pid()) -> 'ok' | {'ok', 'not_syncing'}). -endif. @@ -165,7 +186,10 @@ -define(CONSUMER_INFO_KEYS, [queue_name, channel_pid, consumer_tag, ack_required]). -start() -> +recover() -> + %% Clear out remnants of old incarnation, in case we restarted + %% faster than other nodes handled DOWN messages from us. + on_node_down(node()), DurableQueues = find_durable_queues(), {ok, BQ} = application:get_env(rabbit, backing_queue_module), ok = BQ:start([QName || #amqqueue{name = QName} <- DurableQueues]), @@ -182,36 +206,45 @@ stop() -> {ok, BQ} = application:get_env(rabbit, backing_queue_module), ok = BQ:stop(). +start(Qs) -> + %% At this point all recovered queues and their bindings are + %% visible to routing, so now it is safe for them to complete + %% their initialisation (which may involve interacting with other + %% queues). + [Pid ! {self(), go} || #amqqueue{pid = Pid} <- Qs], + ok. + find_durable_queues() -> Node = node(), %% TODO: use dirty ops instead rabbit_misc:execute_mnesia_transaction( fun () -> - qlc:e(qlc:q([Q || Q = #amqqueue{pid = Pid} + qlc:e(qlc:q([Q || Q = #amqqueue{name = Name, + pid = Pid} <- mnesia:table(rabbit_durable_queue), + mnesia:read(rabbit_queue, Name, read) =:= [], node(Pid) == Node])) end). recover_durable_queues(DurableQueues) -> Qs = [start_queue_process(node(), Q) || Q <- DurableQueues], - [QName || Q = #amqqueue{name = QName, pid = Pid} <- Qs, - gen_server2:call(Pid, {init, true}, infinity) == {new, Q}]. + [Q || Q = #amqqueue{pid = Pid} <- Qs, + gen_server2:call(Pid, {init, self()}, infinity) == {new, Q}]. declare(QueueName, Durable, AutoDelete, Args, Owner) -> ok = check_declare_arguments(QueueName, Args), - {Node, MNodes} = determine_queue_nodes(Args), - Q = start_queue_process(Node, #amqqueue{name = QueueName, - durable = Durable, - auto_delete = AutoDelete, - arguments = Args, - exclusive_owner = Owner, - pid = none, - slave_pids = [], - mirror_nodes = MNodes}), - case gen_server2:call(Q#amqqueue.pid, {init, false}, infinity) of - not_found -> rabbit_misc:not_found(QueueName); - Q1 -> Q1 - end. + Q0 = rabbit_policy:set(#amqqueue{name = QueueName, + durable = Durable, + auto_delete = AutoDelete, + arguments = Args, + exclusive_owner = Owner, + pid = none, + slave_pids = [], + sync_slave_pids = [], + gm_pids = []}), + {Node, _MNodes} = rabbit_mirror_queue_misc:suggested_queue_nodes(Q0), + Q1 = start_queue_process(Node, Q0), + gen_server2:call(Q1#amqqueue.pid, {init, new}, infinity). internal_declare(Q, true) -> rabbit_misc:execute_mnesia_tx_with_tail( @@ -221,45 +254,51 @@ internal_declare(Q = #amqqueue{name = QueueName}, false) -> fun () -> case mnesia:wread({rabbit_queue, QueueName}) of [] -> - case mnesia:read({rabbit_durable_queue, QueueName}) of - [] -> ok = store_queue(Q), - B = add_default_binding(Q), - fun () -> B(), Q end; - %% Q exists on stopped node - [_] -> rabbit_misc:const(not_found) + case not_found_or_absent(QueueName) of + not_found -> Q1 = rabbit_policy:set(Q), + ok = store_queue(Q1), + B = add_default_binding(Q1), + fun () -> B(), Q1 end; + {absent, _Q} = R -> rabbit_misc:const(R) end; [ExistingQ = #amqqueue{pid = QPid}] -> case rabbit_misc:is_process_alive(QPid) of true -> rabbit_misc:const(ExistingQ); - false -> TailFun = internal_delete(QueueName, QPid), + false -> TailFun = internal_delete(QueueName), fun () -> TailFun(), ExistingQ end end end end). +update(Name, Fun) -> + case mnesia:wread({rabbit_queue, Name}) of + [Q = #amqqueue{durable = Durable}] -> + Q1 = Fun(Q), + ok = mnesia:write(rabbit_queue, Q1, write), + case Durable of + true -> ok = mnesia:write(rabbit_durable_queue, Q1, write); + _ -> ok + end; + [] -> + ok + end. + store_queue(Q = #amqqueue{durable = true}) -> - ok = mnesia:write(rabbit_durable_queue, Q#amqqueue{slave_pids = []}, write), + ok = mnesia:write(rabbit_durable_queue, + Q#amqqueue{slave_pids = [], + sync_slave_pids = [], + gm_pids = []}, write), ok = mnesia:write(rabbit_queue, Q, write), ok; store_queue(Q = #amqqueue{durable = false}) -> ok = mnesia:write(rabbit_queue, Q, write), ok. -determine_queue_nodes(Args) -> - Policy = rabbit_misc:table_lookup(Args, <<"x-ha-policy">>), - PolicyParams = rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>), - case {Policy, PolicyParams} of - {{_Type, <<"nodes">>}, {array, Nodes}} -> - case [list_to_atom(binary_to_list(Node)) || - {longstr, Node} <- Nodes] of - [Node] -> {Node, undefined}; - [First | Rest] -> {First, [First | Rest]} - end; - {{_Type, <<"all">>}, _} -> - {node(), all}; - _ -> - {node(), undefined} - end. +policy_changed(Q1, Q2) -> + rabbit_mirror_queue_misc:update_mirrors(Q1, Q2), + %% Make sure we emit a stats event even if nothing + %% mirroring-related has changed - the policy may have changed anyway. + wake_up(Q1). start_queue_process(Node, Q) -> {ok, Pid} = rabbit_amqqueue_sup:start_child(Node, [Q]), @@ -273,6 +312,8 @@ add_default_binding(#amqqueue{name = QueueName}) -> key = RoutingKey, args = []}). +lookup([]) -> []; %% optimisation +lookup([Name]) -> ets:lookup(rabbit_queue, Name); %% optimisation lookup(Names) when is_list(Names) -> %% Normally we'd call mnesia:dirty_read/1 here, but that is quite %% expensive for reasons explained in rabbit_misc:dirty_read/1. @@ -280,21 +321,47 @@ lookup(Names) when is_list(Names) -> lookup(Name) -> rabbit_misc:dirty_read({rabbit_queue, Name}). +not_found_or_absent(Name) -> + %% NB: we assume that the caller has already performed a lookup on + %% rabbit_queue and not found anything + case mnesia:read({rabbit_durable_queue, Name}) of + [] -> not_found; + [Q] -> {absent, Q} %% Q exists on stopped node + end. + +not_found_or_absent_dirty(Name) -> + %% We should read from both tables inside a tx, to get a + %% consistent view. But the chances of an inconsistency are small, + %% and only affect the error kind. + case rabbit_misc:dirty_read({rabbit_durable_queue, Name}) of + {error, not_found} -> not_found; + {ok, Q} -> {absent, Q} + end. + with(Name, F, E) -> case lookup(Name) of - {ok, Q = #amqqueue{slave_pids = []}} -> - rabbit_misc:with_exit_handler(E, fun () -> F(Q) end); - {ok, Q} -> - E1 = fun () -> timer:sleep(25), with(Name, F, E) end, - rabbit_misc:with_exit_handler(E1, fun () -> F(Q) end); + {ok, Q = #amqqueue{pid = QPid}} -> + %% We check is_process_alive(QPid) in case we receive a + %% nodedown (for example) in F() that has nothing to do + %% with the QPid. + rabbit_misc:with_exit_handler( + fun () -> + case rabbit_misc:is_process_alive(QPid) of + true -> E(not_found_or_absent_dirty(Name)); + false -> timer:sleep(25), + with(Name, F, E) + end + end, fun () -> F(Q) end); {error, not_found} -> - E() + E(not_found_or_absent_dirty(Name)) end. -with(Name, F) -> - with(Name, F, fun () -> {error, not_found} end). +with(Name, F) -> with(Name, F, fun (E) -> {error, E} end). + with_or_die(Name, F) -> - with(Name, F, fun () -> rabbit_misc:not_found(Name) end). + with(Name, F, fun (not_found) -> rabbit_misc:not_found(Name); + ({absent, Q}) -> rabbit_misc:absent(Q) + end). assert_equivalence(#amqqueue{durable = Durable, auto_delete = AutoDelete} = Q, @@ -325,16 +392,10 @@ with_exclusive_access_or_die(Name, ReaderPid, F) -> assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args}, RequiredArgs) -> - rabbit_misc:assert_args_equivalence( - Args, RequiredArgs, QueueName, - [<<"x-expires">>, <<"x-message-ttl">>, <<"x-ha-policy">>]). + rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName, + [Key || {Key, _Fun} <- args()]). check_declare_arguments(QueueName, Args) -> - Checks = [{<<"x-expires">>, fun check_positive_int_arg/2}, - {<<"x-message-ttl">>, fun check_non_neg_int_arg/2}, - {<<"x-ha-policy">>, fun check_ha_policy_arg/2}, - {<<"x-dead-letter-exchange">>, fun check_string_arg/2}, - {<<"x-dead-letter-routing-key">>, fun check_dlxrk_arg/2}], [case rabbit_misc:table_lookup(Args, Key) of undefined -> ok; TypeVal -> case Fun(TypeVal, Args) of @@ -345,13 +406,14 @@ check_declare_arguments(QueueName, Args) -> [Key, rabbit_misc:rs(QueueName), Error]) end - end || {Key, Fun} <- Checks], + end || {Key, Fun} <- args()], ok. -check_string_arg({longstr, _}, _Args) -> - ok; -check_string_arg({Type, _}, _) -> - {error, {unacceptable_type, Type}}. +args() -> + [{<<"x-expires">>, fun check_expires_arg/2}, + {<<"x-message-ttl">>, fun check_message_ttl_arg/2}, + {<<"x-dead-letter-routing-key">>, fun check_dlxrk_arg/2}, + {<<"x-max-length">>, fun check_max_length_arg/2}]. check_int_arg({Type, _}, _) -> case lists:member(Type, ?INTEGER_ARG_TYPES) of @@ -359,53 +421,35 @@ check_int_arg({Type, _}, _) -> false -> {error, {unacceptable_type, Type}} end. -check_positive_int_arg({Type, Val}, Args) -> +check_max_length_arg({Type, Val}, Args) -> case check_int_arg({Type, Val}, Args) of - ok when Val > 0 -> ok; - ok -> {error, {value_zero_or_less, Val}}; - Error -> Error + ok when Val >= 0 -> ok; + ok -> {error, {value_negative, Val}}; + Error -> Error end. -check_non_neg_int_arg({Type, Val}, Args) -> +check_expires_arg({Type, Val}, Args) -> case check_int_arg({Type, Val}, Args) of - ok when Val >= 0 -> ok; - ok -> {error, {value_less_than_zero, Val}}; + ok when Val == 0 -> {error, {value_zero, Val}}; + ok -> rabbit_misc:check_expiry(Val); Error -> Error end. +check_message_ttl_arg({Type, Val}, Args) -> + case check_int_arg({Type, Val}, Args) of + ok -> rabbit_misc:check_expiry(Val); + Error -> Error + end. + check_dlxrk_arg({longstr, _}, Args) -> case rabbit_misc:table_lookup(Args, <<"x-dead-letter-exchange">>) of undefined -> {error, routing_key_but_no_dlx_defined}; _ -> ok end; -check_dlxrk_arg({Type, _}, _Args) -> +check_dlxrk_arg({Type, _}, _Args) -> {error, {unacceptable_type, Type}}. -check_ha_policy_arg({longstr, <<"all">>}, _Args) -> - ok; -check_ha_policy_arg({longstr, <<"nodes">>}, Args) -> - case rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>) of - undefined -> - {error, {require, 'x-ha-policy-params'}}; - {array, []} -> - {error, {require_non_empty_list_of_nodes_for_ha}}; - {array, Ary} -> - case lists:all(fun ({longstr, _Node}) -> true; - (_ ) -> false - end, Ary) of - true -> ok; - false -> {error, {require_node_list_as_longstrs_for_ha, Ary}} - end; - {Type, _} -> - {error, {ha_nodes_policy_params_not_array_of_longstr, Type}} - end; -check_ha_policy_arg({longstr, Policy}, _Args) -> - {error, {invalid_ha_policy, Policy}}; -check_ha_policy_arg({Type, _}, _Args) -> - {error, {unacceptable_type, Type}}. - -list() -> - mnesia:dirty_match_object(rabbit_queue, #amqqueue{_ = '_'}). +list() -> mnesia:dirty_match_object(rabbit_queue, #amqqueue{_ = '_'}). list(VHostPath) -> mnesia:dirty_match_object( @@ -416,11 +460,10 @@ info_keys() -> rabbit_amqqueue_process:info_keys(). map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)). -info(#amqqueue{ pid = QPid }) -> - delegate_call(QPid, info). +info(#amqqueue{ pid = QPid }) -> delegate:call(QPid, info). info(#amqqueue{ pid = QPid }, Items) -> - case delegate_call(QPid, {info, Items}) of + case delegate:call(QPid, {info, Items}) of {ok, Res} -> Res; {error, Error} -> throw(Error) end. @@ -434,8 +477,7 @@ info_all(VHostPath, Items) -> map(VHostPath, fun (Q) -> info(Q, Items) end). %% the first place since a node failed). Therefore we keep poking at %% the list of queues until we were able to talk to a live process or %% the queue no longer exists. -force_event_refresh() -> - force_event_refresh([Q#amqqueue.name || Q <- list()]). +force_event_refresh() -> force_event_refresh([Q#amqqueue.name || Q <- list()]). force_event_refresh(QNames) -> Qs = [Q || Q <- list(), lists:member(Q#amqqueue.name, QNames)], @@ -450,8 +492,9 @@ force_event_refresh(QNames) -> force_event_refresh(Failed) end. -consumers(#amqqueue{ pid = QPid }) -> - delegate_call(QPid, consumers). +wake_up(#amqqueue{pid = QPid}) -> gen_server2:cast(QPid, wake_up). + +consumers(#amqqueue{ pid = QPid }) -> delegate:call(QPid, consumers). consumer_info_keys() -> ?CONSUMER_INFO_KEYS. @@ -465,50 +508,54 @@ consumers_all(VHostPath) -> {ChPid, ConsumerTag, AckRequired} <- consumers(Q)] end)). -stat(#amqqueue{pid = QPid}) -> - delegate_call(QPid, stat). +stat(#amqqueue{pid = QPid}) -> delegate:call(QPid, stat). delete_immediately(QPids) -> [gen_server2:cast(QPid, delete_immediately) || QPid <- QPids], ok. delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) -> - delegate_call(QPid, {delete, IfUnused, IfEmpty}). + delegate:call(QPid, {delete, IfUnused, IfEmpty}). -purge(#amqqueue{ pid = QPid }) -> delegate_call(QPid, purge). +purge(#amqqueue{ pid = QPid }) -> delegate:call(QPid, purge). deliver(Qs, Delivery) -> deliver(Qs, Delivery, noflow). deliver_flow(Qs, Delivery) -> deliver(Qs, Delivery, flow). -requeue(QPid, MsgIds, ChPid) -> - delegate_call(QPid, {requeue, MsgIds, ChPid}). +requeue(QPid, MsgIds, ChPid) -> delegate:call(QPid, {requeue, MsgIds, ChPid}). -ack(QPid, MsgIds, ChPid) -> - delegate_cast(QPid, {ack, MsgIds, ChPid}). +ack(QPid, MsgIds, ChPid) -> delegate:cast(QPid, {ack, MsgIds, ChPid}). reject(QPid, MsgIds, Requeue, ChPid) -> - delegate_cast(QPid, {reject, MsgIds, Requeue, ChPid}). + delegate:cast(QPid, {reject, MsgIds, Requeue, ChPid}). notify_down_all(QPids, ChPid) -> - safe_delegate_call_ok( - fun (QPid) -> gen_server2:call(QPid, {notify_down, ChPid}, infinity) end, - QPids). + {_, Bads} = delegate:call(QPids, {notify_down, ChPid}), + case lists:filter( + fun ({_Pid, {exit, {R, _}, _}}) -> rabbit_misc:is_abnormal_exit(R); + ({_Pid, _}) -> false + end, Bads) of + [] -> ok; + Bads1 -> {error, Bads1} + end. -limit_all(QPids, ChPid, Limiter) -> - delegate:invoke_no_result( - QPids, fun (QPid) -> gen_server2:cast(QPid, {limit, ChPid, Limiter}) end). +activate_limit_all(QPids, ChPid) -> + delegate:cast(QPids, {activate_limit, ChPid}). -basic_get(#amqqueue{pid = QPid}, ChPid, NoAck) -> - delegate_call(QPid, {basic_get, ChPid, NoAck}). +credit(#amqqueue{pid = QPid}, ChPid, CTag, Credit, Drain) -> + delegate:cast(QPid, {credit, ChPid, CTag, Credit, Drain}). -basic_consume(#amqqueue{pid = QPid}, NoAck, ChPid, Limiter, - ConsumerTag, ExclusiveConsume, OkMsg) -> - delegate_call(QPid, {basic_consume, NoAck, ChPid, - Limiter, ConsumerTag, ExclusiveConsume, OkMsg}). +basic_get(#amqqueue{pid = QPid}, ChPid, NoAck, LimiterPid) -> + delegate:call(QPid, {basic_get, ChPid, NoAck, LimiterPid}). + +basic_consume(#amqqueue{pid = QPid}, NoAck, ChPid, LimiterPid, LimiterActive, + ConsumerTag, ExclusiveConsume, CreditArgs, OkMsg) -> + delegate:call(QPid, {basic_consume, NoAck, ChPid, LimiterPid, LimiterActive, + ConsumerTag, ExclusiveConsume, CreditArgs, OkMsg}). basic_cancel(#amqqueue{pid = QPid}, ChPid, ConsumerTag, OkMsg) -> - ok = delegate_call(QPid, {basic_cancel, ChPid, ConsumerTag, OkMsg}). + delegate:call(QPid, {basic_cancel, ChPid, ConsumerTag, OkMsg}). notify_sent(QPid, ChPid) -> Key = {consumer_credit_to, QPid}, @@ -527,36 +574,58 @@ notify_sent_queue_down(QPid) -> erase({consumer_credit_to, QPid}), ok. -unblock(QPid, ChPid) -> - delegate_cast(QPid, {unblock, ChPid}). +resume(QPid, ChPid) -> delegate:cast(QPid, {resume, ChPid}). -flush_all(QPids, ChPid) -> - delegate:invoke_no_result( - QPids, fun (QPid) -> gen_server2:cast(QPid, {flush, ChPid}) end). +flush_all(QPids, ChPid) -> delegate:cast(QPids, {flush, ChPid}). internal_delete1(QueueName) -> ok = mnesia:delete({rabbit_queue, QueueName}), - ok = mnesia:delete({rabbit_durable_queue, QueueName}), + %% this 'guarded' delete prevents unnecessary writes to the mnesia + %% disk log + case mnesia:wread({rabbit_durable_queue, QueueName}) of + [] -> ok; + [_] -> ok = mnesia:delete({rabbit_durable_queue, QueueName}) + end, %% we want to execute some things, as decided by rabbit_exchange, %% after the transaction. rabbit_binding:remove_for_destination(QueueName). -internal_delete(QueueName, QPid) -> +internal_delete(QueueName) -> rabbit_misc:execute_mnesia_tx_with_tail( fun () -> - case mnesia:wread({rabbit_queue, QueueName}) of - [] -> rabbit_misc:const({error, not_found}); - [_] -> Deletions = internal_delete1(QueueName), - T = rabbit_binding:process_deletions(Deletions), - fun() -> - ok = T(), - ok = rabbit_event:notify(queue_deleted, - [{pid, QPid}, - {name, QueueName}]) - end + case {mnesia:wread({rabbit_queue, QueueName}), + mnesia:wread({rabbit_durable_queue, QueueName})} of + {[], []} -> + rabbit_misc:const({error, not_found}); + _ -> + Deletions = internal_delete1(QueueName), + T = rabbit_binding:process_deletions(Deletions), + fun() -> + ok = T(), + ok = rabbit_event:notify(queue_deleted, + [{name, QueueName}]) + end end end). +forget_all_durable(Node) -> + %% Note rabbit is not running so we avoid e.g. the worker pool. Also why + %% we don't invoke the return from rabbit_binding:process_deletions/1. + {atomic, ok} = + mnesia:sync_transaction( + fun () -> + Qs = mnesia:match_object(rabbit_durable_queue, + #amqqueue{_ = '_'}, write), + [rabbit_binding:process_deletions( + internal_delete1(Name)) || + #amqqueue{name = Name, pid = Pid} = Q <- Qs, + node(Pid) =:= Node, + rabbit_policy:get(<<"ha-mode">>, Q) + =:= {error, not_found}], + ok + end), + ok. + run_backing_queue(QPid, Mod, Fun) -> gen_server2:cast(QPid, {run_backing_queue, Mod, Fun}). @@ -566,14 +635,21 @@ set_ram_duration_target(QPid, Duration) -> set_maximum_since_use(QPid, Age) -> gen_server2:cast(QPid, {set_maximum_since_use, Age}). +start_mirroring(QPid) -> ok = delegate:cast(QPid, start_mirroring). +stop_mirroring(QPid) -> ok = delegate:cast(QPid, stop_mirroring). + +sync_mirrors(QPid) -> delegate:call(QPid, sync_mirrors). +cancel_sync_mirrors(QPid) -> delegate:call(QPid, cancel_sync_mirrors). + on_node_down(Node) -> rabbit_misc:execute_mnesia_tx_with_tail( fun () -> QsDels = - qlc:e(qlc:q([{{QName, Pid}, delete_queue(QName)} || + qlc:e(qlc:q([{QName, delete_queue(QName)} || #amqqueue{name = QName, pid = Pid, slave_pids = []} <- mnesia:table(rabbit_queue), - node(Pid) == Node])), + node(Pid) == Node andalso + not rabbit_misc:is_process_alive(Pid)])), {Qs, Dels} = lists:unzip(QsDels), T = rabbit_binding:process_deletions( lists:foldl(fun rabbit_binding:combine_deletions/2, @@ -581,10 +657,9 @@ on_node_down(Node) -> fun () -> T(), lists:foreach( - fun({QName, QPid}) -> + fun(QName) -> ok = rabbit_event:notify(queue_deleted, - [{pid, QPid}, - {name, QName}]) + [{name, QName}]) end, Qs) end end). @@ -599,64 +674,54 @@ pseudo_queue(QueueName, Pid) -> auto_delete = false, arguments = [], pid = Pid, - slave_pids = [], - mirror_nodes = undefined}. + slave_pids = []}. -deliver([], #delivery{mandatory = false, immediate = false}, _Flow) -> +deliver([], #delivery{mandatory = false}, _Flow) -> %% /dev/null optimisation {routed, []}; -deliver(Qs, Delivery = #delivery{mandatory = false, immediate = false}, Flow) -> - %% optimisation: when Mandatory = false and Immediate = false, - %% rabbit_amqqueue:deliver will deliver the message to the queue - %% process asynchronously, and return true, which means all the - %% QPids will always be returned. It is therefore safe to use a - %% fire-and-forget cast here and return the QPids - the semantics - %% is preserved. This scales much better than the non-immediate - %% case below. - QPids = qpids(Qs), +deliver(Qs, Delivery = #delivery{mandatory = false}, Flow) -> + %% optimisation: when Mandatory = false, rabbit_amqqueue:deliver + %% will deliver the message to the queue process asynchronously, + %% and return true, which means all the QPids will always be + %% returned. It is therefore safe to use a fire-and-forget cast + %% here and return the QPids - the semantics is preserved. This + %% scales much better than the case below. + {MPids, SPids} = qpids(Qs), + QPids = MPids ++ SPids, case Flow of flow -> [credit_flow:send(QPid) || QPid <- QPids]; noflow -> ok end, - delegate:invoke_no_result( - QPids, fun (QPid) -> - gen_server2:cast(QPid, {deliver, Delivery, Flow}) - end), - {routed, QPids}; -deliver(Qs, Delivery = #delivery{mandatory = Mandatory, immediate = Immediate}, - _Flow) -> - QPids = qpids(Qs), - {Success, _} = - delegate:invoke( - QPids, fun (QPid) -> - gen_server2:call(QPid, {deliver, Delivery}, infinity) - end), - case {Mandatory, Immediate, - lists:foldl(fun ({QPid, true}, {_, H}) -> {true, [QPid | H]}; - ({_, false}, {_, H}) -> {true, H} - end, {false, []}, Success)} of - {true, _ , {false, []}} -> {unroutable, []}; - {_ , true, {_ , []}} -> {not_delivered, []}; - {_ , _ , {_ , R}} -> {routed, R} - end. + %% We let slaves know that they were being addressed as slaves at + %% the time - if they receive such a message from the channel + %% after they have become master they should mark the message as + %% 'delivered' since they do not know what the master may have + %% done with it. + MMsg = {deliver, Delivery, false, Flow}, + SMsg = {deliver, Delivery, true, Flow}, + delegate:cast(MPids, MMsg), + delegate:cast(SPids, SMsg), + {routed, QPids}; -qpids(Qs) -> lists:append([[QPid | SPids] || - #amqqueue{pid = QPid, slave_pids = SPids} <- Qs]). - -safe_delegate_call_ok(F, Pids) -> - case delegate:invoke(Pids, fun (Pid) -> - rabbit_misc:with_exit_handler( - fun () -> ok end, - fun () -> F(Pid) end) - end) of - {_, []} -> ok; - {_, Bad} -> {error, Bad} +deliver(Qs, Delivery, _Flow) -> + {MPids, SPids} = qpids(Qs), + %% see comment above + MMsg = {deliver, Delivery, false}, + SMsg = {deliver, Delivery, true}, + {MRouted, _} = delegate:call(MPids, MMsg), + {SRouted, _} = delegate:call(SPids, SMsg), + case MRouted ++ SRouted of + [] -> {unroutable, []}; + R -> {routed, [QPid || {QPid, ok} <- R]} end. -delegate_call(Pid, Msg) -> - delegate:invoke(Pid, fun (P) -> gen_server2:call(P, Msg, infinity) end). - -delegate_cast(Pid, Msg) -> - delegate:invoke_no_result(Pid, fun (P) -> gen_server2:cast(P, Msg) end). +qpids([]) -> {[], []}; %% optimisation +qpids([#amqqueue{pid = QPid, slave_pids = SPids}]) -> {[QPid], SPids}; %% opt +qpids(Qs) -> + {MPids, SPids} = lists:foldl(fun (#amqqueue{pid = QPid, slave_pids = SPids}, + {MPidAcc, SPidAcc}) -> + {[QPid | MPidAcc], [SPids | SPidAcc]} + end, {[], []}, Qs), + {MPids, lists:append(SPids)}. diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 5701efeb..6e0eb9bf 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_amqqueue_process). @@ -26,11 +26,11 @@ -export([start_link/1, info_keys/0]). --export([init_with_backing_queue_state/8]). +-export([init_with_backing_queue_state/7]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, - handle_info/2, handle_pre_hibernate/1, prioritise_call/3, - prioritise_cast/2, prioritise_info/2, format_message_queue/2]). + handle_info/2, handle_pre_hibernate/1, prioritise_call/4, + prioritise_cast/3, prioritise_info/3, format_message_queue/2]). %% Queue's state -record(q, {q, @@ -47,13 +47,12 @@ msg_id_to_channel, ttl, ttl_timer_ref, + ttl_timer_expiry, senders, - publish_seqno, - unconfirmed, - delayed_stop, - queue_monitors, dlx, - dlx_routing_key + dlx_routing_key, + max_length, + status }). -record(consumer, {tag, ack_required}). @@ -63,9 +62,12 @@ monitor_ref, acktags, consumer_count, + %% Queue of {ChPid, #consumer{}} for consumers which have + %% been blocked for any reason blocked_consumers, + %% The limiter itself limiter, - is_limit_active, + %% Internal flow control for queue -> writer unsent_message_count}). %%---------------------------------------------------------------------------- @@ -75,8 +77,8 @@ -spec(start_link/1 :: (rabbit_types:amqqueue()) -> rabbit_types:ok_pid_or_error()). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). --spec(init_with_backing_queue_state/8 :: - (rabbit_types:amqqueue(), atom(), tuple(), any(), [any()], +-spec(init_with_backing_queue_state/7 :: + (rabbit_types:amqqueue(), atom(), tuple(), any(), [rabbit_types:delivery()], pmon:pmon(), dict()) -> #q{}). -endif. @@ -84,7 +86,8 @@ %%---------------------------------------------------------------------------- -define(STATISTICS_KEYS, - [pid, + [name, + policy, exclusive_consumer_pid, exclusive_consumer_tag, messages_ready, @@ -93,22 +96,20 @@ consumers, memory, slave_pids, - backing_queue_status + synchronised_slave_pids, + backing_queue_status, + status ]). -define(CREATION_EVENT_KEYS, - [pid, - name, + [name, durable, auto_delete, arguments, - owner_pid, - slave_pids, - synchronised_slave_pids + owner_pid ]). --define(INFO_KEYS, - ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid, slave_pids]). +-define(INFO_KEYS, [pid | ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [name]]). %%---------------------------------------------------------------------------- @@ -120,73 +121,59 @@ info_keys() -> ?INFO_KEYS. init(Q) -> process_flag(trap_exit, true), - - State = #q{q = Q#amqqueue{pid = self()}, - exclusive_consumer = none, - has_had_consumers = false, - backing_queue = backing_queue_module(Q), - backing_queue_state = undefined, - active_consumers = queue:new(), - expires = undefined, - sync_timer_ref = undefined, - rate_timer_ref = undefined, - expiry_timer_ref = undefined, - ttl = undefined, - senders = pmon:new(), - dlx = undefined, - dlx_routing_key = undefined, - publish_seqno = 1, - unconfirmed = dtree:empty(), - delayed_stop = undefined, - queue_monitors = pmon:new(), - msg_id_to_channel = gb_trees:empty()}, - {ok, rabbit_event:init_stats_timer(State, #q.stats_timer), hibernate, + {ok, init_state(Q#amqqueue{pid = self()}), hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, - RateTRef, AckTags, Deliveries, Senders, MTC) -> + RateTRef, Deliveries, Senders, MTC) -> case Owner of none -> ok; _ -> erlang:monitor(process, Owner) end, + State = init_state(Q), + State1 = State#q{backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = RateTRef, + senders = Senders, + msg_id_to_channel = MTC}, + State2 = process_args(State1), + lists:foldl(fun (Delivery, StateN) -> + deliver_or_enqueue(Delivery, true, StateN) + end, State2, Deliveries). + +init_state(Q) -> State = #q{q = Q, exclusive_consumer = none, has_had_consumers = false, - backing_queue = BQ, - backing_queue_state = BQS, active_consumers = queue:new(), - expires = undefined, - sync_timer_ref = undefined, - rate_timer_ref = RateTRef, - expiry_timer_ref = undefined, - ttl = undefined, - senders = Senders, - publish_seqno = 1, - unconfirmed = dtree:empty(), - delayed_stop = undefined, - queue_monitors = pmon:new(), - msg_id_to_channel = MTC}, - State1 = requeue_and_run(AckTags, process_args( - rabbit_event:init_stats_timer( - State, #q.stats_timer))), - lists:foldl( - fun (Delivery, StateN) -> deliver_or_enqueue(Delivery, StateN) end, - State1, Deliveries). + senders = pmon:new(delegate), + msg_id_to_channel = gb_trees:empty(), + status = running}, + rabbit_event:init_stats_timer(State, #q.stats_timer). terminate(shutdown = R, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); +terminate({shutdown, missing_owner} = Reason, State) -> + %% if the owner was missing then there will be no queue, so don't emit stats + terminate_shutdown(terminate_delete(false, Reason, State), State); terminate({shutdown, _} = R, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); -terminate(Reason, State = #q{q = #amqqueue{name = QName}, - backing_queue = BQ}) -> - %% FIXME: How do we cancel active subscriptions? - terminate_shutdown( - fun (BQS) -> - BQS1 = BQ:delete_and_terminate(Reason, BQS), - %% don't care if the internal delete doesn't return 'ok'. - rabbit_amqqueue:internal_delete(QName, self()), - BQS1 - end, State). +terminate(Reason, State) -> + terminate_shutdown(terminate_delete(true, Reason, State), State). + +terminate_delete(EmitStats, Reason, + State = #q{q = #amqqueue{name = QName}, + backing_queue = BQ}) -> + fun (BQS) -> + BQS1 = BQ:delete_and_terminate(Reason, BQS), + if EmitStats -> rabbit_event:if_enabled(State, #q.stats_timer, + fun() -> emit_stats(State) end); + true -> ok + end, + %% don't care if the internal delete doesn't return 'ok'. + rabbit_amqqueue:internal_delete(QName), + BQS1 + end. code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -194,34 +181,63 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- declare(Recover, From, State = #q{q = Q, - backing_queue = BQ, + backing_queue = undefined, backing_queue_state = undefined}) -> - case rabbit_amqqueue:internal_declare(Q, Recover) of - not_found -> {stop, normal, not_found, State}; - Q -> gen_server2:reply(From, {new, Q}), - ok = file_handle_cache:register_callback( - rabbit_amqqueue, set_maximum_since_use, - [self()]), - ok = rabbit_memory_monitor:register( - self(), {rabbit_amqqueue, - set_ram_duration_target, [self()]}), - BQS = bq_init(BQ, Q, Recover), - State1 = process_args(State#q{backing_queue_state = BQS}), - rabbit_event:notify(queue_created, - infos(?CREATION_EVENT_KEYS, State1)), - rabbit_event:if_enabled(State1, #q.stats_timer, - fun() -> emit_stats(State1) end), - noreply(State1); - Q1 -> {stop, normal, {existing, Q1}, State} + case rabbit_amqqueue:internal_declare(Q, Recover =/= new) of + #amqqueue{} = Q1 -> + case matches(Recover, Q, Q1) of + true -> + gen_server2:reply(From, {new, Q}), + ok = file_handle_cache:register_callback( + rabbit_amqqueue, set_maximum_since_use, [self()]), + ok = rabbit_memory_monitor:register( + self(), {rabbit_amqqueue, + set_ram_duration_target, [self()]}), + BQ = backing_queue_module(Q1), + BQS = bq_init(BQ, Q, Recover), + recovery_barrier(Recover), + State1 = process_args(State#q{backing_queue = BQ, + backing_queue_state = BQS}), + rabbit_event:notify(queue_created, + infos(?CREATION_EVENT_KEYS, State1)), + rabbit_event:if_enabled(State1, #q.stats_timer, + fun() -> emit_stats(State1) end), + noreply(State1); + false -> + {stop, normal, {existing, Q1}, State} + end; + Err -> + {stop, normal, Err, State} end. +matches(new, Q1, Q2) -> + %% i.e. not policy + Q1#amqqueue.name =:= Q2#amqqueue.name andalso + Q1#amqqueue.durable =:= Q2#amqqueue.durable andalso + Q1#amqqueue.auto_delete =:= Q2#amqqueue.auto_delete andalso + Q1#amqqueue.exclusive_owner =:= Q2#amqqueue.exclusive_owner andalso + Q1#amqqueue.arguments =:= Q2#amqqueue.arguments andalso + Q1#amqqueue.pid =:= Q2#amqqueue.pid andalso + Q1#amqqueue.slave_pids =:= Q2#amqqueue.slave_pids; +matches(_, Q, Q) -> true; +matches(_, _Q, _Q1) -> false. + bq_init(BQ, Q, Recover) -> Self = self(), - BQ:init(Q, Recover, + BQ:init(Q, Recover =/= new, fun (Mod, Fun) -> rabbit_amqqueue:run_backing_queue(Self, Mod, Fun) end). +recovery_barrier(new) -> + ok; +recovery_barrier(BarrierPid) -> + MRef = erlang:monitor(process, BarrierPid), + receive + {BarrierPid, go} -> erlang:demonitor(MRef, [flush]); + {'DOWN', MRef, process, _, _} -> ok + end. + process_args(State = #q{q = #amqqueue{arguments = Arguments}}) -> lists:foldl( fun({Arg, Fun}, State1) -> @@ -231,13 +247,14 @@ process_args(State = #q{q = #amqqueue{arguments = Arguments}}) -> end end, State, [{<<"x-expires">>, fun init_expires/2}, - {<<"x-message-ttl">>, fun init_ttl/2}, {<<"x-dead-letter-exchange">>, fun init_dlx/2}, - {<<"x-dead-letter-routing-key">>, fun init_dlx_routing_key/2}]). + {<<"x-dead-letter-routing-key">>, fun init_dlx_routing_key/2}, + {<<"x-message-ttl">>, fun init_ttl/2}, + {<<"x-max-length">>, fun init_max_length/2}]). init_expires(Expires, State) -> ensure_expiry_timer(State#q{expires = Expires}). -init_ttl(TTL, State) -> drop_expired_messages(State#q{ttl = TTL}). +init_ttl(TTL, State) -> drop_expired_msgs(State#q{ttl = TTL}). init_dlx(DLX, State = #q{q = #amqqueue{name = QName}}) -> State#q{dlx = rabbit_misc:r(QName, exchange, DLX)}. @@ -245,80 +262,61 @@ init_dlx(DLX, State = #q{q = #amqqueue{name = QName}}) -> init_dlx_routing_key(RoutingKey, State) -> State#q{dlx_routing_key = RoutingKey}. +init_max_length(MaxLen, State) -> State#q{max_length = MaxLen}. + terminate_shutdown(Fun, State) -> State1 = #q{backing_queue_state = BQS} = - stop_sync_timer(stop_rate_timer(State)), + lists:foldl(fun (F, S) -> F(S) end, State, + [fun stop_sync_timer/1, + fun stop_rate_timer/1, + fun stop_expiry_timer/1, + fun stop_ttl_timer/1]), case BQS of undefined -> State1; _ -> ok = rabbit_memory_monitor:deregister(self()), - [emit_consumer_deleted(Ch, CTag) + QName = qname(State), + [emit_consumer_deleted(Ch, CTag, QName) || {Ch, CTag, _} <- consumers(State1)], State1#q{backing_queue_state = Fun(BQS)} end. reply(Reply, NewState) -> - assert_invariant(NewState), {NewState1, Timeout} = next_state(NewState), - {reply, Reply, NewState1, Timeout}. + {reply, Reply, ensure_stats_timer(ensure_rate_timer(NewState1)), Timeout}. noreply(NewState) -> - assert_invariant(NewState), {NewState1, Timeout} = next_state(NewState), - {noreply, NewState1, Timeout}. + {noreply, ensure_stats_timer(ensure_rate_timer(NewState1)), Timeout}. next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> + assert_invariant(State), {MsgIds, BQS1} = BQ:drain_confirmed(BQS), - State1 = ensure_stats_timer( - ensure_rate_timer( - confirm_messages(MsgIds, State#q{ - backing_queue_state = BQS1}))), + State1 = confirm_messages(MsgIds, State#q{backing_queue_state = BQS1}), case BQ:needs_timeout(BQS1) of false -> {stop_sync_timer(State1), hibernate }; idle -> {stop_sync_timer(State1), ?SYNC_INTERVAL}; timed -> {ensure_sync_timer(State1), 0 } end. -backing_queue_module(#amqqueue{arguments = Args}) -> - case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) of - undefined -> {ok, BQM} = application:get_env(backing_queue_module), - BQM; - _Policy -> rabbit_mirror_queue_master +backing_queue_module(Q) -> + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> {ok, BQM} = application:get_env(backing_queue_module), + BQM; + true -> rabbit_mirror_queue_master end. -ensure_sync_timer(State = #q{sync_timer_ref = undefined}) -> - TRef = erlang:send_after(?SYNC_INTERVAL, self(), sync_timeout), - State#q{sync_timer_ref = TRef}; ensure_sync_timer(State) -> - State. + rabbit_misc:ensure_timer(State, #q.sync_timer_ref, + ?SYNC_INTERVAL, sync_timeout). + +stop_sync_timer(State) -> rabbit_misc:stop_timer(State, #q.sync_timer_ref). -stop_sync_timer(State = #q{sync_timer_ref = undefined}) -> - State; -stop_sync_timer(State = #q{sync_timer_ref = TRef}) -> - erlang:cancel_timer(TRef), - State#q{sync_timer_ref = undefined}. - -ensure_rate_timer(State = #q{rate_timer_ref = undefined}) -> - TRef = erlang:send_after( - ?RAM_DURATION_UPDATE_INTERVAL, self(), update_ram_duration), - State#q{rate_timer_ref = TRef}; -ensure_rate_timer(State = #q{rate_timer_ref = just_measured}) -> - State#q{rate_timer_ref = undefined}; ensure_rate_timer(State) -> - State. + rabbit_misc:ensure_timer(State, #q.rate_timer_ref, + ?RAM_DURATION_UPDATE_INTERVAL, + update_ram_duration). -stop_rate_timer(State = #q{rate_timer_ref = undefined}) -> - State; -stop_rate_timer(State = #q{rate_timer_ref = just_measured}) -> - State#q{rate_timer_ref = undefined}; -stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> - erlang:cancel_timer(TRef), - State#q{rate_timer_ref = undefined}. - -stop_expiry_timer(State = #q{expiry_timer_ref = undefined}) -> - State; -stop_expiry_timer(State = #q{expiry_timer_ref = TRef}) -> - erlang:cancel_timer(TRef), - State#q{expiry_timer_ref = undefined}. +stop_rate_timer(State) -> rabbit_misc:stop_timer(State, #q.rate_timer_ref). %% We wish to expire only when there are no consumers *and* the expiry %% hasn't been refreshed (by queue.declare or basic.get) for the @@ -328,17 +326,41 @@ ensure_expiry_timer(State = #q{expires = undefined}) -> ensure_expiry_timer(State = #q{expires = Expires}) -> case is_unused(State) of true -> NewState = stop_expiry_timer(State), - TRef = erlang:send_after(Expires, self(), maybe_expire), - NewState#q{expiry_timer_ref = TRef}; + rabbit_misc:ensure_timer(NewState, #q.expiry_timer_ref, + Expires, maybe_expire); false -> State end. +stop_expiry_timer(State) -> rabbit_misc:stop_timer(State, #q.expiry_timer_ref). + +ensure_ttl_timer(undefined, State) -> + State; +ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = undefined}) -> + After = (case Expiry - now_micros() of + V when V > 0 -> V + 999; %% always fire later + _ -> 0 + end) div 1000, + TRef = erlang:send_after(After, self(), drop_expired), + State#q{ttl_timer_ref = TRef, ttl_timer_expiry = Expiry}; +ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = TRef, + ttl_timer_expiry = TExpiry}) + when Expiry + 1000 < TExpiry -> + case erlang:cancel_timer(TRef) of + false -> State; + _ -> ensure_ttl_timer(Expiry, State#q{ttl_timer_ref = undefined}) + end; +ensure_ttl_timer(_Expiry, State) -> + State. + +stop_ttl_timer(State) -> rabbit_misc:stop_timer(State, #q.ttl_timer_ref). + ensure_stats_timer(State) -> rabbit_event:ensure_stats_timer(State, #q.stats_timer, emit_stats). -assert_invariant(#q{active_consumers = AC, - backing_queue = BQ, backing_queue_state = BQS}) -> - true = (queue:is_empty(AC) orelse BQ:is_empty(BQS)). +assert_invariant(State = #q{active_consumers = AC}) -> + true = (queue:is_empty(AC) orelse is_empty(State)). + +is_empty(#q{backing_queue = BQ, backing_queue_state = BQS}) -> BQ:is_empty(BQS). lookup_ch(ChPid) -> case get({ch, ChPid}) of @@ -346,17 +368,17 @@ lookup_ch(ChPid) -> C -> C end. -ch_record(ChPid) -> +ch_record(ChPid, LimiterPid) -> Key = {ch, ChPid}, case get(Key) of undefined -> MonitorRef = erlang:monitor(process, ChPid), + Limiter = rabbit_limiter:client(LimiterPid), C = #cr{ch_pid = ChPid, monitor_ref = MonitorRef, - acktags = sets:new(), + acktags = queue:new(), consumer_count = 0, blocked_consumers = queue:new(), - is_limit_active = false, - limiter = rabbit_limiter:make_token(), + limiter = Limiter, unsent_message_count = 0}, put(Key, C), C; @@ -366,9 +388,9 @@ ch_record(ChPid) -> update_ch_record(C = #cr{consumer_count = ConsumerCount, acktags = ChAckTags, unsent_message_count = UnsentMessageCount}) -> - case {sets:size(ChAckTags), ConsumerCount, UnsentMessageCount} of - {0, 0, 0} -> ok = erase_ch_record(C); - _ -> ok = store_ch_record(C) + case {queue:is_empty(ChAckTags), ConsumerCount, UnsentMessageCount} of + {true, 0, 0} -> ok = erase_ch_record(C); + _ -> ok = store_ch_record(C) end, C. @@ -376,37 +398,32 @@ store_ch_record(C = #cr{ch_pid = ChPid}) -> put({ch, ChPid}, C), ok. -erase_ch_record(#cr{ch_pid = ChPid, - limiter = Limiter, - monitor_ref = MonitorRef}) -> - ok = rabbit_limiter:unregister(Limiter, self()), +erase_ch_record(#cr{ch_pid = ChPid, monitor_ref = MonitorRef}) -> erlang:demonitor(MonitorRef), erase({ch, ChPid}), ok. -update_consumer_count(C = #cr{consumer_count = 0, limiter = Limiter}, +1) -> - ok = rabbit_limiter:register(Limiter, self()), - update_ch_record(C#cr{consumer_count = 1}); -update_consumer_count(C = #cr{consumer_count = 1, limiter = Limiter}, -1) -> - ok = rabbit_limiter:unregister(Limiter, self()), - update_ch_record(C#cr{consumer_count = 0, - limiter = rabbit_limiter:make_token()}); -update_consumer_count(C = #cr{consumer_count = Count}, Delta) -> - update_ch_record(C#cr{consumer_count = Count + Delta}). - all_ch_record() -> [C || {{ch, _}, C} <- get()]. block_consumer(C = #cr{blocked_consumers = Blocked}, QEntry) -> update_ch_record(C#cr{blocked_consumers = queue:in(QEntry, Blocked)}). -is_ch_blocked(#cr{unsent_message_count = Count, is_limit_active = Limited}) -> - Limited orelse Count >= ?UNSENT_MESSAGE_LIMIT. +is_ch_blocked(#cr{unsent_message_count = Count, limiter = Limiter}) -> + Count >= ?UNSENT_MESSAGE_LIMIT orelse rabbit_limiter:is_suspended(Limiter). -ch_record_state_transition(OldCR, NewCR) -> - case {is_ch_blocked(OldCR), is_ch_blocked(NewCR)} of - {true, false} -> unblock; - {false, true} -> block; - {_, _} -> ok +maybe_send_drained(WasEmpty, State) -> + case (not WasEmpty) andalso is_empty(State) of + true -> [send_drained(C) || C <- all_ch_record()]; + false -> ok + end, + State. + +send_drained(C = #cr{ch_pid = ChPid, limiter = Limiter}) -> + case rabbit_limiter:drained(Limiter) of + {[], Limiter} -> ok; + {CTagCredit, Limiter2} -> rabbit_channel:send_drained( + ChPid, CTagCredit), + update_ch_record(C#cr{limiter = Limiter2}) end. deliver_msgs_to_consumers(_DeliverFun, true, State) -> @@ -424,18 +441,21 @@ deliver_msgs_to_consumers(DeliverFun, false, end. deliver_msg_to_consumer(DeliverFun, E = {ChPid, Consumer}, State) -> - C = ch_record(ChPid), + C = lookup_ch(ChPid), case is_ch_blocked(C) of true -> block_consumer(C, E), {false, State}; - false -> case rabbit_limiter:can_send(C#cr.limiter, self(), - Consumer#consumer.ack_required) of - false -> block_consumer(C#cr{is_limit_active = true}, E), - {false, State}; - true -> AC1 = queue:in(E, State#q.active_consumers), - deliver_msg_to_consumer( - DeliverFun, Consumer, C, - State#q{active_consumers = AC1}) + false -> case rabbit_limiter:can_send(C#cr.limiter, + Consumer#consumer.ack_required, + Consumer#consumer.tag) of + {suspend, Limiter} -> + block_consumer(C#cr{limiter = Limiter}, E), + {false, State}; + {continue, Limiter} -> + AC1 = queue:in(E, State#q.active_consumers), + deliver_msg_to_consumer( + DeliverFun, Consumer, C#cr{limiter = Limiter}, + State#q{active_consumers = AC1}) end end. @@ -451,7 +471,7 @@ deliver_msg_to_consumer(DeliverFun, rabbit_channel:deliver(ChPid, ConsumerTag, AckRequired, {QName, self(), AckTag, IsDelivered, Message}), ChAckTags1 = case AckRequired of - true -> sets:add_element(AckTag, ChAckTags); + true -> queue:in(AckTag, ChAckTags); false -> ChAckTags end, update_ch_record(C#cr{acktags = ChAckTags1, @@ -459,9 +479,8 @@ deliver_msg_to_consumer(DeliverFun, {Stop, State1}. deliver_from_queue_deliver(AckRequired, State) -> - {{Message, IsDelivered, AckTag, Remaining}, State1} = - fetch(AckRequired, State), - {{Message, IsDelivered, AckTag}, 0 == Remaining, State1}. + {Result, State1} = fetch(AckRequired, State), + {Result, is_empty(State1), State1}. confirm_messages([], State) -> State; @@ -481,106 +500,142 @@ confirm_messages(MsgIds, State = #q{msg_id_to_channel = MTC}) -> rabbit_misc:gb_trees_foreach(fun rabbit_misc:confirm_to_sender/2, CMs), State#q{msg_id_to_channel = MTC1}. -should_confirm_message(#delivery{msg_seq_no = undefined}, _State) -> - never; -should_confirm_message(#delivery{sender = SenderPid, +send_or_record_confirm(#delivery{msg_seq_no = undefined}, State) -> + {never, State}; +send_or_record_confirm(#delivery{sender = SenderPid, msg_seq_no = MsgSeqNo, message = #basic_message { is_persistent = true, id = MsgId}}, - #q{q = #amqqueue{durable = true}}) -> - {eventually, SenderPid, MsgSeqNo, MsgId}; -should_confirm_message(#delivery{sender = SenderPid, - msg_seq_no = MsgSeqNo}, - _State) -> - {immediately, SenderPid, MsgSeqNo}. - -needs_confirming({eventually, _, _, _}) -> true; -needs_confirming(_) -> false. - -maybe_record_confirm_message({eventually, SenderPid, MsgSeqNo, MsgId}, - State = #q{msg_id_to_channel = MTC}) -> - State#q{msg_id_to_channel = - gb_trees:insert(MsgId, {SenderPid, MsgSeqNo}, MTC)}; -maybe_record_confirm_message({immediately, SenderPid, MsgSeqNo}, State) -> + State = #q{q = #amqqueue{durable = true}, + msg_id_to_channel = MTC}) -> + MTC1 = gb_trees:insert(MsgId, {SenderPid, MsgSeqNo}, MTC), + {eventually, State#q{msg_id_to_channel = MTC1}}; +send_or_record_confirm(#delivery{sender = SenderPid, + msg_seq_no = MsgSeqNo}, State) -> rabbit_misc:confirm_to_sender(SenderPid, [MsgSeqNo]), - State; -maybe_record_confirm_message(_Confirm, State) -> - State. + {immediately, State}. -run_message_queue(State) -> +discard(#delivery{sender = SenderPid, + msg_seq_no = MsgSeqNo, + message = #basic_message{id = MsgId}}, State) -> State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = - drop_expired_messages(State), - {_IsEmpty1, State2} = deliver_msgs_to_consumers( + case MsgSeqNo of + undefined -> State; + _ -> confirm_messages([MsgId], State) + end, + BQS1 = BQ:discard(MsgId, SenderPid, BQS), + State1#q{backing_queue_state = BQS1}. + +run_message_queue(State) -> + {_IsEmpty1, State1} = deliver_msgs_to_consumers( fun deliver_from_queue_deliver/2, - BQ:is_empty(BQS), State1), - State2. + is_empty(State), State), + State1. -attempt_delivery(#delivery{sender = SenderPid, message = Message}, Confirm, - State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> +attempt_delivery(Delivery = #delivery{sender = SenderPid, message = Message}, + Props, Delivered, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> case BQ:is_duplicate(Message, BQS) of {false, BQS1} -> deliver_msgs_to_consumers( - fun (AckRequired, State1 = #q{backing_queue_state = BQS2}) -> - Props = message_properties(Confirm, State1), + fun (true, State1 = #q{backing_queue_state = BQS2}) -> + true = BQ:is_empty(BQS2), {AckTag, BQS3} = BQ:publish_delivered( - AckRequired, Message, Props, - SenderPid, BQS2), - {{Message, false, AckTag}, true, - State1#q{backing_queue_state = BQS3}} + Message, Props, SenderPid, BQS2), + {{Message, Delivered, AckTag}, + true, State1#q{backing_queue_state = BQS3}}; + (false, State1) -> + {{Message, Delivered, undefined}, + true, discard(Delivery, State1)} end, false, State#q{backing_queue_state = BQS1}); - {Duplicate, BQS1} -> - %% if the message has previously been seen by the BQ then - %% it must have been seen under the same circumstances as - %% now: i.e. if it is now a deliver_immediately then it - %% must have been before. - {case Duplicate of - published -> true; - discarded -> false - end, - State#q{backing_queue_state = BQS1}} + {true, BQS1} -> + {true, State#q{backing_queue_state = BQS1}} + end. + +deliver_or_enqueue(Delivery = #delivery{message = Message, sender = SenderPid}, + Delivered, State) -> + {Confirm, State1} = send_or_record_confirm(Delivery, State), + Props = message_properties(Message, Confirm, State), + case attempt_delivery(Delivery, Props, Delivered, State1) of + {true, State2} -> + State2; + %% The next one is an optimisation + {false, State2 = #q{ttl = 0, dlx = undefined}} -> + discard(Delivery, State2); + {false, State2 = #q{backing_queue = BQ, backing_queue_state = BQS}} -> + BQS1 = BQ:publish(Message, Props, Delivered, SenderPid, BQS), + {Dropped, State3 = #q{backing_queue_state = BQS2}} = + maybe_drop_head(State2#q{backing_queue_state = BQS1}), + QLen = BQ:len(BQS2), + %% optimisation: it would be perfectly safe to always + %% invoke drop_expired_msgs here, but that is expensive so + %% we only do that if a new message that might have an + %% expiry ends up at the head of the queue. If the head + %% remains unchanged, or if the newly published message + %% has no expiry and becomes the head of the queue then + %% the call is unnecessary. + case {Dropped > 0, QLen =:= 1, Props#message_properties.expiry} of + {false, false, _} -> State3; + {true, true, undefined} -> State3; + {_, _, _} -> drop_expired_msgs(State3) + end end. -deliver_or_enqueue(Delivery = #delivery{message = Message, - msg_seq_no = MsgSeqNo, - sender = SenderPid}, State) -> - Confirm = should_confirm_message(Delivery, State), - case attempt_delivery(Delivery, Confirm, State) of - {true, State1} -> - maybe_record_confirm_message(Confirm, State1); - %% the next two are optimisations - {false, State1 = #q{ttl = 0, dlx = undefined}} when Confirm == never -> - discard_delivery(Delivery, State1); - {false, State1 = #q{ttl = 0, dlx = undefined}} -> - rabbit_misc:confirm_to_sender(SenderPid, [MsgSeqNo]), - discard_delivery(Delivery, State1); - {false, State1} -> - State2 = #q{backing_queue = BQ, backing_queue_state = BQS} = - maybe_record_confirm_message(Confirm, State1), - Props = message_properties(Confirm, State2), - BQS1 = BQ:publish(Message, Props, SenderPid, BQS), - ensure_ttl_timer(State2#q{backing_queue_state = BQS1}) +maybe_drop_head(State = #q{max_length = undefined}) -> + {0, State}; +maybe_drop_head(State = #q{max_length = MaxLen, + backing_queue = BQ, + backing_queue_state = BQS}) -> + case BQ:len(BQS) - MaxLen of + Excess when Excess > 0 -> + {Excess, + with_dlx( + State#q.dlx, + fun (X) -> dead_letter_maxlen_msgs(X, Excess, State) end, + fun () -> + {_, BQS1} = lists:foldl(fun (_, {_, BQS0}) -> + BQ:drop(false, BQS0) + end, {ok, BQS}, + lists:seq(1, Excess)), + State#q{backing_queue_state = BQS1} + end)}; + _ -> {0, State} end. -requeue_and_run(AckTags, State = #q{backing_queue = BQ}) -> - run_backing_queue(BQ, fun (M, BQS) -> - {_MsgIds, BQS1} = M:requeue(AckTags, BQS), - BQS1 - end, State). +requeue_and_run(AckTags, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + WasEmpty = BQ:is_empty(BQS), + {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), + {_Dropped, State1} = maybe_drop_head(State#q{backing_queue_state = BQS1}), + run_message_queue(maybe_send_drained(WasEmpty, drop_expired_msgs(State1))). -fetch(AckRequired, State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> +fetch(AckRequired, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> {Result, BQS1} = BQ:fetch(AckRequired, BQS), - {Result, State#q{backing_queue_state = BQS1}}. + State1 = drop_expired_msgs(State#q{backing_queue_state = BQS1}), + {Result, maybe_send_drained(Result =:= empty, State1)}. + +ack(AckTags, ChPid, State) -> + subtract_acks(ChPid, AckTags, State, + fun (State1 = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {_Guids, BQS1} = BQ:ack(AckTags, BQS), + State1#q{backing_queue_state = BQS1} + end). + +requeue(AckTags, ChPid, State) -> + subtract_acks(ChPid, AckTags, State, + fun (State1) -> requeue_and_run(AckTags, State1) end). remove_consumer(ChPid, ConsumerTag, Queue) -> queue:filter(fun ({CP, #consumer{tag = CTag}}) -> (CP /= ChPid) or (CTag /= ConsumerTag) end, Queue). -remove_consumers(ChPid, Queue) -> +remove_consumers(ChPid, Queue, QName) -> queue:filter(fun ({CP, #consumer{tag = CTag}}) when CP =:= ChPid -> - emit_consumer_deleted(ChPid, CTag), + emit_consumer_deleted(ChPid, CTag, QName), false; (_) -> true @@ -588,20 +643,29 @@ remove_consumers(ChPid, Queue) -> possibly_unblock(State, ChPid, Update) -> case lookup_ch(ChPid) of - not_found -> + not_found -> State; + C -> C1 = Update(C), + case is_ch_blocked(C) andalso not is_ch_blocked(C1) of + false -> update_ch_record(C1), + State; + true -> unblock(State, C1) + end + end. + +unblock(State, C = #cr{limiter = Limiter}) -> + case lists:partition( + fun({_ChPid, #consumer{tag = CTag}}) -> + rabbit_limiter:is_consumer_blocked(Limiter, CTag) + end, queue:to_list(C#cr.blocked_consumers)) of + {_, []} -> + update_ch_record(C), State; - C -> - C1 = Update(C), - case ch_record_state_transition(C, C1) of - ok -> update_ch_record(C1), - State; - unblock -> #cr{blocked_consumers = Consumers} = C1, - update_ch_record( - C1#cr{blocked_consumers = queue:new()}), - AC1 = queue:join(State#q.active_consumers, - Consumers), - run_message_queue(State#q{active_consumers = AC1}) - end + {Blocked, Unblocked} -> + BlockedQ = queue:from_list(Blocked), + UnblockedQ = queue:from_list(Unblocked), + update_ch_record(C#cr{blocked_consumers = BlockedQ}), + AC1 = queue:join(State#q.active_consumers, UnblockedQ), + run_message_queue(State#q{active_consumers = AC1}) end. should_auto_delete(#q{q = #amqqueue{auto_delete = false}}) -> false; @@ -621,7 +685,8 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder, C = #cr{ch_pid = ChPid, acktags = ChAckTags, blocked_consumers = Blocked} -> - _ = remove_consumers(ChPid, Blocked), %% for stats emission + QName = qname(State), + _ = remove_consumers(ChPid, Blocked, QName), %% for stats emission ok = erase_ch_record(C), State1 = State#q{ exclusive_consumer = case Holder of @@ -629,11 +694,12 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder, Other -> Other end, active_consumers = remove_consumers( - ChPid, State#q.active_consumers), + ChPid, State#q.active_consumers, + QName), senders = Senders1}, case should_auto_delete(State1) of true -> {stop, State1}; - false -> {ok, requeue_and_run(sets:to_list(ChAckTags), + false -> {ok, requeue_and_run(queue:to_list(ChAckTags), ensure_expiry_timer(State1))} end end. @@ -648,13 +714,8 @@ check_exclusive_access(none, true, State) -> false -> in_use end. -consumer_count() -> consumer_count(fun (_) -> false end). - -active_consumer_count() -> consumer_count(fun is_ch_blocked/1). - -consumer_count(Exclude) -> - lists:sum([Count || C = #cr{consumer_count = Count} <- all_ch_record(), - not Exclude(C)]). +consumer_count() -> + lists:sum([Count || #cr{consumer_count = Count} <- all_ch_record()]). is_unused(_State) -> consumer_count() == 0. @@ -663,158 +724,130 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). qname(#q{q = #amqqueue{name = QName}}) -> QName. -backing_queue_timeout(State = #q{backing_queue = BQ}) -> - run_backing_queue(BQ, fun (M, BQS) -> M:timeout(BQS) end, State). - -run_backing_queue(Mod, Fun, State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - run_message_queue(State#q{backing_queue_state = BQ:invoke(Mod, Fun, BQS)}). +backing_queue_timeout(State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + State#q{backing_queue_state = BQ:timeout(BQS)}. subtract_acks(ChPid, AckTags, State, Fun) -> case lookup_ch(ChPid) of not_found -> State; C = #cr{acktags = ChAckTags} -> - update_ch_record(C#cr{acktags = lists:foldl(fun sets:del_element/2, - ChAckTags, AckTags)}), + update_ch_record( + C#cr{acktags = subtract_acks(AckTags, [], ChAckTags)}), Fun(State) end. -discard_delivery(#delivery{sender = SenderPid, - message = Message}, - State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - State#q{backing_queue_state = BQ:discard(Message, SenderPid, BQS)}. - -message_properties(Confirm, #q{ttl = TTL}) -> - #message_properties{expiry = calculate_msg_expiry(TTL), - needs_confirming = needs_confirming(Confirm)}. - -calculate_msg_expiry(undefined) -> undefined; -calculate_msg_expiry(TTL) -> now_micros() + (TTL * 1000). +subtract_acks([], [], AckQ) -> + AckQ; +subtract_acks([], Prefix, AckQ) -> + queue:join(queue:from_list(lists:reverse(Prefix)), AckQ); +subtract_acks([T | TL] = AckTags, Prefix, AckQ) -> + case queue:out(AckQ) of + {{value, T}, QTail} -> subtract_acks(TL, Prefix, QTail); + {{value, AT}, QTail} -> subtract_acks(AckTags, [AT | Prefix], QTail) + end. -drop_expired_messages(State = #q{ttl = undefined}) -> - State; -drop_expired_messages(State = #q{backing_queue_state = BQS, - backing_queue = BQ }) -> - Now = now_micros(), - DLXFun = dead_letter_fun(expired, State), - ExpirePred = fun (#message_properties{expiry = Expiry}) -> Now > Expiry end, - case DLXFun of - undefined -> {undefined, BQS1} = BQ:dropwhile(ExpirePred, false, BQS), - BQS1; - _ -> {Msgs, BQS1} = BQ:dropwhile(ExpirePred, true, BQS), - lists:foreach( - fun({Msg, AckTag}) -> DLXFun(Msg, AckTag) end, Msgs), - BQS1 - end, - ensure_ttl_timer(State#q{backing_queue_state = BQS1}). - -ensure_ttl_timer(State = #q{backing_queue = BQ, - backing_queue_state = BQS, - ttl = TTL, - ttl_timer_ref = undefined}) - when TTL =/= undefined -> - case BQ:is_empty(BQS) of - true -> State; - false -> TRef = erlang:send_after(TTL, self(), drop_expired), - State#q{ttl_timer_ref = TRef} - end; -ensure_ttl_timer(State) -> - State. +message_properties(Message, Confirm, #q{ttl = TTL}) -> + #message_properties{expiry = calculate_msg_expiry(Message, TTL), + needs_confirming = Confirm == eventually}. -ack_if_no_dlx(AckTags, State = #q{dlx = undefined, - backing_queue = BQ, - backing_queue_state = BQS }) -> - {_Guids, BQS1} = BQ:ack(AckTags, BQS), - State#q{backing_queue_state = BQS1}; -ack_if_no_dlx(_AckTags, State) -> - State. - -dead_letter_fun(_Reason, #q{dlx = undefined}) -> - undefined; -dead_letter_fun(Reason, _State) -> - fun(Msg, AckTag) -> - gen_server2:cast(self(), {dead_letter, {Msg, AckTag}, Reason}) +calculate_msg_expiry(#basic_message{content = Content}, TTL) -> + #content{properties = Props} = + rabbit_binary_parser:ensure_content_decoded(Content), + %% We assert that the expiration must be valid - we check in the channel. + {ok, MsgTTL} = rabbit_basic:parse_expiration(Props), + case lists:min([TTL, MsgTTL]) of + undefined -> undefined; + T -> now_micros() + T * 1000 end. -dead_letter_publish(Msg, Reason, State = #q{publish_seqno = MsgSeqNo}) -> - DLMsg = #basic_message{exchange_name = XName} = - make_dead_letter_msg(Reason, Msg, State), - case rabbit_exchange:lookup(XName) of - {ok, X} -> - Delivery = rabbit_basic:delivery(false, false, DLMsg, MsgSeqNo), - {Queues, Cycles} = detect_dead_letter_cycles( - DLMsg, rabbit_exchange:route(X, Delivery)), - lists:foreach(fun log_cycle_once/1, Cycles), - QPids = rabbit_amqqueue:lookup(Queues), - {_, DeliveredQPids} = rabbit_amqqueue:deliver(QPids, Delivery), - DeliveredQPids; - {error, not_found} -> - [] +%% Logically this function should invoke maybe_send_drained/2. +%% However, that is expensive. Since some frequent callers of +%% drop_expired_msgs/1, in particular deliver_or_enqueue/3, cannot +%% possibly cause the queue to become empty, we push the +%% responsibility to the callers. So be cautious when adding new ones. +drop_expired_msgs(State) -> + case is_empty(State) of + true -> State; + false -> drop_expired_msgs(now_micros(), State) end. -dead_letter_msg(Msg, AckTag, Reason, State = #q{publish_seqno = MsgSeqNo, - unconfirmed = UC}) -> - QPids = dead_letter_publish(Msg, Reason, State), - State1 = State#q{queue_monitors = pmon:monitor_all( - QPids, State#q.queue_monitors), - publish_seqno = MsgSeqNo + 1}, - case QPids of - [] -> cleanup_after_confirm([AckTag], State1); - _ -> UC1 = dtree:insert(MsgSeqNo, QPids, AckTag, UC), - noreply(State1#q{unconfirmed = UC1}) - end. +drop_expired_msgs(Now, State = #q{backing_queue_state = BQS, + backing_queue = BQ }) -> + ExpirePred = fun (#message_properties{expiry = Exp}) -> Now >= Exp end, + {Props, State1} = + with_dlx( + State#q.dlx, + fun (X) -> dead_letter_expired_msgs(ExpirePred, X, State) end, + fun () -> {Next, BQS1} = BQ:dropwhile(ExpirePred, BQS), + {Next, State#q{backing_queue_state = BQS1}} end), + ensure_ttl_timer(case Props of + undefined -> undefined; + #message_properties{expiry = Exp} -> Exp + end, State1). + +with_dlx(undefined, _With, Without) -> Without(); +with_dlx(DLX, With, Without) -> case rabbit_exchange:lookup(DLX) of + {ok, X} -> With(X); + {error, not_found} -> Without() + end. + +dead_letter_expired_msgs(ExpirePred, X, State = #q{backing_queue = BQ}) -> + dead_letter_msgs(fun (DLFun, Acc, BQS1) -> + BQ:fetchwhile(ExpirePred, DLFun, Acc, BQS1) + end, expired, X, State). + +dead_letter_rejected_msgs(AckTags, X, State = #q{backing_queue = BQ}) -> + {ok, State1} = + dead_letter_msgs( + fun (DLFun, Acc, BQS) -> + {Acc1, BQS1} = BQ:ackfold(DLFun, Acc, BQS, AckTags), + {ok, Acc1, BQS1} + end, rejected, X, State), + State1. + +dead_letter_maxlen_msgs(X, Excess, State = #q{backing_queue = BQ}) -> + {ok, State1} = + dead_letter_msgs( + fun (DLFun, Acc, BQS) -> + lists:foldl(fun (_, {ok, Acc0, BQS0}) -> + {{Msg, _, AckTag}, BQS1} = + BQ:fetch(true, BQS0), + {ok, DLFun(Msg, AckTag, Acc0), BQS1} + end, {ok, Acc, BQS}, lists:seq(1, Excess)) + end, maxlen, X, State), + State1. + +dead_letter_msgs(Fun, Reason, X, State = #q{dlx_routing_key = RK, + backing_queue_state = BQS, + backing_queue = BQ}) -> + QName = qname(State), + {Res, Acks1, BQS1} = + Fun(fun (Msg, AckTag, Acks) -> + dead_letter_publish(Msg, Reason, X, RK, QName), + [AckTag | Acks] + end, [], BQS), + {_Guids, BQS2} = BQ:ack(Acks1, BQS1), + {Res, State#q{backing_queue_state = BQS2}}. + +dead_letter_publish(Msg, Reason, X, RK, QName) -> + DLMsg = make_dead_letter_msg(Msg, Reason, X#exchange.name, RK, QName), + Delivery = rabbit_basic:delivery(false, DLMsg, undefined), + {Queues, Cycles} = detect_dead_letter_cycles( + Reason, DLMsg, rabbit_exchange:route(X, Delivery)), + lists:foreach(fun log_cycle_once/1, Cycles), + rabbit_amqqueue:deliver( rabbit_amqqueue:lookup(Queues), Delivery), + ok. -handle_queue_down(QPid, Reason, State = #q{queue_monitors = QMons, - unconfirmed = UC}) -> - case pmon:is_monitored(QPid, QMons) of - false -> noreply(State); - true -> case rabbit_misc:is_abnormal_termination(Reason) of - true -> {Lost, _UC1} = dtree:take_all(QPid, UC), - QNameS = rabbit_misc:rs(qname(State)), - rabbit_log:warning("DLQ ~p for ~s died with " - "~p unconfirmed messages~n", - [QPid, QNameS, length(Lost)]); - false -> ok - end, - {MsgSeqNoAckTags, UC1} = dtree:take(QPid, UC), - cleanup_after_confirm( - [AckTag || {_MsgSeqNo, AckTag} <- MsgSeqNoAckTags], - State#q{queue_monitors = pmon:erase(QPid, QMons), - unconfirmed = UC1}) - end. +stop(State) -> stop(noreply, State). -stop_later(Reason, State) -> - stop_later(Reason, undefined, noreply, State). - -stop_later(Reason, From, Reply, State = #q{unconfirmed = UC}) -> - case {dtree:is_empty(UC), Reply} of - {true, noreply} -> - {stop, Reason, State}; - {true, _} -> - {stop, Reason, Reply, State}; - {false, _} -> - noreply(State#q{delayed_stop = {Reason, {From, Reply}}}) - end. +stop(noreply, State) -> {stop, normal, State}; +stop(Reply, State) -> {stop, normal, Reply, State}. -cleanup_after_confirm(AckTags, State = #q{delayed_stop = DS, - unconfirmed = UC, - backing_queue = BQ, - backing_queue_state = BQS}) -> - {_Guids, BQS1} = BQ:ack(AckTags, BQS), - State1 = State#q{backing_queue_state = BQS1}, - case dtree:is_empty(UC) andalso DS =/= undefined of - true -> case DS of - {_, {_, noreply}} -> ok; - {_, {From, Reply}} -> gen_server2:reply(From, Reply) - end, - {Reason, _} = DS, - {stop, Reason, State1}; - false -> noreply(State1) - end. -detect_dead_letter_cycles(#basic_message{content = Content}, Queues) -> +detect_dead_letter_cycles(expired, + #basic_message{content = Content}, Queues) -> #content{properties = #'P_basic'{headers = Headers}} = rabbit_binary_parser:ensure_content_decoded(Content), NoCycles = {Queues, []}, @@ -823,38 +856,56 @@ detect_dead_letter_cycles(#basic_message{content = Content}, Queues) -> NoCycles; _ -> case rabbit_misc:table_lookup(Headers, <<"x-death">>) of - {array, DeathTables} -> - OldQueues = [rabbit_misc:table_lookup(D, <<"queue">>) || - {table, D} <- DeathTables], - OldQueues1 = [QName || {longstr, QName} <- OldQueues], - OldQueuesSet = ordsets:from_list(OldQueues1), + {array, Deaths} -> {Cycling, NotCycling} = lists:partition( - fun(Queue) -> - ordsets:is_element(Queue#resource.name, - OldQueuesSet) + fun (#resource{name = Queue}) -> + is_dead_letter_cycle(Queue, Deaths) end, Queues), + OldQueues = [rabbit_misc:table_lookup(D, <<"queue">>) || + {table, D} <- Deaths], + OldQueues1 = [QName || {longstr, QName} <- OldQueues], {NotCycling, [[QName | OldQueues1] || #resource{name = QName} <- Cycling]}; _ -> NoCycles end + end; +detect_dead_letter_cycles(_Reason, _Msg, Queues) -> + {Queues, []}. + +is_dead_letter_cycle(Queue, Deaths) -> + {Cycle, Rest} = + lists:splitwith( + fun ({table, D}) -> + {longstr, Queue} =/= rabbit_misc:table_lookup(D, <<"queue">>); + (_) -> + true + end, Deaths), + %% Is there a cycle, and if so, is it entirely due to expiry? + case Rest of + [] -> false; + [H|_] -> lists:all( + fun ({table, D}) -> + {longstr, <<"expired">>} =:= + rabbit_misc:table_lookup(D, <<"reason">>); + (_) -> + false + end, Cycle ++ [H]) end. -make_dead_letter_msg(Reason, - Msg = #basic_message{content = Content, +make_dead_letter_msg(Msg = #basic_message{content = Content, exchange_name = Exchange, routing_keys = RoutingKeys}, - State = #q{dlx = DLX, dlx_routing_key = DlxRoutingKey}) -> + Reason, DLX, RK, #resource{name = QName}) -> {DeathRoutingKeys, HeadersFun1} = - case DlxRoutingKey of + case RK of undefined -> {RoutingKeys, fun (H) -> H end}; - _ -> {[DlxRoutingKey], - fun (H) -> lists:keydelete(<<"CC">>, 1, H) end} + _ -> {[RK], fun (H) -> lists:keydelete(<<"CC">>, 1, H) end} end, ReasonBin = list_to_binary(atom_to_list(Reason)), - #resource{name = QName} = qname(State), TimeSec = rabbit_misc:now_ms() div 1000, + PerMsgTTL = per_msg_ttl_header(Content#content.properties), HeadersFun2 = fun (Headers) -> %% The first routing key is the one specified in the @@ -865,47 +916,29 @@ make_dead_letter_msg(Reason, {<<"queue">>, longstr, QName}, {<<"time">>, timestamp, TimeSec}, {<<"exchange">>, longstr, Exchange#resource.name}, - {<<"routing-keys">>, array, RKs1}], - HeadersFun1(rabbit_basic:append_table_header(<<"x-death">>, - Info, Headers)) + {<<"routing-keys">>, array, RKs1}] ++ PerMsgTTL, + HeadersFun1(rabbit_basic:prepend_table_header(<<"x-death">>, + Info, Headers)) end, - Content1 = rabbit_basic:map_headers(HeadersFun2, Content), - Msg#basic_message{exchange_name = DLX, id = rabbit_guid:gen(), - routing_keys = DeathRoutingKeys, content = Content1}. + Content1 = #content{properties = Props} = + rabbit_basic:map_headers(HeadersFun2, Content), + Content2 = Content1#content{properties = + Props#'P_basic'{expiration = undefined}}, + Msg#basic_message{exchange_name = DLX, + id = rabbit_guid:gen(), + routing_keys = DeathRoutingKeys, + content = Content2}. + +per_msg_ttl_header(#'P_basic'{expiration = undefined}) -> + []; +per_msg_ttl_header(#'P_basic'{expiration = Expiration}) -> + [{<<"original-expiration">>, longstr, Expiration}]; +per_msg_ttl_header(_) -> + []. now_micros() -> timer:now_diff(now(), {0,0,0}). -infos(Items, State) -> - {Prefix, Items1} = - case lists:member(synchronised_slave_pids, Items) of - true -> Prefix1 = slaves_status(State), - case lists:member(slave_pids, Items) of - true -> {Prefix1, Items -- [slave_pids]}; - false -> {proplists:delete(slave_pids, Prefix1), Items} - end; - false -> {[], Items} - end, - Prefix ++ [{Item, i(Item, State)} - || Item <- (Items1 -- [synchronised_slave_pids])]. - -slaves_status(#q{q = #amqqueue{name = Name}}) -> - case rabbit_amqqueue:lookup(Name) of - {ok, #amqqueue{mirror_nodes = undefined}} -> - [{slave_pids, ''}, {synchronised_slave_pids, ''}]; - {ok, #amqqueue{slave_pids = SPids}} -> - {Results, _Bad} = - delegate:invoke(SPids, fun rabbit_mirror_queue_slave:info/1), - {SPids1, SSPids} = - lists:foldl( - fun ({Pid, Infos}, {SPidsN, SSPidsN}) -> - {[Pid | SPidsN], - case proplists:get_bool(is_synchronised, Infos) of - true -> [Pid | SSPidsN]; - false -> SSPidsN - end} - end, {[], []}, Results), - [{slave_pids, SPids1}, {synchronised_slave_pids, SSPids}] - end. +infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. i(name, #q{q = #amqqueue{name = Name}}) -> Name; i(durable, #q{q = #amqqueue{durable = Durable}}) -> Durable; @@ -917,6 +950,12 @@ i(owner_pid, #q{q = #amqqueue{exclusive_owner = none}}) -> ''; i(owner_pid, #q{q = #amqqueue{exclusive_owner = ExclusiveOwner}}) -> ExclusiveOwner; +i(policy, #q{q = #amqqueue{name = Name}}) -> + {ok, Q} = rabbit_amqqueue:lookup(Name), + case rabbit_policy:name(Q) of + none -> ''; + Policy -> Policy + end; i(exclusive_consumer_pid, #q{exclusive_consumer = none}) -> ''; i(exclusive_consumer_pid, #q{exclusive_consumer = {ChPid, _ConsumerTag}}) -> @@ -928,7 +967,7 @@ i(exclusive_consumer_tag, #q{exclusive_consumer = {_ChPid, ConsumerTag}}) -> i(messages_ready, #q{backing_queue_state = BQS, backing_queue = BQ}) -> BQ:len(BQS); i(messages_unacknowledged, _) -> - lists:sum([sets:size(C#cr.acktags) || C <- all_ch_record()]); + lists:sum([queue:len(C#cr.acktags) || C <- all_ch_record()]); i(messages, State) -> lists:sum([i(Item, State) || Item <- [messages_ready, messages_unacknowledged]]); @@ -938,10 +977,21 @@ i(memory, _) -> {memory, M} = process_info(self(), memory), M; i(slave_pids, #q{q = #amqqueue{name = Name}}) -> - case rabbit_amqqueue:lookup(Name) of - {ok, #amqqueue{mirror_nodes = undefined}} -> []; - {ok, #amqqueue{slave_pids = SPids}} -> SPids + {ok, Q = #amqqueue{slave_pids = SPids}} = + rabbit_amqqueue:lookup(Name), + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> ''; + true -> SPids end; +i(synchronised_slave_pids, #q{q = #amqqueue{name = Name}}) -> + {ok, Q = #amqqueue{sync_slave_pids = SSPids}} = + rabbit_amqqueue:lookup(Name), + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> ''; + true -> SSPids + end; +i(status, #q{status = Status}) -> + Status; i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) -> BQ:status(BQS); i(Item, _) -> @@ -963,47 +1013,41 @@ emit_stats(State) -> emit_stats(State, Extra) -> rabbit_event:notify(queue_stats, Extra ++ infos(?STATISTICS_KEYS, State)). -emit_consumer_created(ChPid, ConsumerTag, Exclusive, AckRequired) -> +emit_consumer_created(ChPid, ConsumerTag, Exclusive, AckRequired, QName) -> rabbit_event:notify(consumer_created, [{consumer_tag, ConsumerTag}, {exclusive, Exclusive}, {ack_required, AckRequired}, {channel, ChPid}, - {queue, self()}]). + {queue, QName}]). -emit_consumer_deleted(ChPid, ConsumerTag) -> +emit_consumer_deleted(ChPid, ConsumerTag, QName) -> rabbit_event:notify(consumer_deleted, [{consumer_tag, ConsumerTag}, {channel, ChPid}, - {queue, self()}]). + {queue, QName}]). %%---------------------------------------------------------------------------- -prioritise_call(Msg, _From, _State) -> +prioritise_call(Msg, _From, _Len, _State) -> case Msg of info -> 9; {info, _Items} -> 9; consumers -> 9; - {basic_consume, _, _, _, _, _, _} -> 7; - {basic_cancel, _, _, _} -> 7; stat -> 7; _ -> 0 end. -prioritise_cast(Msg, _State) -> +prioritise_cast(Msg, _Len, _State) -> case Msg of delete_immediately -> 8; {set_ram_duration_target, _Duration} -> 8; {set_maximum_since_use, _Age} -> 8; - {ack, _AckTags, _ChPid} -> 7; - {reject, _AckTags, _Requeue, _ChPid} -> 7; - {notify_sent, _ChPid, _Credit} -> 7; - {unblock, _ChPid} -> 7; {run_backing_queue, _Mod, _Fun} -> 6; _ -> 0 end. -prioritise_info(Msg, #q{q = #amqqueue{exclusive_owner = DownPid}}) -> +prioritise_info(Msg, _Len, #q{q = #amqqueue{exclusive_owner = DownPid}}) -> case Msg of {'DOWN', _, process, DownPid, _} -> 8; update_ram_duration -> 8; @@ -1014,9 +1058,6 @@ prioritise_info(Msg, #q{q = #amqqueue{exclusive_owner = DownPid}}) -> _ -> 0 end. -handle_call(_, _, State = #q{delayed_stop = DS}) when DS =/= undefined -> - noreply(State); - handle_call({init, Recover}, From, State = #q{q = #amqqueue{exclusive_owner = none}}) -> declare(Recover, From, State); @@ -1026,17 +1067,21 @@ handle_call({init, Recover}, From, case rabbit_misc:is_process_alive(Owner) of true -> erlang:monitor(process, Owner), declare(Recover, From, State); - false -> #q{backing_queue = BQ, backing_queue_state = undefined, - q = #amqqueue{name = QName} = Q} = State, + false -> #q{backing_queue = undefined, + backing_queue_state = undefined, + q = #amqqueue{name = QName} = Q} = State, gen_server2:reply(From, not_found), case Recover of - true -> ok; - _ -> rabbit_log:warning( - "Queue ~p exclusive owner went away~n", [QName]) + new -> rabbit_log:warning( + "Queue ~p exclusive owner went away~n", + [rabbit_misc:rs(QName)]); + _ -> ok end, + BQ = backing_queue_module(Q), BQS = bq_init(BQ, Q, Recover), %% Rely on terminate to delete the queue. - {stop, normal, State#q{backing_queue_state = BQS}} + {stop, {shutdown, missing_owner}, + State#q{backing_queue = BQ, backing_queue_state = BQS}} end; handle_call(info, _From, State) -> @@ -1051,30 +1096,12 @@ handle_call({info, Items}, _From, State) -> handle_call(consumers, _From, State) -> reply(consumers(State), State); -handle_call({deliver, Delivery = #delivery{immediate = true}}, _From, State) -> - %% FIXME: Is this correct semantics? - %% - %% I'm worried in particular about the case where an exchange has - %% two queues against a particular routing key, and a message is - %% sent in immediate mode through the binding. In non-immediate - %% mode, both queues get the message, saving it for later if - %% there's noone ready to receive it just now. In immediate mode, - %% should both queues still get the message, somehow, or should - %% just all ready-to-consume queues get the message, with unready - %% queues discarding the message? - %% - Confirm = should_confirm_message(Delivery, State), - {Delivered, State1} = attempt_delivery(Delivery, Confirm, State), - reply(Delivered, case Delivered of - true -> maybe_record_confirm_message(Confirm, State1); - false -> discard_delivery(Delivery, State1) - end); - -handle_call({deliver, Delivery = #delivery{mandatory = true}}, From, State) -> - gen_server2:reply(From, true), - noreply(deliver_or_enqueue(Delivery, State)); - -handle_call({notify_down, ChPid}, From, State) -> +handle_call({deliver, Delivery, Delivered}, From, State) -> + %% Synchronous, "mandatory" deliver mode. + gen_server2:reply(From, ok), + noreply(deliver_or_enqueue(Delivery, Delivered, State)); + +handle_call({notify_down, ChPid}, _From, State) -> %% we want to do this synchronously, so that auto_deleted queues %% are no longer visible by the time we send a response to the %% client. The queue is ultimately deleted in terminate/2; if we @@ -1082,71 +1109,87 @@ handle_call({notify_down, ChPid}, From, State) -> %% gen_server2 *before* the reply is sent. case handle_ch_down(ChPid, State) of {ok, State1} -> reply(ok, State1); - {stop, State1} -> stop_later(normal, From, ok, State1) + {stop, State1} -> stop(ok, State1) end; -handle_call({basic_get, ChPid, NoAck}, _From, +handle_call({basic_get, ChPid, NoAck, LimiterPid}, _From, State = #q{q = #amqqueue{name = QName}}) -> AckRequired = not NoAck, State1 = ensure_expiry_timer(State), - case fetch(AckRequired, drop_expired_messages(State1)) of + case fetch(AckRequired, State1) of {empty, State2} -> reply(empty, State2); - {{Message, IsDelivered, AckTag, Remaining}, State2} -> - State3 = + {{Message, IsDelivered, AckTag}, State2} -> + State3 = #q{backing_queue = BQ, backing_queue_state = BQS} = case AckRequired of - true -> C = #cr{acktags = ChAckTags} = ch_record(ChPid), - ChAckTags1 = sets:add_element(AckTag, ChAckTags), + true -> C = #cr{acktags = ChAckTags} = + ch_record(ChPid, LimiterPid), + ChAckTags1 = queue:in(AckTag, ChAckTags), update_ch_record(C#cr{acktags = ChAckTags1}), State2; false -> State2 end, Msg = {QName, self(), AckTag, IsDelivered, Message}, - reply({ok, Remaining, Msg}, State3) + reply({ok, BQ:len(BQS), Msg}, State3) end; -handle_call({basic_consume, NoAck, ChPid, Limiter, - ConsumerTag, ExclusiveConsume, OkMsg}, - _From, State = #q{exclusive_consumer = ExistingHolder}) -> - case check_exclusive_access(ExistingHolder, ExclusiveConsume, - State) of +handle_call({basic_consume, NoAck, ChPid, LimiterPid, LimiterActive, + ConsumerTag, ExclusiveConsume, CreditArgs, OkMsg}, + _From, State = #q{exclusive_consumer = Holder}) -> + case check_exclusive_access(Holder, ExclusiveConsume, State) of in_use -> reply({error, exclusive_consume_unavailable}, State); ok -> - C = ch_record(ChPid), - C1 = update_consumer_count(C#cr{limiter = Limiter}, +1), + C = #cr{consumer_count = Count, + limiter = Limiter} = ch_record(ChPid, LimiterPid), + Limiter1 = case LimiterActive of + true -> rabbit_limiter:activate(Limiter); + false -> Limiter + end, + Limiter2 = case CreditArgs of + none -> Limiter1; + {Crd, Drain} -> rabbit_limiter:credit( + Limiter1, ConsumerTag, Crd, Drain) + end, + C1 = update_ch_record(C#cr{consumer_count = Count + 1, + limiter = Limiter2}), + case is_empty(State) of + true -> send_drained(C1); + false -> ok + end, Consumer = #consumer{tag = ConsumerTag, ack_required = not NoAck}, ExclusiveConsumer = if ExclusiveConsume -> {ChPid, ConsumerTag}; - true -> ExistingHolder + true -> Holder end, State1 = State#q{has_had_consumers = true, exclusive_consumer = ExclusiveConsumer}, ok = maybe_send_reply(ChPid, OkMsg), - E = {ChPid, Consumer}, - State2 = - case is_ch_blocked(C1) of - true -> block_consumer(C1, E), - State1; - false -> update_ch_record(C1), - AC1 = queue:in(E, State1#q.active_consumers), - run_message_queue(State1#q{active_consumers = AC1}) - end, emit_consumer_created(ChPid, ConsumerTag, ExclusiveConsume, - not NoAck), - reply(ok, State2) + not NoAck, qname(State1)), + AC1 = queue:in({ChPid, Consumer}, State1#q.active_consumers), + reply(ok, run_message_queue(State1#q{active_consumers = AC1})) end; -handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, From, +handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, State = #q{exclusive_consumer = Holder}) -> ok = maybe_send_reply(ChPid, OkMsg), case lookup_ch(ChPid) of not_found -> reply(ok, State); - C = #cr{blocked_consumers = Blocked} -> - emit_consumer_deleted(ChPid, ConsumerTag), + C = #cr{consumer_count = Count, + limiter = Limiter, + blocked_consumers = Blocked} -> + emit_consumer_deleted(ChPid, ConsumerTag, qname(State)), Blocked1 = remove_consumer(ChPid, ConsumerTag, Blocked), - update_consumer_count(C#cr{blocked_consumers = Blocked1}, -1), + Limiter1 = case Count of + 1 -> rabbit_limiter:deactivate(Limiter); + _ -> Limiter + end, + Limiter2 = rabbit_limiter:forget_consumer(Limiter1, ConsumerTag), + update_ch_record(C#cr{consumer_count = Count - 1, + limiter = Limiter2, + blocked_consumers = Blocked1}), State1 = State#q{ exclusive_consumer = case Holder of {ChPid, ConsumerTag} -> none; @@ -1154,109 +1197,120 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, From, end, active_consumers = remove_consumer( ChPid, ConsumerTag, - State#q.active_consumers)}, + State#q.active_consumers)}, case should_auto_delete(State1) of false -> reply(ok, ensure_expiry_timer(State1)); - true -> stop_later(normal, From, ok, State1) + true -> stop(ok, State1) end end; handle_call(stat, _From, State) -> State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = - drop_expired_messages(ensure_expiry_timer(State)), - reply({ok, BQ:len(BQS), active_consumer_count()}, State1); + ensure_expiry_timer(State), + reply({ok, BQ:len(BQS), consumer_count()}, State1); -handle_call({delete, IfUnused, IfEmpty}, From, +handle_call({delete, IfUnused, IfEmpty}, _From, State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> - IsEmpty = BQ:is_empty(BQS), + IsEmpty = BQ:is_empty(BQS), IsUnused = is_unused(State), if - IfEmpty and not(IsEmpty) -> reply({error, not_empty}, State); - IfUnused and not(IsUnused) -> reply({error, in_use}, State); - true -> stop_later(normal, From, - {ok, BQ:len(BQS)}, State) + IfEmpty and not(IsEmpty) -> reply({error, not_empty}, State); + IfUnused and not(IsUnused) -> reply({error, in_use}, State); + true -> stop({ok, BQ:len(BQS)}, State) end; handle_call(purge, _From, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> {Count, BQS1} = BQ:purge(BQS), - reply({ok, Count}, State#q{backing_queue_state = BQS1}); + State1 = State#q{backing_queue_state = BQS1}, + reply({ok, Count}, maybe_send_drained(Count =:= 0, State1)); handle_call({requeue, AckTags, ChPid}, From, State) -> gen_server2:reply(From, ok), - noreply(subtract_acks( - ChPid, AckTags, State, - fun (State1) -> requeue_and_run(AckTags, State1) end)); + noreply(requeue(AckTags, ChPid, State)); + +handle_call(sync_mirrors, _From, + State = #q{backing_queue = rabbit_mirror_queue_master, + backing_queue_state = BQS}) -> + S = fun(BQSN) -> State#q{backing_queue_state = BQSN} end, + HandleInfo = fun (Status) -> + receive {'$gen_call', From, {info, Items}} -> + Infos = infos(Items, State#q{status = Status}), + gen_server2:reply(From, {ok, Infos}) + after 0 -> + ok + end + end, + EmitStats = fun (Status) -> + rabbit_event:if_enabled( + State, #q.stats_timer, + fun() -> emit_stats(State#q{status = Status}) end) + end, + case rabbit_mirror_queue_master:sync_mirrors(HandleInfo, EmitStats, BQS) of + {ok, BQS1} -> reply(ok, S(BQS1)); + {stop, Reason, BQS1} -> {stop, Reason, S(BQS1)} + end; + +handle_call(sync_mirrors, _From, State) -> + reply({error, not_mirrored}, State); + +%% By definition if we get this message here we do not have to do anything. +handle_call(cancel_sync_mirrors, _From, State) -> + reply({ok, not_syncing}, State); handle_call(force_event_refresh, _From, State = #q{exclusive_consumer = Exclusive}) -> rabbit_event:notify(queue_created, infos(?CREATION_EVENT_KEYS, State)), + QName = qname(State), case Exclusive of - none -> [emit_consumer_created(Ch, CTag, false, AckRequired) || + none -> [emit_consumer_created( + Ch, CTag, false, AckRequired, QName) || {Ch, CTag, AckRequired} <- consumers(State)]; {Ch, CTag} -> [{Ch, CTag, AckRequired}] = consumers(State), - emit_consumer_created(Ch, CTag, true, AckRequired) + emit_consumer_created(Ch, CTag, true, AckRequired, QName) end, reply(ok, State). -handle_cast({confirm, MsgSeqNos, QPid}, State = #q{unconfirmed = UC}) -> - {MsgSeqNoAckTags, UC1} = dtree:take(MsgSeqNos, QPid, UC), - State1 = case dtree:is_defined(QPid, UC1) of - false -> QMons = State#q.queue_monitors, - State#q{queue_monitors = pmon:demonitor(QPid, QMons)}; - true -> State - end, - cleanup_after_confirm([AckTag || {_MsgSeqNo, AckTag} <- MsgSeqNoAckTags], - State1#q{unconfirmed = UC1}); - -handle_cast(_, State = #q{delayed_stop = DS}) when DS =/= undefined -> - noreply(State); - -handle_cast({run_backing_queue, Mod, Fun}, State) -> - noreply(run_backing_queue(Mod, Fun, State)); +handle_cast({run_backing_queue, Mod, Fun}, + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> + noreply(State#q{backing_queue_state = BQ:invoke(Mod, Fun, BQS)}); -handle_cast({deliver, Delivery = #delivery{sender = Sender}, Flow}, +handle_cast({deliver, Delivery = #delivery{sender = Sender}, Delivered, Flow}, State = #q{senders = Senders}) -> - %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. + %% Asynchronous, non-"mandatory" deliver mode. Senders1 = case Flow of flow -> credit_flow:ack(Sender), pmon:monitor(Sender, Senders); noflow -> Senders end, State1 = State#q{senders = Senders1}, - noreply(deliver_or_enqueue(Delivery, State1)); + noreply(deliver_or_enqueue(Delivery, Delivered, State1)); handle_cast({ack, AckTags, ChPid}, State) -> - noreply(subtract_acks( - ChPid, AckTags, State, - fun (State1 = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - {_Guids, BQS1} = BQ:ack(AckTags, BQS), - State1#q{backing_queue_state = BQS1} - end)); - -handle_cast({reject, AckTags, Requeue, ChPid}, State) -> - noreply(subtract_acks( - ChPid, AckTags, State, - case Requeue of - true -> fun (State1) -> requeue_and_run(AckTags, State1) end; - false -> fun (State1 = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - Fun = dead_letter_fun(rejected, State1), - BQS1 = BQ:fold(Fun, BQS, AckTags), - ack_if_no_dlx( - AckTags, - State1#q{backing_queue_state = BQS1}) - end - end)); + noreply(ack(AckTags, ChPid, State)); + +handle_cast({reject, AckTags, true, ChPid}, State) -> + noreply(requeue(AckTags, ChPid, State)); + +handle_cast({reject, AckTags, false, ChPid}, State) -> + noreply(with_dlx( + State#q.dlx, + fun (X) -> subtract_acks(ChPid, AckTags, State, + fun (State1) -> + dead_letter_rejected_msgs( + AckTags, X, State1) + end) end, + fun () -> ack(AckTags, ChPid, State) end)); handle_cast(delete_immediately, State) -> - stop_later(normal, State); + stop(State); -handle_cast({unblock, ChPid}, State) -> +handle_cast({resume, ChPid}, State) -> noreply( possibly_unblock(State, ChPid, - fun (C) -> C#cr{is_limit_active = false} end)); + fun (C = #cr{limiter = Limiter}) -> + C#cr{limiter = rabbit_limiter:resume(Limiter)} + end)); handle_cast({notify_sent, ChPid, Credit}, State) -> noreply( @@ -1265,21 +1319,12 @@ handle_cast({notify_sent, ChPid, Credit}, State) -> C#cr{unsent_message_count = Count - Credit} end)); -handle_cast({limit, ChPid, Limiter}, State) -> +handle_cast({activate_limit, ChPid}, State) -> noreply( - possibly_unblock( - State, ChPid, - fun (C = #cr{consumer_count = ConsumerCount, - limiter = OldLimiter, - is_limit_active = OldLimited}) -> - case (ConsumerCount =/= 0 andalso - not rabbit_limiter:is_enabled(OldLimiter)) of - true -> ok = rabbit_limiter:register(Limiter, self()); - false -> ok - end, - Limited = OldLimited andalso rabbit_limiter:is_enabled(Limiter), - C#cr{limiter = Limiter, is_limit_active = Limited} - end)); + possibly_unblock(State, ChPid, + fun (C = #cr{limiter = Limiter}) -> + C#cr{limiter = rabbit_limiter:activate(Limiter)} + end)); handle_cast({flush, ChPid}, State) -> ok = rabbit_channel:flushed(ChPid, self()), @@ -1294,33 +1339,61 @@ handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), noreply(State); -handle_cast({dead_letter, {Msg, AckTag}, Reason}, State) -> - dead_letter_msg(Msg, AckTag, Reason, State). - -%% We need to not ignore this as we need to remove outstanding -%% confirms due to queue death. -handle_info({'DOWN', _MonitorRef, process, DownPid, Reason}, - State = #q{delayed_stop = DS}) when DS =/= undefined -> - handle_queue_down(DownPid, Reason, State); - -handle_info(_, State = #q{delayed_stop = DS}) when DS =/= undefined -> - noreply(State); +handle_cast(start_mirroring, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + %% lookup again to get policy for init_with_existing_bq + {ok, Q} = rabbit_amqqueue:lookup(qname(State)), + true = BQ =/= rabbit_mirror_queue_master, %% assertion + BQ1 = rabbit_mirror_queue_master, + BQS1 = BQ1:init_with_existing_bq(Q, BQ, BQS), + noreply(State#q{backing_queue = BQ1, + backing_queue_state = BQS1}); + +handle_cast(stop_mirroring, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + BQ = rabbit_mirror_queue_master, %% assertion + {BQ1, BQS1} = BQ:stop_mirroring(BQS), + noreply(State#q{backing_queue = BQ1, + backing_queue_state = BQS1}); + +handle_cast({credit, ChPid, CTag, Credit, Drain}, + State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + Len = BQ:len(BQS), + rabbit_channel:send_credit_reply(ChPid, Len), + C = #cr{limiter = Limiter} = lookup_ch(ChPid), + C1 = C#cr{limiter = rabbit_limiter:credit(Limiter, CTag, Credit, Drain)}, + noreply(case Drain andalso Len == 0 of + true -> update_ch_record(C1), + send_drained(C1), + State; + false -> case is_ch_blocked(C1) of + true -> update_ch_record(C1), + State; + false -> unblock(State, C1) + end + end); + +handle_cast(wake_up, State) -> + noreply(State). handle_info(maybe_expire, State) -> case is_unused(State) of - true -> stop_later(normal, State); - false -> noreply(ensure_expiry_timer(State)) + true -> stop(State); + false -> noreply(State#q{expiry_timer_ref = undefined}) end; handle_info(drop_expired, State) -> - noreply(drop_expired_messages(State#q{ttl_timer_ref = undefined})); + WasEmpty = is_empty(State), + State1 = drop_expired_msgs(State#q{ttl_timer_ref = undefined}), + noreply(maybe_send_drained(WasEmpty, State1)); handle_info(emit_stats, State) -> - %% Do not invoke noreply as it would see no timer and create a new one. emit_stats(State), - State1 = rabbit_event:reset_stats_timer(State, #q.stats_timer), - assert_invariant(State1), - {noreply, State1, hibernate}; + %% Don't call noreply/1, we don't want to set timers + {State1, Timeout} = next_state(rabbit_event:reset_stats_timer( + State, #q.stats_timer)), + {noreply, State1, Timeout}; handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State = #q{q = #amqqueue{exclusive_owner = DownPid}}) -> @@ -1330,12 +1403,12 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, %% match what people expect (see bug 21824). However we need this %% monitor-and-async- delete in case the connection goes away %% unexpectedly. - stop_later(normal, State); + stop(State); -handle_info({'DOWN', _MonitorRef, process, DownPid, Reason}, State) -> +handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> case handle_ch_down(DownPid, State) of - {ok, State1} -> handle_queue_down(DownPid, Reason, State1); - {stop, State1} -> stop_later(normal, State1) + {ok, State1} -> noreply(State1); + {stop, State1} -> stop(State1) end; handle_info(update_ram_duration, State = #q{backing_queue = BQ, @@ -1344,8 +1417,10 @@ handle_info(update_ram_duration, State = #q{backing_queue = BQ, DesiredDuration = rabbit_memory_monitor:report_ram_duration(self(), RamDuration), BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), - noreply(State#q{rate_timer_ref = just_measured, - backing_queue_state = BQS2}); + %% Don't call noreply/1, we don't want to set timers + {State1, Timeout} = next_state(State#q{rate_timer_ref = undefined, + backing_queue_state = BQS2}), + {noreply, State1, Timeout}; handle_info(sync_timeout, State) -> noreply(backing_queue_timeout(State#q{sync_timer_ref = undefined})); diff --git a/src/rabbit_amqqueue_sup.erl b/src/rabbit_amqqueue_sup.erl index a4305e5f..74ae59da 100644 --- a/src/rabbit_amqqueue_sup.erl +++ b/src/rabbit_amqqueue_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_amqqueue_sup). @@ -47,6 +47,6 @@ start_child(Node, Args) -> supervisor2:start_child({?SERVER, Node}, Args). init([]) -> - {ok, {{simple_one_for_one_terminate, 10, 10}, + {ok, {{simple_one_for_one, 10, 10}, [{rabbit_amqqueue, {rabbit_amqqueue_process, start_link, []}, temporary, ?MAX_WAIT, worker, [rabbit_amqqueue_process]}]}}. diff --git a/src/rabbit_auth_backend.erl b/src/rabbit_auth_backend.erl index e89951e7..4ffc8c3a 100644 --- a/src/rabbit_auth_backend.erl +++ b/src/rabbit_auth_backend.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_auth_backend). @@ -20,7 +20,7 @@ %% A description proplist as with auth mechanisms, %% exchanges. Currently unused. --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% Check a user can log in, given a username and a proplist of %% authentication information (e.g. [{password, Password}]). diff --git a/src/rabbit_auth_backend_internal.erl b/src/rabbit_auth_backend_internal.erl index 7b9df81e..61919d05 100644 --- a/src/rabbit_auth_backend_internal.erl +++ b/src/rabbit_auth_backend_internal.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_auth_backend_internal). @@ -49,7 +49,7 @@ -spec(hash_password/1 :: (rabbit_types:password()) -> rabbit_types:password_hash()). -spec(set_tags/2 :: (rabbit_types:username(), [atom()]) -> 'ok'). --spec(list_users/0 :: () -> rabbit_types:infos()). +-spec(list_users/0 :: () -> [rabbit_types:infos()]). -spec(user_info_keys/0 :: () -> rabbit_types:info_keys()). -spec(lookup_user/1 :: (rabbit_types:username()) -> rabbit_types:ok(rabbit_types:internal_user()) @@ -58,14 +58,14 @@ regexp(), regexp(), regexp()) -> 'ok'). -spec(clear_permissions/2 :: (rabbit_types:username(), rabbit_types:vhost()) -> 'ok'). --spec(list_permissions/0 :: () -> rabbit_types:infos()). +-spec(list_permissions/0 :: () -> [rabbit_types:infos()]). -spec(list_vhost_permissions/1 :: - (rabbit_types:vhost()) -> rabbit_types:infos()). + (rabbit_types:vhost()) -> [rabbit_types:infos()]). -spec(list_user_permissions/1 :: - (rabbit_types:username()) -> rabbit_types:infos()). + (rabbit_types:username()) -> [rabbit_types:infos()]). -spec(list_user_vhost_permissions/2 :: (rabbit_types:username(), rabbit_types:vhost()) - -> rabbit_types:infos()). + -> [rabbit_types:infos()]). -spec(perms_info_keys/0 :: () -> rabbit_types:info_keys()). -spec(vhost_perms_info_keys/0 :: () -> rabbit_types:info_keys()). -spec(user_perms_info_keys/0 :: () -> rabbit_types:info_keys()). @@ -203,7 +203,9 @@ hash_password(Cleartext) -> <<Salt/binary, Hash/binary>>. check_password(Cleartext, <<Salt:4/binary, Hash/binary>>) -> - Hash =:= salted_md5(Salt, Cleartext). + Hash =:= salted_md5(Salt, Cleartext); +check_password(_Cleartext, _Any) -> + false. make_salt() -> {A1,A2,A3} = now(), diff --git a/src/rabbit_auth_mechanism.erl b/src/rabbit_auth_mechanism.erl index eda6a743..21528b11 100644 --- a/src/rabbit_auth_mechanism.erl +++ b/src/rabbit_auth_mechanism.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_auth_mechanism). @@ -19,7 +19,7 @@ -ifdef(use_specs). %% A description. --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% If this mechanism is enabled, should it be offered for a given socket? %% (primarily so EXTERNAL can be SSL-only) diff --git a/src/rabbit_auth_mechanism_amqplain.erl b/src/rabbit_auth_mechanism_amqplain.erl index c0d86cd1..8e896b45 100644 --- a/src/rabbit_auth_mechanism_amqplain.erl +++ b/src/rabbit_auth_mechanism_amqplain.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_auth_mechanism_amqplain). @@ -33,8 +33,7 @@ %% referring generically to "SASL security mechanism", i.e. the above. description() -> - [{name, <<"AMQPLAIN">>}, - {description, <<"QPid AMQPLAIN mechanism">>}]. + [{description, <<"QPid AMQPLAIN mechanism">>}]. should_offer(_Sock) -> true. diff --git a/src/rabbit_auth_mechanism_cr_demo.erl b/src/rabbit_auth_mechanism_cr_demo.erl index 5df1d5d7..8699a9fa 100644 --- a/src/rabbit_auth_mechanism_cr_demo.erl +++ b/src/rabbit_auth_mechanism_cr_demo.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_auth_mechanism_cr_demo). @@ -37,8 +37,7 @@ %% SECURE-OK: "My password is ~s", [Password] description() -> - [{name, <<"RABBIT-CR-DEMO">>}, - {description, <<"RabbitMQ Demo challenge-response authentication " + [{description, <<"RabbitMQ Demo challenge-response authentication " "mechanism">>}]. should_offer(_Sock) -> diff --git a/src/rabbit_auth_mechanism_plain.erl b/src/rabbit_auth_mechanism_plain.erl index 423170e1..5ab22e75 100644 --- a/src/rabbit_auth_mechanism_plain.erl +++ b/src/rabbit_auth_mechanism_plain.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_auth_mechanism_plain). @@ -31,13 +31,11 @@ %% SASL PLAIN, as used by the Qpid Java client and our clients. Also, %% apparently, by OpenAMQ. -%% TODO: once the minimum erlang becomes R13B03, reimplement this -%% using the binary module - that makes use of BIFs to do binary -%% matching and will thus be much faster. +%% TODO: reimplement this using the binary module? - that makes use of +%% BIFs to do binary matching and will thus be much faster. description() -> - [{name, <<"PLAIN">>}, - {description, <<"SASL PLAIN authentication mechanism">>}]. + [{description, <<"SASL PLAIN authentication mechanism">>}]. should_offer(_Sock) -> true. diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl new file mode 100644 index 00000000..a5b91867 --- /dev/null +++ b/src/rabbit_autoheal.erl @@ -0,0 +1,199 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_autoheal). + +-export([init/0, maybe_start/1, node_down/2, handle_msg/3]). + +%% The named process we are running in. +-define(SERVER, rabbit_node_monitor). + +%%---------------------------------------------------------------------------- + +%% In order to autoheal we want to: +%% +%% * Find the winning partition +%% * Stop all nodes in other partitions +%% * Wait for them all to be stopped +%% * Start them again +%% +%% To keep things simple, we assume all nodes are up. We don't start +%% unless all nodes are up, and if a node goes down we abandon the +%% whole process. To further keep things simple we also defer the +%% decision as to the winning node to the "leader" - arbitrarily +%% selected as the first node in the cluster. +%% +%% To coordinate the restarting nodes we pick a special node from the +%% winning partition - the "winner". Restarting nodes then stop, tell +%% the winner they have done so, and wait for it to tell them it is +%% safe to start again. The winner and the leader are not necessarily +%% the same node. +%% +%% Possible states: +%% +%% not_healing +%% - the default +%% +%% {winner_waiting, OutstandingStops, Notify} +%% - we are the winner and are waiting for all losing nodes to stop +%% before telling them they can restart +%% +%% restarting +%% - we are restarting. Of course the node monitor immediately dies +%% then so this state does not last long. We therefore send the +%% autoheal_safe_to_start message to the rabbit_outside_app_process +%% instead. + +%%---------------------------------------------------------------------------- + +init() -> not_healing. + +maybe_start(not_healing) -> + case enabled() of + true -> [Leader | _] = lists:usort(rabbit_mnesia:cluster_nodes(all)), + send(Leader, {request_start, node()}), + rabbit_log:info("Autoheal request sent to ~p~n", [Leader]), + not_healing; + false -> not_healing + end; +maybe_start(State) -> + State. + +enabled() -> + {ok, autoheal} =:= application:get_env(rabbit, cluster_partition_handling). + +node_down(_Node, {winner_waiting, _Nodes, _Notify} = Autoheal) -> + Autoheal; +node_down(_Node, not_healing) -> + not_healing; +node_down(Node, _State) -> + rabbit_log:info("Autoheal: aborting - ~p went down~n", [Node]), + not_healing. + +%% By receiving this message we become the leader +%% TODO should we try to debounce this? +handle_msg({request_start, Node}, + not_healing, Partitions) -> + rabbit_log:info("Autoheal request received from ~p~n", [Node]), + case rabbit_node_monitor:all_rabbit_nodes_up() of + false -> not_healing; + true -> AllPartitions = all_partitions(Partitions), + {Winner, Losers} = make_decision(AllPartitions), + rabbit_log:info("Autoheal decision~n" + " * Partitions: ~p~n" + " * Winner: ~p~n" + " * Losers: ~p~n", + [AllPartitions, Winner, Losers]), + send(Winner, {become_winner, Losers}), + [send(L, {winner_is, Winner}) || L <- Losers], + not_healing + end; + +handle_msg({request_start, Node}, + State, _Partitions) -> + rabbit_log:info("Autoheal request received from ~p when in state ~p; " + "ignoring~n", [Node, State]), + State; + +handle_msg({become_winner, Losers}, + not_healing, _Partitions) -> + rabbit_log:info("Autoheal: I am the winner, waiting for ~p to stop~n", + [Losers]), + {winner_waiting, Losers, Losers}; + +handle_msg({become_winner, Losers}, + {winner_waiting, WaitFor, Notify}, _Partitions) -> + rabbit_log:info("Autoheal: I am the winner, waiting additionally for " + "~p to stop~n", [Losers]), + {winner_waiting, lists:usort(Losers ++ WaitFor), + lists:usort(Losers ++ Notify)}; + +handle_msg({winner_is, Winner}, + not_healing, _Partitions) -> + rabbit_log:warning( + "Autoheal: we were selected to restart; winner is ~p~n", [Winner]), + rabbit_node_monitor:run_outside_applications( + fun () -> + MRef = erlang:monitor(process, {?SERVER, Winner}), + rabbit:stop(), + send(Winner, {node_stopped, node()}), + receive + {'DOWN', MRef, process, {?SERVER, Winner}, _Reason} -> ok; + autoheal_safe_to_start -> ok + end, + erlang:demonitor(MRef, [flush]), + rabbit:start() + end), + restarting; + +%% This is the winner receiving its last notification that a node has +%% stopped - all nodes can now start again +handle_msg({node_stopped, Node}, + {winner_waiting, [Node], Notify}, _Partitions) -> + rabbit_log:info("Autoheal: final node has stopped, starting...~n",[]), + [{rabbit_outside_app_process, N} ! autoheal_safe_to_start || N <- Notify], + not_healing; + +handle_msg({node_stopped, Node}, + {winner_waiting, WaitFor, Notify}, _Partitions) -> + {winner_waiting, WaitFor -- [Node], Notify}; + +handle_msg(_, restarting, _Partitions) -> + %% ignore, we can contribute no further + restarting; + +handle_msg({node_stopped, _Node}, State, _Partitions) -> + %% ignore, we already cancelled the autoheal process + State. + +%%---------------------------------------------------------------------------- + +send(Node, Msg) -> {?SERVER, Node} ! {autoheal_msg, Msg}. + +make_decision(AllPartitions) -> + Sorted = lists:sort([{partition_value(P), P} || P <- AllPartitions]), + [[Winner | _] | Rest] = lists:reverse([P || {_, P} <- Sorted]), + {Winner, lists:append(Rest)}. + +partition_value(Partition) -> + Connections = [Res || Node <- Partition, + Res <- [rpc:call(Node, rabbit_networking, + connections_local, [])], + is_list(Res)], + {length(lists:append(Connections)), length(Partition)}. + +%% We have our local understanding of what partitions exist; but we +%% only know which nodes we have been partitioned from, not which +%% nodes are partitioned from each other. +all_partitions(PartitionedWith) -> + Nodes = rabbit_mnesia:cluster_nodes(all), + Partitions = [{node(), PartitionedWith} | + rabbit_node_monitor:partitions(Nodes -- [node()])], + all_partitions(Partitions, [Nodes]). + +all_partitions([], Partitions) -> + Partitions; +all_partitions([{Node, CantSee} | Rest], Partitions) -> + {[Containing], Others} = + lists:partition(fun (Part) -> lists:member(Node, Part) end, Partitions), + A = Containing -- CantSee, + B = Containing -- A, + Partitions1 = case {A, B} of + {[], _} -> Partitions; + {_, []} -> Partitions; + _ -> [A, B | Others] + end, + all_partitions(Rest, Partitions1). diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index dc144a0e..61b504bc 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_backing_queue). @@ -22,17 +22,18 @@ -type(ack() :: any()). -type(state() :: any()). +-type(msg_ids() :: [rabbit_types:msg_id()]). -type(fetch_result(Ack) :: - ('empty' | - %% Message, IsDelivered, AckTag, Remaining_Len - {rabbit_types:basic_message(), boolean(), Ack, non_neg_integer()})). + ('empty' | {rabbit_types:basic_message(), boolean(), Ack})). +-type(drop_result(Ack) :: + ('empty' | {rabbit_types:msg_id(), Ack})). -type(attempt_recovery() :: boolean()). -type(purged_msg_count() :: non_neg_integer()). --type(async_callback() :: fun ((atom(), fun ((atom(), state()) -> state())) -> 'ok')). +-type(async_callback() :: + fun ((atom(), fun ((atom(), state()) -> state())) -> 'ok')). -type(duration() :: ('undefined' | 'infinity' | number())). --type(msg_fun() :: fun((rabbit_types:basic_message(), ack()) -> 'ok') | - 'undefined'). +-type(msg_fun(A) :: fun ((rabbit_types:basic_message(), ack(), A) -> A)). -type(msg_pred() :: fun ((rabbit_types:message_properties()) -> boolean())). %% Called on startup with a list of durable queue names. The queues @@ -68,24 +69,29 @@ %% content. -callback delete_and_terminate(any(), state()) -> state(). -%% Remove all messages in the queue, but not messages which have been -%% fetched and are pending acks. +%% Remove all 'fetchable' messages from the queue, i.e. all messages +%% except those that have been fetched already and are pending acks. -callback purge(state()) -> {purged_msg_count(), state()}. +%% Remove all messages in the queue which have been fetched and are +%% pending acks. +-callback purge_acks(state()) -> state(). + %% Publish a message. -callback publish(rabbit_types:basic_message(), - rabbit_types:message_properties(), pid(), state()) -> - state(). + rabbit_types:message_properties(), boolean(), pid(), + state()) -> state(). %% Called for messages which have already been passed straight %% out to a client. The queue will be empty for these calls %% (i.e. saves the round trip through the backing queue). --callback publish_delivered(true, rabbit_types:basic_message(), - rabbit_types:message_properties(), pid(), state()) - -> {ack(), state()}; - (false, rabbit_types:basic_message(), +-callback publish_delivered(rabbit_types:basic_message(), rabbit_types:message_properties(), pid(), state()) - -> {undefined, state()}. + -> {ack(), state()}. + +%% Called to inform the BQ about messages which have reached the +%% queue, but are not going to be further passed to BQ. +-callback discard(rabbit_types:msg_id(), pid(), state()) -> state(). %% Return ids of messages which have been confirmed since the last %% invocation of this function (or initialisation). @@ -114,32 +120,51 @@ %% first time the message id appears in the result of %% drain_confirmed. All subsequent appearances of that message id will %% be ignored. --callback drain_confirmed(state()) -> {[rabbit_guid:guid()], state()}. - -%% Drop messages from the head of the queue while the supplied predicate returns -%% true. Also accepts a boolean parameter that determines whether the messages -%% necessitate an ack or not. If they do, the function returns a list of -%% messages with the respective acktags. --callback dropwhile(msg_pred(), true, state()) - -> {[{rabbit_types:basic_message(), ack()}], state()}; - (msg_pred(), false, state()) - -> {undefined, state()}. +-callback drain_confirmed(state()) -> {msg_ids(), state()}. + +%% Drop messages from the head of the queue while the supplied +%% predicate on message properties returns true. Returns the first +%% message properties for which the predictate returned false, or +%% 'undefined' if the whole backing queue was traversed w/o the +%% predicate ever returning false. +-callback dropwhile(msg_pred(), state()) + -> {rabbit_types:message_properties() | undefined, state()}. + +%% Like dropwhile, except messages are fetched in "require +%% acknowledgement" mode and are passed, together with their ack tag, +%% to the supplied function. The function is also fed an +%% accumulator. The result of fetchwhile is as for dropwhile plus the +%% accumulator. +-callback fetchwhile(msg_pred(), msg_fun(A), A, state()) + -> {rabbit_types:message_properties() | undefined, + A, state()}. %% Produce the next message. -callback fetch(true, state()) -> {fetch_result(ack()), state()}; (false, state()) -> {fetch_result(undefined), state()}. +%% Remove the next message. +-callback drop(true, state()) -> {drop_result(ack()), state()}; + (false, state()) -> {drop_result(undefined), state()}. + %% Acktags supplied are for messages which can now be forgotten %% about. Must return 1 msg_id per Ack, in the same order as Acks. --callback ack([ack()], state()) -> {[rabbit_guid:guid()], state()}. - -%% Acktags supplied are for messages which should be processed. The -%% provided callback function is called with each message. --callback fold(msg_fun(), state(), [ack()]) -> state(). +-callback ack([ack()], state()) -> {msg_ids(), state()}. %% Reinsert messages into the queue which have already been delivered %% and were pending acknowledgement. --callback requeue([ack()], state()) -> {[rabbit_guid:guid()], state()}. +-callback requeue([ack()], state()) -> {msg_ids(), state()}. + +%% Fold over messages by ack tag. The supplied function is called with +%% each message, its ack tag, and an accumulator. +-callback ackfold(msg_fun(A), A, state(), [ack()]) -> {A, state()}. + +%% Fold over all the messages in a queue and return the accumulated +%% results, leaving the queue undisturbed. +-callback fold(fun((rabbit_types:basic_message(), + rabbit_types:message_properties(), + boolean(), A) -> {('stop' | 'cont'), A}), + A, state()) -> {A, state()}. %% How long is my queue? -callback len(state()) -> non_neg_integer(). @@ -147,6 +172,9 @@ %% Is my queue empty? -callback is_empty(state()) -> boolean(). +%% What's the queue depth, where depth = length + number of pending acks +-callback depth(state()) -> non_neg_integer(). + %% For the next three functions, the assumption is that you're %% monitoring something like the ingress and egress rates of the %% queue. The RAM duration is thus the length of time represented by @@ -185,18 +213,10 @@ -callback invoke(atom(), fun ((atom(), A) -> A), state()) -> state(). %% Called prior to a publish or publish_delivered call. Allows the BQ -%% to signal that it's already seen this message (and in what capacity -%% - i.e. was it published previously or discarded previously) and -%% thus the message should be dropped. +%% to signal that it's already seen this message, (e.g. it was published +%% or discarded previously) and thus the message should be dropped. -callback is_duplicate(rabbit_types:basic_message(), state()) - -> {'false'|'published'|'discarded', state()}. - -%% Called to inform the BQ about messages which have reached the -%% queue, but are not going to be further passed to BQ for some -%% reason. Note that this is may be invoked for messages for which -%% BQ:is_duplicate/2 has already returned {'published' | 'discarded', -%% BQS}. --callback discard(rabbit_types:basic_message(), pid(), state()) -> state(). + -> {boolean(), state()}. -else. @@ -204,12 +224,13 @@ behaviour_info(callbacks) -> [{start, 1}, {stop, 0}, {init, 3}, {terminate, 2}, - {delete_and_terminate, 2}, {purge, 1}, {publish, 4}, - {publish_delivered, 5}, {drain_confirmed, 1}, {dropwhile, 3}, - {fetch, 2}, {ack, 2}, {fold, 3}, {requeue, 2}, {len, 1}, - {is_empty, 1}, {set_ram_duration_target, 2}, {ram_duration, 1}, - {needs_timeout, 1}, {timeout, 1}, {handle_pre_hibernate, 1}, - {status, 1}, {invoke, 3}, {is_duplicate, 2}, {discard, 3}]; + {delete_and_terminate, 2}, {purge, 1}, {purge_acks, 1}, {publish, 5}, + {publish_delivered, 4}, {discard, 3}, {drain_confirmed, 1}, + {dropwhile, 2}, {fetchwhile, 4}, + {fetch, 2}, {ack, 2}, {requeue, 2}, {ackfold, 4}, {fold, 3}, {len, 1}, + {is_empty, 1}, {depth, 1}, {set_ram_duration_target, 2}, + {ram_duration, 1}, {needs_timeout, 1}, {timeout, 1}, + {handle_pre_hibernate, 1}, {status, 1}, {invoke, 3}, {is_duplicate, 2}] ; behaviour_info(_Other) -> undefined. diff --git a/src/rabbit_backing_queue_qc.erl b/src/rabbit_backing_queue_qc.erl index a84800c0..e2bc3247 100644 --- a/src/rabbit_backing_queue_qc.erl +++ b/src/rabbit_backing_queue_qc.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2011-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2011-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_backing_queue_qc). @@ -85,17 +85,19 @@ backing_queue_test(Cmds) -> %% Commands -%% Command frequencies are tuned so that queues are normally reasonably -%% short, but they may sometimes exceed ?QUEUE_MAXLEN. Publish-multiple -%% and purging cause extreme queue lengths, so these have lower probabilities. -%% Fetches are sufficiently frequent so that commands that need acktags -%% get decent coverage. +%% Command frequencies are tuned so that queues are normally +%% reasonably short, but they may sometimes exceed +%% ?QUEUE_MAXLEN. Publish-multiple and purging cause extreme queue +%% lengths, so these have lower probabilities. Fetches/drops are +%% sufficiently frequent so that commands that need acktags get decent +%% coverage. command(S) -> frequency([{10, qc_publish(S)}, {1, qc_publish_delivered(S)}, {1, qc_publish_multiple(S)}, %% very slow - {15, qc_fetch(S)}, %% needed for ack and requeue + {9, qc_fetch(S)}, %% needed for ack and requeue + {6, qc_drop(S)}, %% {15, qc_ack(S)}, {15, qc_requeue(S)}, {3, qc_set_ram_duration_target(S)}, @@ -104,7 +106,8 @@ command(S) -> {1, qc_dropwhile(S)}, {1, qc_is_empty(S)}, {1, qc_timeout(S)}, - {1, qc_purge(S)}]). + {1, qc_purge(S)}, + {1, qc_fold(S)}]). qc_publish(#state{bqstate = BQ}) -> {call, ?BQMOD, publish, @@ -112,18 +115,21 @@ qc_publish(#state{bqstate = BQ}) -> #message_properties{needs_confirming = frequency([{1, true}, {20, false}]), expiry = oneof([undefined | lists:seq(1, 10)])}, - self(), BQ]}. + false, self(), BQ]}. qc_publish_multiple(#state{}) -> {call, ?MODULE, publish_multiple, [resize(?QUEUE_MAXLEN, pos_integer())]}. qc_publish_delivered(#state{bqstate = BQ}) -> {call, ?BQMOD, publish_delivered, - [boolean(), qc_message(), #message_properties{}, self(), BQ]}. + [qc_message(), #message_properties{}, self(), BQ]}. qc_fetch(#state{bqstate = BQ}) -> {call, ?BQMOD, fetch, [boolean(), BQ]}. +qc_drop(#state{bqstate = BQ}) -> + {call, ?BQMOD, drop, [boolean(), BQ]}. + qc_ack(#state{bqstate = BQ, acks = Acks}) -> {call, ?BQMOD, ack, [rand_choice(proplists:get_keys(Acks)), BQ]}. @@ -141,7 +147,7 @@ qc_drain_confirmed(#state{bqstate = BQ}) -> {call, ?BQMOD, drain_confirmed, [BQ]}. qc_dropwhile(#state{bqstate = BQ}) -> - {call, ?BQMOD, dropwhile, [fun dropfun/1, false, BQ]}. + {call, ?BQMOD, dropwhile, [fun dropfun/1, BQ]}. qc_is_empty(#state{bqstate = BQ}) -> {call, ?BQMOD, is_empty, [BQ]}. @@ -152,6 +158,9 @@ qc_timeout(#state{bqstate = BQ}) -> qc_purge(#state{bqstate = BQ}) -> {call, ?BQMOD, purge, [BQ]}. +qc_fold(#state{bqstate = BQ}) -> + {call, ?BQMOD, fold, [makefoldfun(pos_integer()), foldacc(), BQ]}. + %% Preconditions %% Create long queues by only allowing publishing @@ -173,7 +182,7 @@ precondition(#state{len = Len}, {call, ?MODULE, publish_multiple, _Arg}) -> %% Model updates -next_state(S, BQ, {call, ?BQMOD, publish, [Msg, MsgProps, _Pid, _BQ]}) -> +next_state(S, BQ, {call, ?BQMOD, publish, [Msg, MsgProps, _Del, _Pid, _BQ]}) -> #state{len = Len, messages = Messages, confirms = Confirms, @@ -199,7 +208,7 @@ next_state(S, _BQ, {call, ?MODULE, publish_multiple, [PublishCount]}) -> next_state(S, Res, {call, ?BQMOD, publish_delivered, - [AckReq, Msg, MsgProps, _Pid, _BQ]}) -> + [Msg, MsgProps, _Pid, _BQ]}) -> #state{confirms = Confirms, acks = Acks, next_seq_id = NextSeq} = S, AckTag = {call, erlang, element, [1, Res]}, BQ1 = {call, erlang, element, [2, Res]}, @@ -213,29 +222,14 @@ next_state(S, Res, true -> gb_sets:add(MsgId, Confirms); _ -> Confirms end, - acks = case AckReq of - true -> [{AckTag, {NextSeq, {MsgProps, Msg}}}|Acks]; - false -> Acks - end + acks = [{AckTag, {NextSeq, {MsgProps, Msg}}}|Acks] }; next_state(S, Res, {call, ?BQMOD, fetch, [AckReq, _BQ]}) -> - #state{len = Len, messages = Messages, acks = Acks} = S, - ResultInfo = {call, erlang, element, [1, Res]}, - BQ1 = {call, erlang, element, [2, Res]}, - AckTag = {call, erlang, element, [3, ResultInfo]}, - S1 = S#state{bqstate = BQ1}, - case gb_trees:is_empty(Messages) of - true -> S1; - false -> {SeqId, MsgProp_Msg, M2} = gb_trees:take_smallest(Messages), - S2 = S1#state{len = Len - 1, messages = M2}, - case AckReq of - true -> - S2#state{acks = [{AckTag, {SeqId, MsgProp_Msg}}|Acks]}; - false -> - S2 - end - end; + next_state_fetch_and_drop(S, Res, AckReq, 3); + +next_state(S, Res, {call, ?BQMOD, drop, [AckReq, _BQ]}) -> + next_state_fetch_and_drop(S, Res, AckReq, 2); next_state(S, Res, {call, ?BQMOD, ack, [AcksArg, _BQ]}) -> #state{acks = AcksState} = S, @@ -281,19 +275,38 @@ next_state(S, BQ, {call, ?MODULE, timeout, _Args}) -> next_state(S, Res, {call, ?BQMOD, purge, _Args}) -> BQ1 = {call, erlang, element, [2, Res]}, - S#state{bqstate = BQ1, len = 0, messages = gb_trees:empty()}. + S#state{bqstate = BQ1, len = 0, messages = gb_trees:empty()}; + +next_state(S, Res, {call, ?BQMOD, fold, _Args}) -> + BQ1 = {call, erlang, element, [2, Res]}, + S#state{bqstate = BQ1}. %% Postconditions postcondition(S, {call, ?BQMOD, fetch, _Args}, Res) -> #state{messages = Messages, len = Len, acks = Acks, confirms = Confrms} = S, case Res of - {{MsgFetched, _IsDelivered, AckTag, RemainingLen}, _BQ} -> + {{MsgFetched, _IsDelivered, AckTag}, _BQ} -> {_SeqId, {_MsgProps, Msg}} = gb_trees:smallest(Messages), MsgFetched =:= Msg andalso not proplists:is_defined(AckTag, Acks) andalso not gb_sets:is_element(AckTag, Confrms) andalso - RemainingLen =:= Len - 1; + Len =/= 0; + {empty, _BQ} -> + Len =:= 0 + end; + +postcondition(S, {call, ?BQMOD, drop, _Args}, Res) -> + #state{messages = Messages, len = Len, acks = Acks, confirms = Confrms} = S, + case Res of + {{MsgIdFetched, AckTag}, _BQ} -> + {_SeqId, {_MsgProps, Msg}} = gb_trees:smallest(Messages), + MsgId = eval({call, erlang, element, + [?RECORD_INDEX(id, basic_message), Msg]}), + MsgIdFetched =:= MsgId andalso + not proplists:is_defined(AckTag, Acks) andalso + not gb_sets:is_element(AckTag, Confrms) andalso + Len =/= 0; {empty, _BQ} -> Len =:= 0 end; @@ -316,6 +329,15 @@ postcondition(S, {call, ?BQMOD, drain_confirmed, _Args}, Res) -> lists:all(fun (M) -> gb_sets:is_element(M, Confirms) end, ReportedConfirmed); +postcondition(S, {call, ?BQMOD, fold, [FoldFun, Acc0, _BQ0]}, {Res, _BQ1}) -> + #state{messages = Messages} = S, + {_, Model} = lists:foldl(fun ({_SeqId, {_MsgProps, _Msg}}, {stop, Acc}) -> + {stop, Acc}; + ({_SeqId, {MsgProps, Msg}}, {cont, Acc}) -> + FoldFun(Msg, MsgProps, false, Acc) + end, {cont, Acc0}, gb_trees:to_list(Messages)), + true = Model =:= Res; + postcondition(#state{bqstate = BQ, len = Len}, {call, _M, _F, _A}, _Res) -> ?BQMOD:len(BQ) =:= Len. @@ -374,6 +396,16 @@ rand_choice(List, Selection, N) -> rand_choice(List -- [Picked], [Picked | Selection], N - 1). +makefoldfun(Size) -> + fun (Msg, _MsgProps, Unacked, Acc) -> + case {length(Acc) > Size, Unacked} of + {false, false} -> {cont, [Msg | Acc]}; + {false, true} -> {cont, Acc}; + {true, _} -> {stop, Acc} + end + end. +foldacc() -> []. + dropfun(Props) -> Expiry = eval({call, erlang, element, [?RECORD_INDEX(expiry, message_properties), Props]}), @@ -391,4 +423,31 @@ drop_messages(Messages) -> end end. +next_state_fetch_and_drop(S, Res, AckReq, AckTagIdx) -> + #state{len = Len, messages = Messages, acks = Acks} = S, + ResultInfo = {call, erlang, element, [1, Res]}, + BQ1 = {call, erlang, element, [2, Res]}, + AckTag = {call, erlang, element, [AckTagIdx, ResultInfo]}, + S1 = S#state{bqstate = BQ1}, + case gb_trees:is_empty(Messages) of + true -> S1; + false -> {SeqId, MsgProp_Msg, M2} = gb_trees:take_smallest(Messages), + S2 = S1#state{len = Len - 1, messages = M2}, + case AckReq of + true -> + S2#state{acks = [{AckTag, {SeqId, MsgProp_Msg}}|Acks]}; + false -> + S2 + end + end. + +-else. + +-export([prop_disabled/0]). + +prop_disabled() -> + exit({compiled_without_proper, + "PropEr was not present during compilation of the test module. " + "Hence all tests are disabled."}). + -endif. diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index 734456d3..2e825536 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -10,17 +10,18 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_basic). -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([publish/4, publish/6, publish/1, - message/3, message/4, properties/1, append_table_header/3, - extract_headers/1, map_headers/2, delivery/4, header_routes/1]). +-export([publish/4, publish/5, publish/1, + message/3, message/4, properties/1, prepend_table_header/3, + extract_headers/1, map_headers/2, delivery/3, header_routes/1, + parse_expiration/1]). -export([build_content/2, from_content/1]). %%---------------------------------------------------------------------------- @@ -40,13 +41,13 @@ -spec(publish/4 :: (exchange_input(), rabbit_router:routing_key(), properties_input(), body_input()) -> publish_result()). --spec(publish/6 :: - (exchange_input(), rabbit_router:routing_key(), boolean(), boolean(), +-spec(publish/5 :: + (exchange_input(), rabbit_router:routing_key(), boolean(), properties_input(), body_input()) -> publish_result()). -spec(publish/1 :: (rabbit_types:delivery()) -> publish_result()). --spec(delivery/4 :: - (boolean(), boolean(), rabbit_types:message(), undefined | integer()) -> +-spec(delivery/3 :: + (boolean(), rabbit_types:message(), undefined | integer()) -> rabbit_types:delivery()). -spec(message/4 :: (rabbit_exchange:name(), rabbit_router:routing_key(), @@ -58,7 +59,7 @@ -spec(properties/1 :: (properties_input()) -> rabbit_framing:amqp_property_record()). --spec(append_table_header/3 :: +-spec(prepend_table_header/3 :: (binary(), rabbit_framing:amqp_table(), headers()) -> headers()). -spec(extract_headers/1 :: (rabbit_types:content()) -> headers()). @@ -72,6 +73,9 @@ binary() | [binary()]) -> rabbit_types:content()). -spec(from_content/1 :: (rabbit_types:content()) -> {rabbit_framing:amqp_property_record(), binary()}). +-spec(parse_expiration/1 :: + (rabbit_framing:amqp_property_record()) + -> rabbit_types:ok_or_error2('undefined' | non_neg_integer(), any())). -endif. @@ -80,18 +84,16 @@ %% Convenience function, for avoiding round-trips in calls across the %% erlang distributed network. publish(Exchange, RoutingKeyBin, Properties, Body) -> - publish(Exchange, RoutingKeyBin, false, false, Properties, Body). + publish(Exchange, RoutingKeyBin, false, Properties, Body). %% Convenience function, for avoiding round-trips in calls across the %% erlang distributed network. -publish(X = #exchange{name = XName}, RKey, Mandatory, Immediate, Props, Body) -> - publish(X, delivery(Mandatory, Immediate, - message(XName, RKey, properties(Props), Body), - undefined)); -publish(XName, RKey, Mandatory, Immediate, Props, Body) -> - publish(delivery(Mandatory, Immediate, - message(XName, RKey, properties(Props), Body), - undefined)). +publish(X = #exchange{name = XName}, RKey, Mandatory, Props, Body) -> + Message = message(XName, RKey, properties(Props), Body), + publish(X, delivery(Mandatory, Message, undefined)); +publish(XName, RKey, Mandatory, Props, Body) -> + Message = message(XName, RKey, properties(Props), Body), + publish(delivery(Mandatory, Message, undefined)). publish(Delivery = #delivery{ message = #basic_message{exchange_name = XName}}) -> @@ -105,8 +107,8 @@ publish(X, Delivery) -> {RoutingRes, DeliveredQPids} = rabbit_amqqueue:deliver(Qs, Delivery), {ok, RoutingRes, DeliveredQPids}. -delivery(Mandatory, Immediate, Message, MsgSeqNo) -> - #delivery{mandatory = Mandatory, immediate = Immediate, sender = self(), +delivery(Mandatory, Message, MsgSeqNo) -> + #delivery{mandatory = Mandatory, sender = self(), message = Message, msg_seq_no = MsgSeqNo}. build_content(Properties, BodyBin) when is_binary(BodyBin) -> @@ -179,15 +181,45 @@ properties(P) when is_list(P) -> end end, #'P_basic'{}, P). -append_table_header(Name, Info, undefined) -> - append_table_header(Name, Info, []); -append_table_header(Name, Info, Headers) -> - Prior = case rabbit_misc:table_lookup(Headers, Name) of - undefined -> []; - {array, Existing} -> Existing - end, +prepend_table_header(Name, Info, undefined) -> + prepend_table_header(Name, Info, []); +prepend_table_header(Name, Info, Headers) -> + case rabbit_misc:table_lookup(Headers, Name) of + {array, Existing} -> + prepend_table(Name, Info, Existing, Headers); + undefined -> + prepend_table(Name, Info, [], Headers); + Other -> + Headers2 = prepend_table(Name, Info, [], Headers), + set_invalid_header(Name, Other, Headers2) + end. + +prepend_table(Name, Info, Prior, Headers) -> rabbit_misc:set_table_value(Headers, Name, array, [{table, Info} | Prior]). +set_invalid_header(Name, {_, _}=Value, Headers) when is_list(Headers) -> + case rabbit_misc:table_lookup(Headers, ?INVALID_HEADERS_KEY) of + undefined -> + set_invalid([{Name, array, [Value]}], Headers); + {table, ExistingHdr} -> + update_invalid(Name, Value, ExistingHdr, Headers); + Other -> + %% somehow the x-invalid-headers header is corrupt + Invalid = [{?INVALID_HEADERS_KEY, array, [Other]}], + set_invalid_header(Name, Value, set_invalid(Invalid, Headers)) + end. + +set_invalid(NewHdr, Headers) -> + rabbit_misc:set_table_value(Headers, ?INVALID_HEADERS_KEY, table, NewHdr). + +update_invalid(Name, Value, ExistingHdr, Header) -> + Values = case rabbit_misc:table_lookup(ExistingHdr, Name) of + undefined -> [Value]; + {array, Prior} -> [Value | Prior] + end, + NewHdr = rabbit_misc:set_table_value(ExistingHdr, Name, array, Values), + set_invalid(NewHdr, Header). + extract_headers(Content) -> #content{properties = #'P_basic'{headers = Headers}} = rabbit_binary_parser:ensure_content_decoded(Content), @@ -226,3 +258,19 @@ header_routes(HeadersTable) -> {Type, _Val} -> throw({error, {unacceptable_type_in_header, binary_to_list(HeaderKey), Type}}) end || HeaderKey <- ?ROUTING_HEADERS]). + +parse_expiration(#'P_basic'{expiration = undefined}) -> + {ok, undefined}; +parse_expiration(#'P_basic'{expiration = Expiration}) -> + case string:to_integer(binary_to_list(Expiration)) of + {error, no_integer} = E -> + E; + {N, ""} -> + case rabbit_misc:check_expiry(N) of + ok -> {ok, N}; + E = {error, _} -> E + end; + {_, S} -> + {error, {leftover_string, S}} + end. + diff --git a/src/rabbit_binary_generator.erl b/src/rabbit_binary_generator.erl index d69376fb..ae5bbf51 100644 --- a/src/rabbit_binary_generator.erl +++ b/src/rabbit_binary_generator.erl @@ -10,28 +10,19 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_binary_generator). -include("rabbit_framing.hrl"). -include("rabbit.hrl"). -%% EMPTY_CONTENT_BODY_FRAME_SIZE, 8 = 1 + 2 + 4 + 1 -%% - 1 byte of frame type -%% - 2 bytes of channel number -%% - 4 bytes of frame payload length -%% - 1 byte of payload trailer FRAME_END byte -%% See definition of check_empty_content_body_frame_size/0, -%% an assertion called at startup. --define(EMPTY_CONTENT_BODY_FRAME_SIZE, 8). - -export([build_simple_method_frame/3, build_simple_content_frames/4, build_heartbeat_frame/0]). --export([generate_table/1, encode_properties/2]). --export([check_empty_content_body_frame_size/0]). +-export([generate_table/1]). +-export([check_empty_frame_size/0]). -export([ensure_content_encoded/2, clear_encoded_content/1]). -export([map_exception/3]). @@ -51,9 +42,7 @@ -> [frame()]). -spec(build_heartbeat_frame/0 :: () -> frame()). -spec(generate_table/1 :: (rabbit_framing:amqp_table()) -> binary()). --spec(encode_properties/2 :: - ([rabbit_framing:amqp_property_type()], [any()]) -> binary()). --spec(check_empty_content_body_frame_size/0 :: () -> 'ok'). +-spec(check_empty_frame_size/0 :: () -> 'ok'). -spec(ensure_content_encoded/2 :: (rabbit_types:content(), rabbit_types:protocol()) -> rabbit_types:encoded_content()). @@ -88,10 +77,8 @@ build_simple_content_frames(ChannelInt, Content, FrameMax, Protocol) -> [HeaderFrame | ContentFrames]. build_content_frames(FragsRev, FrameMax, ChannelInt) -> - BodyPayloadMax = if FrameMax == 0 -> - iolist_size(FragsRev); - true -> - FrameMax - ?EMPTY_CONTENT_BODY_FRAME_SIZE + BodyPayloadMax = if FrameMax == 0 -> iolist_size(FragsRev); + true -> FrameMax - ?EMPTY_FRAME_SIZE end, build_content_frames(0, [], BodyPayloadMax, [], lists:reverse(FragsRev), BodyPayloadMax, ChannelInt). @@ -129,51 +116,24 @@ create_frame(TypeInt, ChannelInt, Payload) -> %% table_field_to_binary supports the AMQP 0-8/0-9 standard types, S, %% I, D, T and F, as well as the QPid extensions b, d, f, l, s, t, x, %% and V. - -table_field_to_binary({FName, Type, Value}) -> - [short_string_to_binary(FName) | field_value_to_binary(Type, Value)]. - -field_value_to_binary(longstr, Value) -> - ["S", long_string_to_binary(Value)]; - -field_value_to_binary(signedint, Value) -> - ["I", <<Value:32/signed>>]; - -field_value_to_binary(decimal, {Before, After}) -> - ["D", Before, <<After:32>>]; - -field_value_to_binary(timestamp, Value) -> - ["T", <<Value:64>>]; - -field_value_to_binary(table, Value) -> - ["F", table_to_binary(Value)]; - -field_value_to_binary(array, Value) -> - ["A", array_to_binary(Value)]; - -field_value_to_binary(byte, Value) -> - ["b", <<Value:8/unsigned>>]; - -field_value_to_binary(double, Value) -> - ["d", <<Value:64/float>>]; - -field_value_to_binary(float, Value) -> - ["f", <<Value:32/float>>]; - -field_value_to_binary(long, Value) -> - ["l", <<Value:64/signed>>]; - -field_value_to_binary(short, Value) -> - ["s", <<Value:16/signed>>]; - -field_value_to_binary(bool, Value) -> - ["t", if Value -> 1; true -> 0 end]; - -field_value_to_binary(binary, Value) -> - ["x", long_string_to_binary(Value)]; - -field_value_to_binary(void, _Value) -> - ["V"]. +table_field_to_binary({FName, T, V}) -> + [short_string_to_binary(FName) | field_value_to_binary(T, V)]. + +field_value_to_binary(longstr, V) -> ["S", long_string_to_binary(V)]; +field_value_to_binary(signedint, V) -> ["I", <<V:32/signed>>]; +field_value_to_binary(decimal, V) -> {Before, After} = V, + ["D", Before, <<After:32>>]; +field_value_to_binary(timestamp, V) -> ["T", <<V:64>>]; +field_value_to_binary(table, V) -> ["F", table_to_binary(V)]; +field_value_to_binary(array, V) -> ["A", array_to_binary(V)]; +field_value_to_binary(byte, V) -> ["b", <<V:8/unsigned>>]; +field_value_to_binary(double, V) -> ["d", <<V:64/float>>]; +field_value_to_binary(float, V) -> ["f", <<V:32/float>>]; +field_value_to_binary(long, V) -> ["l", <<V:64/signed>>]; +field_value_to_binary(short, V) -> ["s", <<V:16/signed>>]; +field_value_to_binary(bool, V) -> ["t", if V -> 1; true -> 0 end]; +field_value_to_binary(binary, V) -> ["x", long_string_to_binary(V)]; +field_value_to_binary(void, _V) -> ["V"]. table_to_binary(Table) when is_list(Table) -> BinTable = generate_table(Table), @@ -187,9 +147,8 @@ generate_table(Table) when is_list(Table) -> list_to_binary(lists:map(fun table_field_to_binary/1, Table)). generate_array(Array) when is_list(Array) -> - list_to_binary(lists:map( - fun ({Type, Value}) -> field_value_to_binary(Type, Value) end, - Array)). + list_to_binary(lists:map(fun ({T, V}) -> field_value_to_binary(T, V) end, + Array)). short_string_to_binary(String) when is_binary(String) -> Len = size(String), @@ -207,65 +166,12 @@ long_string_to_binary(String) when is_binary(String) -> long_string_to_binary(String) -> [<<(length(String)):32>>, String]. -encode_properties([], []) -> - <<0, 0>>; -encode_properties(TypeList, ValueList) -> - encode_properties(0, TypeList, ValueList, 0, [], []). - -encode_properties(_Bit, [], [], FirstShortAcc, FlagsAcc, PropsAcc) -> - list_to_binary([lists:reverse(FlagsAcc), <<FirstShortAcc:16>>, lists:reverse(PropsAcc)]); -encode_properties(_Bit, [], _ValueList, _FirstShortAcc, _FlagsAcc, _PropsAcc) -> - exit(content_properties_values_overflow); -encode_properties(15, TypeList, ValueList, FirstShortAcc, FlagsAcc, PropsAcc) -> - NewFlagsShort = FirstShortAcc bor 1, % set the continuation low bit - encode_properties(0, TypeList, ValueList, 0, [<<NewFlagsShort:16>> | FlagsAcc], PropsAcc); -encode_properties(Bit, [bit | TypeList], [Value | ValueList], FirstShortAcc, FlagsAcc, PropsAcc) -> - case Value of - true -> encode_properties(Bit + 1, TypeList, ValueList, - FirstShortAcc bor (1 bsl (15 - Bit)), FlagsAcc, PropsAcc); - false -> encode_properties(Bit + 1, TypeList, ValueList, - FirstShortAcc, FlagsAcc, PropsAcc); - Other -> exit({content_properties_illegal_bit_value, Other}) - end; -encode_properties(Bit, [T | TypeList], [Value | ValueList], FirstShortAcc, FlagsAcc, PropsAcc) -> - case Value of - undefined -> encode_properties(Bit + 1, TypeList, ValueList, - FirstShortAcc, FlagsAcc, PropsAcc); - _ -> encode_properties(Bit + 1, TypeList, ValueList, - FirstShortAcc bor (1 bsl (15 - Bit)), - FlagsAcc, - [encode_property(T, Value) | PropsAcc]) - end. - -encode_property(shortstr, String) -> - Len = size(String), - if Len < 256 -> <<Len:8, String:Len/binary>>; - true -> exit(content_properties_shortstr_overflow) - end; -encode_property(longstr, String) -> - Len = size(String), <<Len:32, String:Len/binary>>; -encode_property(octet, Int) -> - <<Int:8/unsigned>>; -encode_property(shortint, Int) -> - <<Int:16/unsigned>>; -encode_property(longint, Int) -> - <<Int:32/unsigned>>; -encode_property(longlongint, Int) -> - <<Int:64/unsigned>>; -encode_property(timestamp, Int) -> - <<Int:64/unsigned>>; -encode_property(table, Table) -> - table_to_binary(Table). - -check_empty_content_body_frame_size() -> - %% Intended to ensure that EMPTY_CONTENT_BODY_FRAME_SIZE is - %% defined correctly. - ComputedSize = iolist_size(create_frame(?FRAME_BODY, 0, <<>>)), - if ComputedSize == ?EMPTY_CONTENT_BODY_FRAME_SIZE -> - ok; - true -> - exit({incorrect_empty_content_body_frame_size, - ComputedSize, ?EMPTY_CONTENT_BODY_FRAME_SIZE}) +check_empty_frame_size() -> + %% Intended to ensure that EMPTY_FRAME_SIZE is defined correctly. + case iolist_size(create_frame(?FRAME_BODY, 0, <<>>)) of + ?EMPTY_FRAME_SIZE -> ok; + ComputedSize -> exit({incorrect_empty_frame_size, + ComputedSize, ?EMPTY_FRAME_SIZE}) end. ensure_content_encoded(Content = #content{properties_bin = PropBin, diff --git a/src/rabbit_binary_parser.erl b/src/rabbit_binary_parser.erl index 5f0016b6..dc6d090f 100644 --- a/src/rabbit_binary_parser.erl +++ b/src/rabbit_binary_parser.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_binary_parser). @@ -50,47 +50,36 @@ parse_array(<<ValueAndRest/binary>>) -> {Type, Value, Rest} = parse_field_value(ValueAndRest), [{Type, Value} | parse_array(Rest)]. -parse_field_value(<<"S", VLen:32/unsigned, ValueString:VLen/binary, Rest/binary>>) -> - {longstr, ValueString, Rest}; +parse_field_value(<<"S", VLen:32/unsigned, V:VLen/binary, R/binary>>) -> + {longstr, V, R}; -parse_field_value(<<"I", Value:32/signed, Rest/binary>>) -> - {signedint, Value, Rest}; +parse_field_value(<<"I", V:32/signed, R/binary>>) -> + {signedint, V, R}; -parse_field_value(<<"D", Before:8/unsigned, After:32/unsigned, Rest/binary>>) -> - {decimal, {Before, After}, Rest}; +parse_field_value(<<"D", Before:8/unsigned, After:32/unsigned, R/binary>>) -> + {decimal, {Before, After}, R}; -parse_field_value(<<"T", Value:64/unsigned, Rest/binary>>) -> - {timestamp, Value, Rest}; +parse_field_value(<<"T", V:64/unsigned, R/binary>>) -> + {timestamp, V, R}; -parse_field_value(<<"F", VLen:32/unsigned, Table:VLen/binary, Rest/binary>>) -> - {table, parse_table(Table), Rest}; +parse_field_value(<<"F", VLen:32/unsigned, Table:VLen/binary, R/binary>>) -> + {table, parse_table(Table), R}; -parse_field_value(<<"A", VLen:32/unsigned, Array:VLen/binary, Rest/binary>>) -> - {array, parse_array(Array), Rest}; +parse_field_value(<<"A", VLen:32/unsigned, Array:VLen/binary, R/binary>>) -> + {array, parse_array(Array), R}; -parse_field_value(<<"b", Value:8/unsigned, Rest/binary>>) -> - {byte, Value, Rest}; +parse_field_value(<<"b", V:8/unsigned, R/binary>>) -> {byte, V, R}; +parse_field_value(<<"d", V:64/float, R/binary>>) -> {double, V, R}; +parse_field_value(<<"f", V:32/float, R/binary>>) -> {float, V, R}; +parse_field_value(<<"l", V:64/signed, R/binary>>) -> {long, V, R}; +parse_field_value(<<"s", V:16/signed, R/binary>>) -> {short, V, R}; +parse_field_value(<<"t", V:8/unsigned, R/binary>>) -> {bool, (V /= 0), R}; -parse_field_value(<<"d", Value:64/float, Rest/binary>>) -> - {double, Value, Rest}; +parse_field_value(<<"x", VLen:32/unsigned, V:VLen/binary, R/binary>>) -> + {binary, V, R}; -parse_field_value(<<"f", Value:32/float, Rest/binary>>) -> - {float, Value, Rest}; - -parse_field_value(<<"l", Value:64/signed, Rest/binary>>) -> - {long, Value, Rest}; - -parse_field_value(<<"s", Value:16/signed, Rest/binary>>) -> - {short, Value, Rest}; - -parse_field_value(<<"t", Value:8/unsigned, Rest/binary>>) -> - {bool, (Value /= 0), Rest}; - -parse_field_value(<<"x", VLen:32/unsigned, ValueString:VLen/binary, Rest/binary>>) -> - {binary, ValueString, Rest}; - -parse_field_value(<<"V", Rest/binary>>) -> - {void, undefined, Rest}. +parse_field_value(<<"V", R/binary>>) -> + {void, undefined, R}. ensure_content_decoded(Content = #content{properties = Props}) when Props =/= none -> diff --git a/src/rabbit_binding.erl b/src/rabbit_binding.erl index bb44797e..91f42e9c 100644 --- a/src/rabbit_binding.erl +++ b/src/rabbit_binding.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_binding). @@ -35,11 +35,16 @@ -type(key() :: binary()). --type(bind_errors() :: rabbit_types:error('source_not_found' | - 'destination_not_found' | - 'source_and_destination_not_found')). +-type(bind_errors() :: rabbit_types:error( + {'resources_missing', + [{'not_found', (rabbit_types:binding_source() | + rabbit_types:binding_destination())} | + {'absent', rabbit_types:amqqueue()}]})). + -type(bind_ok_or_error() :: 'ok' | bind_errors() | - rabbit_types:error('binding_not_found')). + rabbit_types:error( + 'binding_not_found' | + {'binding_invalid', string(), [any()]})). -type(bind_res() :: bind_ok_or_error() | rabbit_misc:thunk(bind_ok_or_error())). -type(inner_fun() :: fun((rabbit_types:exchange(), @@ -155,33 +160,38 @@ add(Binding, InnerFun) -> binding_action( Binding, fun (Src, Dst, B) -> - %% this argument is used to check queue exclusivity; - %% in general, we want to fail on that in preference to - %% anything else - case InnerFun(Src, Dst) of - ok -> case mnesia:read({rabbit_route, B}) of - [] -> add(Src, Dst, B); - [_] -> fun rabbit_misc:const_ok/0 - end; - {error, _} = Err -> rabbit_misc:const(Err) + case rabbit_exchange:validate_binding(Src, B) of + ok -> + %% this argument is used to check queue exclusivity; + %% in general, we want to fail on that in preference to + %% anything else + case InnerFun(Src, Dst) of + ok -> + case mnesia:read({rabbit_route, B}) of + [] -> add(Src, Dst, B); + [_] -> fun rabbit_misc:const_ok/0 + end; + {error, _} = Err -> + rabbit_misc:const(Err) + end; + {error, _} = Err -> + rabbit_misc:const(Err) end end). add(Src, Dst, B) -> [SrcDurable, DstDurable] = [durable(E) || E <- [Src, Dst]], - case (not (SrcDurable andalso DstDurable) orelse - mnesia:read({rabbit_durable_route, B}) =:= []) of - true -> ok = sync_route(#route{binding = B}, SrcDurable, DstDurable, + case (SrcDurable andalso DstDurable andalso + mnesia:read({rabbit_durable_route, B}) =/= []) of + false -> ok = sync_route(#route{binding = B}, SrcDurable, DstDurable, fun mnesia:write/3), - ok = rabbit_exchange:callback( - Src, add_binding, [transaction, Src, B]), + x_callback(transaction, Src, add_binding, B), Serial = rabbit_exchange:serial(Src), fun () -> - ok = rabbit_exchange:callback( - Src, add_binding, [Serial, Src, B]), - ok = rabbit_event:notify(binding_created, info(B)) + x_callback(Serial, Src, add_binding, B), + ok = rabbit_event:notify(binding_created, info(B)) end; - false -> rabbit_misc:const({error, binding_not_found}) + true -> rabbit_misc:const({error, binding_not_found}) end. remove(Binding) -> remove(Binding, fun (_Src, _Dst) -> ok end). @@ -279,21 +289,15 @@ has_for_source(SrcName) -> remove_for_source(SrcName) -> lock_route_tables(), Match = #route{binding = #binding{source = SrcName, _ = '_'}}, - Routes = lists:usort( - mnesia:match_object(rabbit_route, Match, write) ++ - mnesia:match_object(rabbit_durable_route, Match, write)), - [begin - sync_route(Route, fun mnesia:delete_object/3), - Route#route.binding - end || Route <- Routes]. + remove_routes( + lists:usort(mnesia:match_object(rabbit_route, Match, write) ++ + mnesia:match_object(rabbit_durable_route, Match, write))). -remove_for_destination(Dst) -> - remove_for_destination( - Dst, fun (R) -> sync_route(R, fun mnesia:delete_object/3) end). +remove_for_destination(DstName) -> + remove_for_destination(DstName, fun remove_routes/1). -remove_transient_for_destination(Dst) -> - remove_for_destination( - Dst, fun (R) -> sync_transient_route(R, fun mnesia:delete_object/3) end). +remove_transient_for_destination(DstName) -> + remove_for_destination(DstName, fun remove_transient_routes/1). %%---------------------------------------------------------------------------- @@ -310,6 +314,14 @@ binding_action(Binding = #binding{source = SrcName, Fun(Src, Dst, Binding#binding{args = SortedArgs}) end). +delete_object(Tab, Record, LockKind) -> + %% this 'guarded' delete prevents unnecessary writes to the mnesia + %% disk log + case mnesia:match_object(Tab, Record, LockKind) of + [] -> ok; + [_] -> mnesia:delete_object(Tab, Record, LockKind) + end. + sync_route(R, Fun) -> sync_route(R, true, true, Fun). sync_route(Route, true, true, Fun) -> @@ -330,21 +342,32 @@ sync_transient_route(Route, Fun) -> call_with_source_and_destination(SrcName, DstName, Fun) -> SrcTable = table_for_resource(SrcName), DstTable = table_for_resource(DstName), - ErrFun = fun (Err) -> rabbit_misc:const({error, Err}) end, + ErrFun = fun (Names) -> + Errs = [not_found_or_absent(Name) || Name <- Names], + rabbit_misc:const({error, {resources_missing, Errs}}) + end, rabbit_misc:execute_mnesia_tx_with_tail( fun () -> case {mnesia:read({SrcTable, SrcName}), mnesia:read({DstTable, DstName})} of {[Src], [Dst]} -> Fun(Src, Dst); - {[], [_] } -> ErrFun(source_not_found); - {[_], [] } -> ErrFun(destination_not_found); - {[], [] } -> ErrFun(source_and_destination_not_found) - end + {[], [_] } -> ErrFun([SrcName]); + {[_], [] } -> ErrFun([DstName]); + {[], [] } -> ErrFun([SrcName, DstName]) + end end). table_for_resource(#resource{kind = exchange}) -> rabbit_exchange; table_for_resource(#resource{kind = queue}) -> rabbit_queue. +not_found_or_absent(#resource{kind = exchange} = Name) -> + {not_found, Name}; +not_found_or_absent(#resource{kind = queue} = Name) -> + case rabbit_amqqueue:not_found_or_absent(Name) of + not_found -> {not_found, Name}; + {absent, _Q} = R -> R + end. + contains(Table, MatchHead) -> continue(mnesia:select(Table, [{MatchHead, [], ['$_']}], 1, read)). @@ -372,16 +395,32 @@ lock_route_tables() -> rabbit_semi_durable_route, rabbit_durable_route]]. -remove_for_destination(DstName, DeleteFun) -> +remove_routes(Routes) -> + %% This partitioning allows us to suppress unnecessary delete + %% operations on disk tables, which require an fsync. + {TransientRoutes, DurableRoutes} = + lists:partition(fun (R) -> mnesia:match_object( + rabbit_durable_route, R, write) == [] end, + Routes), + [ok = sync_transient_route(R, fun mnesia:delete_object/3) || + R <- TransientRoutes], + [ok = sync_route(R, fun mnesia:delete_object/3) || + R <- DurableRoutes], + [R#route.binding || R <- Routes]. + +remove_transient_routes(Routes) -> + [begin + ok = sync_transient_route(R, fun delete_object/3), + R#route.binding + end || R <- Routes]. + +remove_for_destination(DstName, Fun) -> lock_route_tables(), Match = reverse_route( #route{binding = #binding{destination = DstName, _ = '_'}}), - ReverseRoutes = mnesia:match_object(rabbit_reverse_route, Match, write), - Bindings = [begin - Route = reverse_route(ReverseRoute), - ok = DeleteFun(Route), - Route#route.binding - end || ReverseRoute <- ReverseRoutes], + Routes = [reverse_route(R) || R <- mnesia:match_object( + rabbit_reverse_route, Match, write)], + Bindings = Fun(Routes), group_bindings_fold(fun maybe_auto_delete/3, new_deletions(), lists:keysort(#binding.source, Bindings)). @@ -487,4 +526,5 @@ process_deletions(Deletions) -> del_notify(Bs) -> [rabbit_event:notify(binding_deleted, info(B)) || B <- Bs]. -x_callback(Arg, X, F, Bs) -> ok = rabbit_exchange:callback(X, F, [Arg, X, Bs]). +x_callback(Serial, X, F, Bs) -> + ok = rabbit_exchange:callback(X, F, Serial, [X, Bs]). diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 22c6a223..d6c1e8c0 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_channel). @@ -21,22 +21,23 @@ -behaviour(gen_server2). -export([start_link/11, do/2, do/3, do_flow/3, flush/1, shutdown/1]). --export([send_command/2, deliver/4, flushed/2]). +-export([send_command/2, deliver/4, send_credit_reply/2, send_drained/2, + flushed/2]). -export([list/0, info_keys/0, info/1, info/2, info_all/0, info_all/1]). -export([refresh_config_local/0, ready_for_close/1]). -export([force_event_refresh/0]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, - handle_info/2, handle_pre_hibernate/1, prioritise_call/3, - prioritise_cast/2, prioritise_info/2, format_message_queue/2]). + handle_info/2, handle_pre_hibernate/1, prioritise_call/4, + prioritise_cast/3, prioritise_info/3, format_message_queue/2]). %% Internal -export([list_local/0]). -record(ch, {state, protocol, channel, reader_pid, writer_pid, conn_pid, - conn_name, limiter, tx_status, next_tag, unacked_message_q, - uncommitted_message_q, uncommitted_acks, uncommitted_nacks, user, - virtual_host, most_recently_declared_queue, queue_monitors, - consumer_mapping, blocking, queue_consumers, delivering_queues, + conn_name, limiter, tx, next_tag, unacked_message_q, user, + virtual_host, most_recently_declared_queue, + queue_names, queue_monitors, consumer_mapping, + blocking, queue_consumers, delivering_queues, queue_collector_pid, stats_timer, confirm_enabled, publish_seqno, unconfirmed, confirmed, capabilities, trace_state}). @@ -64,6 +65,12 @@ -define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). +-define(INCR_STATS(Incs, Measure, State), + case rabbit_event:stats_level(State, #ch.stats_timer) of + fine -> incr_stats(Incs, Measure); + _ -> ok + end). + %%---------------------------------------------------------------------------- -ifdef(use_specs). @@ -75,8 +82,8 @@ -spec(start_link/11 :: (channel_number(), pid(), pid(), pid(), string(), rabbit_types:protocol(), rabbit_types:user(), rabbit_types:vhost(), - rabbit_framing:amqp_table(), - pid(), rabbit_limiter:token()) -> rabbit_types:ok_pid_or_error()). + rabbit_framing:amqp_table(), pid(), pid()) -> + rabbit_types:ok_pid_or_error()). -spec(do/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). -spec(do/3 :: (pid(), rabbit_framing:amqp_method_record(), rabbit_types:maybe(rabbit_types:content())) -> 'ok'). @@ -88,6 +95,9 @@ -spec(deliver/4 :: (pid(), rabbit_types:ctag(), boolean(), rabbit_amqqueue:qmsg()) -> 'ok'). +-spec(send_credit_reply/2 :: (pid(), non_neg_integer()) -> 'ok'). +-spec(send_drained/2 :: (pid(), [{rabbit_types:ctag(), non_neg_integer()}]) + -> 'ok'). -spec(flushed/2 :: (pid(), pid()) -> 'ok'). -spec(list/0 :: () -> [pid()]). -spec(list_local/0 :: () -> [pid()]). @@ -132,11 +142,17 @@ send_command(Pid, Msg) -> deliver(Pid, ConsumerTag, AckRequired, Msg) -> gen_server2:cast(Pid, {deliver, ConsumerTag, AckRequired, Msg}). +send_credit_reply(Pid, Len) -> + gen_server2:cast(Pid, {send_credit_reply, Len}). + +send_drained(Pid, CTagCredit) -> + gen_server2:cast(Pid, {send_drained, CTagCredit}). + flushed(Pid, QPid) -> gen_server2:cast(Pid, {flushed, QPid}). list() -> - rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:cluster_nodes(running), rabbit_channel, list_local, []). list_local() -> @@ -174,7 +190,7 @@ force_event_refresh() -> %%--------------------------------------------------------------------------- init([Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User, VHost, - Capabilities, CollectorPid, Limiter]) -> + Capabilities, CollectorPid, LimiterPid]) -> process_flag(trap_exit, true), ok = pg_local:join(rabbit_channels, self()), State = #ch{state = starting, @@ -184,16 +200,14 @@ init([Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User, VHost, writer_pid = WriterPid, conn_pid = ConnPid, conn_name = ConnName, - limiter = Limiter, - tx_status = none, + limiter = rabbit_limiter:new(LimiterPid), + tx = none, next_tag = 1, unacked_message_q = queue:new(), - uncommitted_message_q = queue:new(), - uncommitted_acks = [], - uncommitted_nacks = [], user = User, virtual_host = VHost, most_recently_declared_queue = <<>>, + queue_names = dict:new(), queue_monitors = pmon:new(), consumer_mapping = dict:new(), blocking = sets:new(), @@ -213,20 +227,20 @@ init([Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User, VHost, {ok, State1, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -prioritise_call(Msg, _From, _State) -> +prioritise_call(Msg, _From, _Len, _State) -> case Msg of info -> 9; {info, _Items} -> 9; _ -> 0 end. -prioritise_cast(Msg, _State) -> +prioritise_cast(Msg, _Len, _State) -> case Msg of {confirm, _MsgSeqNos, _QPid} -> 5; _ -> 0 end. -prioritise_info(Msg, _State) -> +prioritise_info(Msg, _Len, _State) -> case Msg of emit_stats -> 7; _ -> 0 @@ -258,7 +272,7 @@ handle_cast({method, Method, Content, Flow}, end, try handle_method(Method, Content, State) of {reply, Reply, NewState} -> - ok = rabbit_writer:send_command(NewState#ch.writer_pid, Reply), + ok = send(Reply, NewState), noreply(NewState); {noreply, NewState} -> noreply(NewState); @@ -267,7 +281,7 @@ handle_cast({method, Method, Content, Flow}, catch exit:Reason = #amqp_error{} -> MethodName = rabbit_misc:method_record_type(Method), - send_exception(Reason#amqp_error{method = MethodName}, State); + handle_exception(Reason#amqp_error{method = MethodName}, State); _:Reason -> {stop, {Reason, erlang:get_stacktrace()}, State} end; @@ -280,18 +294,20 @@ handle_cast(ready_for_close, State = #ch{state = closing, ok = rabbit_writer:send_command_sync(WriterPid, #'channel.close_ok'{}), {stop, normal, State}; -handle_cast(terminate, State) -> +handle_cast(terminate, State = #ch{writer_pid = WriterPid}) -> + ok = rabbit_writer:flush(WriterPid), {stop, normal, State}; -handle_cast({command, #'basic.consume_ok'{consumer_tag = ConsumerTag} = Msg}, - State = #ch{writer_pid = WriterPid}) -> - ok = rabbit_writer:send_command(WriterPid, Msg), - noreply(consumer_monitor(ConsumerTag, State)); +handle_cast({command, #'basic.consume_ok'{consumer_tag = CTag} = Msg}, State) -> + ok = send(Msg, State), + noreply(consumer_monitor(CTag, State)); -handle_cast({command, Msg}, State = #ch{writer_pid = WriterPid}) -> - ok = rabbit_writer:send_command(WriterPid, Msg), +handle_cast({command, Msg}, State) -> + ok = send(Msg, State), noreply(State); +handle_cast({deliver, _CTag, _AckReq, _Msg}, State = #ch{state = closing}) -> + noreply(State); handle_cast({deliver, ConsumerTag, AckRequired, Msg = {_QName, QPid, _MsgId, Redelivered, #basic_message{exchange_name = ExchangeName, @@ -309,12 +325,27 @@ handle_cast({deliver, ConsumerTag, AckRequired, Content), noreply(record_sent(ConsumerTag, AckRequired, Msg, State)); +handle_cast({send_credit_reply, Len}, State = #ch{writer_pid = WriterPid}) -> + ok = rabbit_writer:send_command( + WriterPid, #'basic.credit_ok'{available = Len}), + noreply(State); + +handle_cast({send_drained, CTagCredit}, State = #ch{writer_pid = WriterPid}) -> + [ok = rabbit_writer:send_command( + WriterPid, #'basic.credit_drained'{consumer_tag = ConsumerTag, + credit_drained = CreditDrained}) + || {ConsumerTag, CreditDrained} <- CTagCredit], + noreply(State); + handle_cast(force_event_refresh, State) -> rabbit_event:notify(channel_created, infos(?CREATION_EVENT_KEYS, State)), noreply(State); + handle_cast({confirm, MsgSeqNos, From}, State) -> State1 = #ch{confirmed = C} = confirm(MsgSeqNos, From, State), - noreply([send_confirms], State1, case C of [] -> hibernate; _ -> 0 end). + Timeout = case C of [] -> hibernate; _ -> 0 end, + %% NB: don't call noreply/1 since we don't want to send confirms. + {noreply, ensure_stats_timer(State1), Timeout}. handle_info({bump_credit, Msg}, State) -> credit_flow:handle_bump_msg(Msg), @@ -325,8 +356,10 @@ handle_info(timeout, State) -> handle_info(emit_stats, State) -> emit_stats(State), - noreply([ensure_stats_timer], - rabbit_event:reset_stats_timer(State, #ch.stats_timer)); + State1 = rabbit_event:reset_stats_timer(State, #ch.stats_timer), + %% NB: don't call noreply/1 since we don't want to kick off the + %% stats timer. + {noreply, send_confirms(State1), hibernate}; handle_info({'DOWN', _MRef, process, QPid, Reason}, State) -> State1 = handle_publishing_queue_down(QPid, Reason, State), @@ -334,9 +367,13 @@ handle_info({'DOWN', _MRef, process, QPid, Reason}, State) -> State3 = handle_consuming_queue_down(QPid, State2), State4 = handle_delivering_queue_down(QPid, State3), credit_flow:peer_down(QPid), - erase_queue_stats(QPid), - noreply(State3#ch{queue_monitors = pmon:erase( - QPid, State4#ch.queue_monitors)}); + #ch{queue_names = QNames, queue_monitors = QMons} = State4, + case dict:find(QPid, QNames) of + {ok, QName} -> erase_queue_stats(QName); + error -> ok + end, + noreply(State4#ch{queue_names = dict:erase(QPid, QNames), + queue_monitors = pmon:erase(QPid, QMons)}); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}. @@ -357,6 +394,8 @@ terminate(Reason, State) -> _ -> ok end, pg_local:leave(rabbit_channels, self()), + rabbit_event:if_enabled(State, #ch.stats_timer, + fun() -> emit_stats(State) end), rabbit_event:notify(channel_closed, [{pid, self()}]). code_change(_OldVsn, State, _Extra) -> @@ -366,30 +405,11 @@ format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). %%--------------------------------------------------------------------------- -reply(Reply, NewState) -> reply(Reply, [], NewState). - -reply(Reply, Mask, NewState) -> reply(Reply, Mask, NewState, hibernate). - -reply(Reply, Mask, NewState, Timeout) -> - {reply, Reply, next_state(Mask, NewState), Timeout}. - -noreply(NewState) -> noreply([], NewState). - -noreply(Mask, NewState) -> noreply(Mask, NewState, hibernate). - -noreply(Mask, NewState, Timeout) -> - {noreply, next_state(Mask, NewState), Timeout}. +reply(Reply, NewState) -> {reply, Reply, next_state(NewState), hibernate}. --define(MASKED_CALL(Fun, Mask, State), - case lists:member(Fun, Mask) of - true -> State; - false -> Fun(State) - end). +noreply(NewState) -> {noreply, next_state(NewState), hibernate}. -next_state(Mask, State) -> - State1 = ?MASKED_CALL(ensure_stats_timer, Mask, State), - State2 = ?MASKED_CALL(send_confirms, Mask, State1), - State2. +next_state(State) -> ensure_stats_timer(send_confirms(State)). ensure_stats_timer(State) -> rabbit_event:ensure_stats_timer(State, #ch.stats_timer, emit_stats). @@ -400,24 +420,40 @@ return_ok(State, false, Msg) -> {reply, Msg, State}. ok_msg(true, _Msg) -> undefined; ok_msg(false, Msg) -> Msg. -send_exception(Reason, State = #ch{protocol = Protocol, - channel = Channel, - writer_pid = WriterPid, - reader_pid = ReaderPid, - conn_pid = ConnPid}) -> - {CloseChannel, CloseMethod} = - rabbit_binary_generator:map_exception(Channel, Reason, Protocol), - rabbit_log:error("connection ~p, channel ~p - error:~n~p~n", - [ConnPid, Channel, Reason]), +send(_Command, #ch{state = closing}) -> + ok; +send(Command, #ch{writer_pid = WriterPid}) -> + ok = rabbit_writer:send_command(WriterPid, Command). + +handle_exception(Reason, State = #ch{protocol = Protocol, + channel = Channel, + writer_pid = WriterPid, + reader_pid = ReaderPid, + conn_pid = ConnPid}) -> %% something bad's happened: notify_queues may not be 'ok' {_Result, State1} = notify_queues(State), - case CloseChannel of - Channel -> ok = rabbit_writer:send_command(WriterPid, CloseMethod), - {noreply, State1}; - _ -> ReaderPid ! {channel_exit, Channel, Reason}, - {stop, normal, State1} + case rabbit_binary_generator:map_exception(Channel, Reason, Protocol) of + {Channel, CloseMethod} -> + rabbit_log:error("connection ~p, channel ~p - soft error:~n~p~n", + [ConnPid, Channel, Reason]), + ok = rabbit_writer:send_command(WriterPid, CloseMethod), + {noreply, State1}; + {0, _} -> + ReaderPid ! {channel_exit, Channel, Reason}, + {stop, normal, State1} end. +-ifdef(use_specs). +-spec(precondition_failed/1 :: (string()) -> no_return()). +-endif. +precondition_failed(Format) -> precondition_failed(Format, []). + +-ifdef(use_specs). +-spec(precondition_failed/2 :: (string(), [any()]) -> no_return()). +-endif. +precondition_failed(Format, Params) -> + rabbit_misc:protocol_error(precondition_failed, Format, Params). + return_queue_declare_ok(#resource{name = ActualName}, NoWait, MessageCount, ConsumerCount, State) -> return_ok(State#ch{most_recently_declared_queue = ActualName}, NoWait, @@ -431,15 +467,13 @@ check_resource_access(User, Resource, Perm) -> undefined -> []; Other -> Other end, - CacheTail = - case lists:member(V, Cache) of - true -> lists:delete(V, Cache); - false -> ok = rabbit_access_control:check_resource_access( - User, Resource, Perm), - lists:sublist(Cache, ?MAX_PERMISSION_CACHE_SIZE - 1) - end, - put(permission_cache, [V | CacheTail]), - ok. + case lists:member(V, Cache) of + true -> ok; + false -> ok = rabbit_access_control:check_resource_access( + User, Resource, Perm), + CacheTail = lists:sublist(Cache, ?MAX_PERMISSION_CACHE_SIZE-1), + put(permission_cache, [V | CacheTail]) + end. clear_permission_cache() -> erase(permission_cache), @@ -460,10 +494,21 @@ check_user_id_header(#'P_basic'{user_id = Username}, #ch{user = #user{username = Username}}) -> ok; check_user_id_header(#'P_basic'{user_id = Claimed}, - #ch{user = #user{username = Actual}}) -> - rabbit_misc:protocol_error( - precondition_failed, "user_id property set to '~s' but " - "authenticated user was '~s'", [Claimed, Actual]). + #ch{user = #user{username = Actual, + tags = Tags}}) -> + case lists:member(impersonator, Tags) of + true -> ok; + false -> precondition_failed( + "user_id property set to '~s' but authenticated user was " + "'~s'", [Claimed, Actual]) + end. + +check_expiration_header(Props) -> + case rabbit_basic:parse_expiration(Props) of + {ok, _} -> ok; + {error, E} -> precondition_failed("invalid expiration '~s': ~p", + [Props#'P_basic'.expiration, E]) + end. check_internal_exchange(#exchange{name = Name, internal = true}) -> rabbit_misc:protocol_error(access_refused, @@ -507,16 +552,12 @@ check_not_default_exchange(_) -> %% check that an exchange/queue name does not contain the reserved %% "amq." prefix. %% -%% One, quite reasonable, interpretation of the spec, taken by the -%% QPid M1 Java client, is that the exclusion of "amq." prefixed names +%% As per the AMQP 0-9-1 spec, the exclusion of "amq." prefixed names %% only applies on actual creation, and not in the cases where the -%% entity already exists. This is how we use this function in the code -%% below. However, AMQP JIRA 123 changes that in 0-10, and possibly -%% 0-9SP1, making it illegal to attempt to declare an exchange/queue -%% with an amq.* name when passive=false. So this will need -%% revisiting. +%% entity already exists or passive=true. %% -%% TODO: enforce other constraints on name. See AMQP JIRA 69. +%% NB: We deliberately do not enforce the other constraints on names +%% required by the spec. check_name(Kind, NameBin = <<"amq.", _/binary>>) -> rabbit_misc:protocol_error( access_refused, @@ -527,20 +568,16 @@ check_name(_Kind, NameBin) -> queue_blocked(QPid, State = #ch{blocking = Blocking}) -> case sets:is_element(QPid, Blocking) of false -> State; - true -> Blocking1 = sets:del_element(QPid, Blocking), - ok = case sets:size(Blocking1) of - 0 -> rabbit_writer:send_command( - State#ch.writer_pid, - #'channel.flow_ok'{active = false}); - _ -> ok - end, - State#ch{blocking = Blocking1} + true -> maybe_send_flow_ok( + State#ch{blocking = sets:del_element(QPid, Blocking)}) end. -record_confirm(undefined, _, State) -> - State; -record_confirm(MsgSeqNo, XName, State) -> - record_confirms([{MsgSeqNo, XName}], State). +maybe_send_flow_ok(State = #ch{blocking = Blocking}) -> + case sets:size(Blocking) of + 0 -> ok = send(#'channel.flow_ok'{active = false}, State); + _ -> ok + end, + State. record_confirms([], State) -> State; @@ -566,14 +603,25 @@ handle_method(_Method, _, #ch{state = starting}) -> handle_method(#'channel.close_ok'{}, _, #ch{state = closing}) -> stop; -handle_method(#'channel.close'{}, _, State = #ch{state = closing}) -> - {reply, #'channel.close_ok'{}, State}; +handle_method(#'channel.close'{}, _, State = #ch{writer_pid = WriterPid, + state = closing}) -> + ok = rabbit_writer:send_command(WriterPid, #'channel.close_ok'{}), + {noreply, State}; handle_method(_Method, _, State = #ch{state = closing}) -> {noreply, State}; handle_method(#'channel.close'{}, _, State = #ch{reader_pid = ReaderPid}) -> {ok, State1} = notify_queues(State), + %% We issue the channel.close_ok response after a handshake with + %% the reader, the other half of which is ready_for_close. That + %% way the reader forgets about the channel before we send the + %% response (and this channel process terminates). If we didn't do + %% that, a channel.open for the same channel number, which a + %% client is entitled to send as soon as it has received the + %% close_ok, might be received by the reader before it has seen + %% the termination and hence be sent to the old, now dead/dying + %% channel process, instead of a new process, and thus lost. ReaderPid ! {channel_closing, self()}, {noreply, State1}; @@ -581,20 +629,22 @@ handle_method(#'channel.close'{}, _, State = #ch{reader_pid = ReaderPid}) -> %% while waiting for the reply to a synchronous command, we generally %% do allow this...except in the case of a pending tx.commit, where %% it could wreak havoc. -handle_method(_Method, _, #ch{tx_status = TxStatus}) - when TxStatus =/= none andalso TxStatus =/= in_progress -> +handle_method(_Method, _, #ch{tx = Tx}) + when Tx =:= committing orelse Tx =:= failed -> rabbit_misc:protocol_error( channel_error, "unexpected command while processing 'tx.commit'", []); handle_method(#'access.request'{},_, State) -> {reply, #'access.request_ok'{ticket = 1}, State}; +handle_method(#'basic.publish'{immediate = true}, _Content, _State) -> + rabbit_misc:protocol_error(not_implemented, "immediate=true", []); + handle_method(#'basic.publish'{exchange = ExchangeNameBin, routing_key = RoutingKey, - mandatory = Mandatory, - immediate = Immediate}, + mandatory = Mandatory}, Content, State = #ch{virtual_host = VHostPath, - tx_status = TxStatus, + tx = Tx, confirm_enabled = ConfirmEnabled, trace_state = TraceState}) -> ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin), @@ -603,30 +653,29 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, check_internal_exchange(Exchange), %% We decode the content's properties here because we're almost %% certain to want to look at delivery-mode and priority. - DecodedContent = rabbit_binary_parser:ensure_content_decoded(Content), - check_user_id_header(DecodedContent#content.properties, State), + DecodedContent = #content {properties = Props} = + rabbit_binary_parser:ensure_content_decoded(Content), + check_user_id_header(Props, State), + check_expiration_header(Props), {MsgSeqNo, State1} = - case {TxStatus, ConfirmEnabled} of + case {Tx, ConfirmEnabled} of {none, false} -> {undefined, State}; {_, _} -> SeqNo = State#ch.publish_seqno, {SeqNo, State#ch{publish_seqno = SeqNo + 1}} end, case rabbit_basic:message(ExchangeName, RoutingKey, DecodedContent) of {ok, Message} -> - rabbit_trace:tap_trace_in(Message, TraceState), - Delivery = rabbit_basic:delivery(Mandatory, Immediate, Message, - MsgSeqNo), + rabbit_trace:tap_in(Message, TraceState), + Delivery = rabbit_basic:delivery(Mandatory, Message, MsgSeqNo), QNames = rabbit_exchange:route(Exchange, Delivery), - {noreply, - case TxStatus of - none -> deliver_to_queues({Delivery, QNames}, State1); - in_progress -> TMQ = State1#ch.uncommitted_message_q, - NewTMQ = queue:in({Delivery, QNames}, TMQ), - State1#ch{uncommitted_message_q = NewTMQ} - end}; + DQ = {Delivery, QNames}, + {noreply, case Tx of + none -> deliver_to_queues(DQ, State1); + {Msgs, Acks} -> Msgs1 = queue:in(DQ, Msgs), + State1#ch{tx = {Msgs1, Acks}} + end}; {error, Reason} -> - rabbit_misc:protocol_error(precondition_failed, - "invalid message: ~p", [Reason]) + precondition_failed("invalid message: ~p", [Reason]) end; handle_method(#'basic.nack'{delivery_tag = DeliveryTag, @@ -637,29 +686,31 @@ handle_method(#'basic.nack'{delivery_tag = DeliveryTag, handle_method(#'basic.ack'{delivery_tag = DeliveryTag, multiple = Multiple}, - _, State = #ch{unacked_message_q = UAMQ, tx_status = TxStatus}) -> + _, State = #ch{unacked_message_q = UAMQ, tx = Tx}) -> {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple), State1 = State#ch{unacked_message_q = Remaining}, - {noreply, - case TxStatus of - none -> ack(Acked, State1), - State1; - in_progress -> State1#ch{uncommitted_acks = - Acked ++ State1#ch.uncommitted_acks} - end}; + {noreply, case Tx of + none -> ack(Acked, State1), + State1; + {Msgs, Acks} -> Acks1 = ack_cons(ack, Acked, Acks), + State1#ch{tx = {Msgs, Acks1}} + end}; handle_method(#'basic.get'{queue = QueueNameBin, no_ack = NoAck}, _, State = #ch{writer_pid = WriterPid, conn_pid = ConnPid, + limiter = Limiter, next_tag = DeliveryTag}) -> QueueName = expand_queue_name_shortcut(QueueNameBin, State), check_read_permitted(QueueName, State), case rabbit_amqqueue:with_exclusive_access_or_die( QueueName, ConnPid, - fun (Q) -> rabbit_amqqueue:basic_get(Q, self(), NoAck) end) of + fun (Q) -> rabbit_amqqueue:basic_get( + Q, self(), NoAck, rabbit_limiter:pid(Limiter)) + end) of {ok, MessageCount, - Msg = {_QName, QPid, _MsgId, Redelivered, + Msg = {QName, QPid, _MsgId, Redelivered, #basic_message{exchange_name = ExchangeName, routing_keys = [RoutingKey | _CcRoutes], content = Content}}} -> @@ -671,7 +722,7 @@ handle_method(#'basic.get'{queue = QueueNameBin, routing_key = RoutingKey, message_count = MessageCount}, Content), - State1 = monitor_delivering_queue(NoAck, QPid, State), + State1 = monitor_delivering_queue(NoAck, QPid, QName, State), {noreply, record_sent(none, not(NoAck), Msg, State1)}; empty -> {reply, #'basic.get_empty'{}, State} @@ -682,7 +733,8 @@ handle_method(#'basic.consume'{queue = QueueNameBin, no_local = _, % FIXME: implement no_ack = NoAck, exclusive = ExclusiveConsume, - nowait = NoWait}, + nowait = NoWait, + arguments = Arguments}, _, State = #ch{conn_pid = ConnPid, limiter = Limiter, consumer_mapping = ConsumerMapping}) -> @@ -704,16 +756,20 @@ handle_method(#'basic.consume'{queue = QueueNameBin, QueueName, ConnPid, fun (Q) -> {rabbit_amqqueue:basic_consume( - Q, NoAck, self(), Limiter, + Q, NoAck, self(), + rabbit_limiter:pid(Limiter), + rabbit_limiter:is_active(Limiter), ActualConsumerTag, ExclusiveConsume, + parse_credit_args(Arguments), ok_msg(NoWait, #'basic.consume_ok'{ consumer_tag = ActualConsumerTag})), Q} end) of - {ok, Q = #amqqueue{pid = QPid}} -> + {ok, Q = #amqqueue{pid = QPid, name = QName}} -> CM1 = dict:store(ActualConsumerTag, Q, ConsumerMapping), State1 = monitor_delivering_queue( - NoAck, QPid, State#ch{consumer_mapping = CM1}), + NoAck, QPid, QName, + State#ch{consumer_mapping = CM1}), {noreply, case NoWait of true -> consumer_monitor(ActualConsumerTag, State1); @@ -778,33 +834,31 @@ handle_method(#'basic.qos'{prefetch_size = Size}, _, _State) when Size /= 0 -> rabbit_misc:protocol_error(not_implemented, "prefetch_size!=0 (~w)", [Size]); -handle_method(#'basic.qos'{prefetch_count = PrefetchCount}, _, +handle_method(#'basic.qos'{prefetch_count = 0}, _, State = #ch{limiter = Limiter}) -> - Limiter1 = case {rabbit_limiter:is_enabled(Limiter), PrefetchCount} of - {false, 0} -> Limiter; - {false, _} -> enable_limiter(State); - {_, _} -> Limiter - end, - Limiter3 = case rabbit_limiter:limit(Limiter1, PrefetchCount) of - ok -> Limiter1; - {disabled, Limiter2} -> ok = limit_queues(Limiter2, State), - Limiter2 - end, - {reply, #'basic.qos_ok'{}, State#ch{limiter = Limiter3}}; + Limiter1 = rabbit_limiter:unlimit_prefetch(Limiter), + {reply, #'basic.qos_ok'{}, State#ch{limiter = Limiter1}}; + +handle_method(#'basic.qos'{prefetch_count = PrefetchCount}, _, + State = #ch{limiter = Limiter, unacked_message_q = UAMQ}) -> + %% TODO queue:len(UAMQ) is not strictly right since that counts + %% unacked messages from basic.get too. Pretty obscure though. + Limiter1 = rabbit_limiter:limit_prefetch(Limiter, + PrefetchCount, queue:len(UAMQ)), + {reply, #'basic.qos_ok'{}, + maybe_limit_queues(Limiter, Limiter1, State#ch{limiter = Limiter1})}; handle_method(#'basic.recover_async'{requeue = true}, _, State = #ch{unacked_message_q = UAMQ, limiter = Limiter}) -> OkFun = fun () -> ok end, UAMQL = queue:to_list(UAMQ), - ok = fold_per_queue( - fun (QPid, MsgIds, ok) -> - rabbit_misc:with_exit_handler( - OkFun, fun () -> - rabbit_amqqueue:requeue( - QPid, MsgIds, self()) - end) - end, ok, UAMQL), + foreach_per_queue( + fun (QPid, MsgIds) -> + rabbit_misc:with_exit_handler( + OkFun, + fun () -> rabbit_amqqueue:requeue(QPid, MsgIds, self()) end) + end, lists:reverse(UAMQL)), ok = notify_limiter(Limiter, UAMQL), %% No answer required - basic.recover is the newer, synchronous %% variant of this method @@ -814,12 +868,9 @@ handle_method(#'basic.recover_async'{requeue = false}, _, _State) -> rabbit_misc:protocol_error(not_implemented, "requeue=false", []); handle_method(#'basic.recover'{requeue = Requeue}, Content, State) -> - {noreply, State2 = #ch{writer_pid = WriterPid}} = - handle_method(#'basic.recover_async'{requeue = Requeue}, - Content, - State), - ok = rabbit_writer:send_command(WriterPid, #'basic.recover_ok'{}), - {noreply, State2}; + {noreply, State1} = handle_method(#'basic.recover_async'{requeue = Requeue}, + Content, State), + {reply, #'basic.recover_ok'{}, State1}; handle_method(#'basic.reject'{delivery_tag = DeliveryTag, requeue = Requeue}, @@ -843,9 +894,13 @@ handle_method(#'exchange.declare'{exchange = ExchangeNameBin, {ok, FoundX} -> FoundX; {error, not_found} -> check_name('exchange', ExchangeNameBin), - case rabbit_misc:r_arg(VHostPath, exchange, Args, - <<"alternate-exchange">>) of + AeKey = <<"alternate-exchange">>, + case rabbit_misc:r_arg(VHostPath, exchange, Args, AeKey) of undefined -> ok; + {error, {invalid_type, Type}} -> + precondition_failed( + "invalid type '~s' for arg '~s' in ~s", + [Type, AeKey, rabbit_misc:rs(ExchangeName)]); AName -> check_read_permitted(ExchangeName, State), check_write_permitted(AName, State), ok @@ -881,8 +936,7 @@ handle_method(#'exchange.delete'{exchange = ExchangeNameBin, {error, not_found} -> rabbit_misc:not_found(ExchangeName); {error, in_use} -> - rabbit_misc:protocol_error( - precondition_failed, "~s in use", [rabbit_misc:rs(ExchangeName)]); + precondition_failed("~s in use", [rabbit_misc:rs(ExchangeName)]); ok -> return_ok(State, NoWait, #'exchange.delete_ok'{}) end; @@ -936,6 +990,19 @@ handle_method(#'queue.declare'{queue = QueueNameBin, return_queue_declare_ok(QueueName, NoWait, MessageCount, ConsumerCount, State); {error, not_found} -> + DlxKey = <<"x-dead-letter-exchange">>, + case rabbit_misc:r_arg(VHostPath, exchange, Args, DlxKey) of + undefined -> + ok; + {error, {invalid_type, Type}} -> + precondition_failed( + "invalid type '~s' for arg '~s' in ~s", + [Type, DlxKey, rabbit_misc:rs(QueueName)]); + DLX -> + check_read_permitted(QueueName, State), + check_write_permitted(DLX, State), + ok + end, case rabbit_amqqueue:declare(QueueName, Durable, AutoDelete, Args, Owner) of {new, #amqqueue{pid = QPid}} -> @@ -952,8 +1019,12 @@ handle_method(#'queue.declare'{queue = QueueNameBin, {existing, _Q} -> %% must have been created between the stat and the %% declare. Loop around again. - handle_method(Declare, none, State) - end + handle_method(Declare, none, State); + {absent, Q} -> + rabbit_misc:absent(Q) + end; + {error, {absent, Q}} -> + rabbit_misc:absent(Q) end; handle_method(#'queue.declare'{queue = QueueNameBin, @@ -980,11 +1051,9 @@ handle_method(#'queue.delete'{queue = QueueNameBin, QueueName, ConnPid, fun (Q) -> rabbit_amqqueue:delete(Q, IfUnused, IfEmpty) end) of {error, in_use} -> - rabbit_misc:protocol_error( - precondition_failed, "~s in use", [rabbit_misc:rs(QueueName)]); + precondition_failed("~s in use", [rabbit_misc:rs(QueueName)]); {error, not_empty} -> - rabbit_misc:protocol_error( - precondition_failed, "~s not empty", [rabbit_misc:rs(QueueName)]); + precondition_failed("~s not empty", [rabbit_misc:rs(QueueName)]); {ok, PurgedMessageCount} -> return_ok(State, NoWait, #'queue.delete_ok'{message_count = PurgedMessageCount}) @@ -1019,41 +1088,38 @@ handle_method(#'queue.purge'{queue = QueueNameBin, #'queue.purge_ok'{message_count = PurgedMessageCount}); handle_method(#'tx.select'{}, _, #ch{confirm_enabled = true}) -> - rabbit_misc:protocol_error( - precondition_failed, "cannot switch from confirm to tx mode", []); + precondition_failed("cannot switch from confirm to tx mode"); + +handle_method(#'tx.select'{}, _, State = #ch{tx = none}) -> + {reply, #'tx.select_ok'{}, State#ch{tx = new_tx()}}; handle_method(#'tx.select'{}, _, State) -> - {reply, #'tx.select_ok'{}, State#ch{tx_status = in_progress}}; + {reply, #'tx.select_ok'{}, State}; -handle_method(#'tx.commit'{}, _, #ch{tx_status = none}) -> - rabbit_misc:protocol_error( - precondition_failed, "channel is not transactional", []); - -handle_method(#'tx.commit'{}, _, - State = #ch{uncommitted_message_q = TMQ, - uncommitted_acks = TAL, - uncommitted_nacks = TNL, - limiter = Limiter}) -> - State1 = rabbit_misc:queue_fold(fun deliver_to_queues/2, State, TMQ), - ack(TAL, State1), - lists:foreach( - fun({Requeue, Acked}) -> reject(Requeue, Acked, Limiter) end, TNL), - {noreply, maybe_complete_tx(new_tx(State1#ch{tx_status = committing}))}; - -handle_method(#'tx.rollback'{}, _, #ch{tx_status = none}) -> - rabbit_misc:protocol_error( - precondition_failed, "channel is not transactional", []); +handle_method(#'tx.commit'{}, _, #ch{tx = none}) -> + precondition_failed("channel is not transactional"); + +handle_method(#'tx.commit'{}, _, State = #ch{tx = {Msgs, Acks}, + limiter = Limiter}) -> + State1 = rabbit_misc:queue_fold(fun deliver_to_queues/2, State, Msgs), + Rev = fun (X) -> lists:reverse(lists:sort(X)) end, + lists:foreach(fun ({ack, A}) -> ack(Rev(A), State1); + ({Requeue, A}) -> reject(Requeue, Rev(A), Limiter) + end, lists:reverse(Acks)), + {noreply, maybe_complete_tx(State1#ch{tx = committing})}; + +handle_method(#'tx.rollback'{}, _, #ch{tx = none}) -> + precondition_failed("channel is not transactional"); handle_method(#'tx.rollback'{}, _, State = #ch{unacked_message_q = UAMQ, - uncommitted_acks = TAL, - uncommitted_nacks = TNL}) -> - TNL1 = lists:append([L || {_, L} <- TNL]), - UAMQ1 = queue:from_list(lists:usort(TAL ++ TNL1 ++ queue:to_list(UAMQ))), - {reply, #'tx.rollback_ok'{}, new_tx(State#ch{unacked_message_q = UAMQ1})}; + tx = {_Msgs, Acks}}) -> + AcksL = lists:append(lists:reverse([lists:reverse(L) || {_, L} <- Acks])), + UAMQ1 = queue:from_list(lists:usort(AcksL ++ queue:to_list(UAMQ))), + {reply, #'tx.rollback_ok'{}, State#ch{unacked_message_q = UAMQ1, + tx = new_tx()}}; -handle_method(#'confirm.select'{}, _, #ch{tx_status = in_progress}) -> - rabbit_misc:protocol_error( - precondition_failed, "cannot switch from tx to confirm mode", []); +handle_method(#'confirm.select'{}, _, #ch{tx = {_, _}}) -> + precondition_failed("cannot switch from tx to confirm mode"); handle_method(#'confirm.select'{nowait = NoWait}, _, State) -> return_ok(State#ch{confirm_enabled = true}, @@ -1061,27 +1127,44 @@ handle_method(#'confirm.select'{nowait = NoWait}, _, State) -> handle_method(#'channel.flow'{active = true}, _, State = #ch{limiter = Limiter}) -> - Limiter2 = case rabbit_limiter:unblock(Limiter) of - ok -> Limiter; - {disabled, Limiter1} -> ok = limit_queues(Limiter1, State), - Limiter1 - end, - {reply, #'channel.flow_ok'{active = true}, State#ch{limiter = Limiter2}}; + Limiter1 = rabbit_limiter:unblock(Limiter), + {reply, #'channel.flow_ok'{active = true}, + maybe_limit_queues(Limiter, Limiter1, State#ch{limiter = Limiter1})}; handle_method(#'channel.flow'{active = false}, _, State = #ch{consumer_mapping = Consumers, limiter = Limiter}) -> - Limiter1 = case rabbit_limiter:is_enabled(Limiter) of - true -> Limiter; - false -> enable_limiter(State) - end, - State1 = State#ch{limiter = Limiter1}, - ok = rabbit_limiter:block(Limiter1), - case consumer_queues(Consumers) of - [] -> {reply, #'channel.flow_ok'{active = false}, State1}; - QPids -> State2 = State1#ch{blocking = sets:from_list(QPids)}, + case rabbit_limiter:is_blocked(Limiter) of + true -> {noreply, maybe_send_flow_ok(State)}; + false -> Limiter1 = rabbit_limiter:block(Limiter), + State1 = maybe_limit_queues(Limiter, Limiter1, + State#ch{limiter = Limiter1}), + %% The semantics of channel.flow{active=false} + %% require that no messages are delivered after the + %% channel.flow_ok has been sent. We accomplish that + %% by "flushing" all messages in flight from the + %% consumer queues to us. To do this we tell all the + %% queues to invoke rabbit_channel:flushed/2, which + %% will send us a {flushed, ...} message that appears + %% *after* all the {deliver, ...} messages. We keep + %% track of all the QPids thus asked, and once all of + %% them have responded (or died) we send the + %% channel.flow_ok. + QPids = consumer_queues(Consumers), ok = rabbit_amqqueue:flush_all(QPids, self()), - {noreply, State2} + {noreply, maybe_send_flow_ok( + State1#ch{blocking = sets:from_list(QPids)})} + end; + +handle_method(#'basic.credit'{consumer_tag = CTag, + credit = Credit, + drain = Drain}, _, + State = #ch{consumer_mapping = Consumers}) -> + case dict:find(CTag, Consumers) of + {ok, Q} -> ok = rabbit_amqqueue:credit( + Q, self(), CTag, Credit, Drain), + {noreply, State}; + error -> precondition_failed("unknown consumer tag '~s'", [CTag]) end; handle_method(_MethodRecord, _Content, _State) -> @@ -1111,15 +1194,19 @@ consumer_monitor(ConsumerTag, State end. -monitor_delivering_queue(true, _QPid, State) -> - State; -monitor_delivering_queue(false, QPid, State = #ch{queue_monitors = QMons, - delivering_queues = DQ}) -> - State#ch{queue_monitors = pmon:monitor(QPid, QMons), - delivering_queues = sets:add_element(QPid, DQ)}. +monitor_delivering_queue(NoAck, QPid, QName, + State = #ch{queue_names = QNames, + queue_monitors = QMons, + delivering_queues = DQ}) -> + State#ch{queue_names = dict:store(QPid, QName, QNames), + queue_monitors = pmon:monitor(QPid, QMons), + delivering_queues = case NoAck of + true -> DQ; + false -> sets:add_element(QPid, DQ) + end}. handle_publishing_queue_down(QPid, Reason, State = #ch{unconfirmed = UC}) -> - case rabbit_misc:is_abnormal_termination(Reason) of + case rabbit_misc:is_abnormal_exit(Reason) of true -> {MXs, UC1} = dtree:take_all(QPid, UC), send_nacks(MXs, State#ch{unconfirmed = UC1}); false -> {MXs, UC1} = dtree:take(QPid, UC), @@ -1129,16 +1216,21 @@ handle_publishing_queue_down(QPid, Reason, State = #ch{unconfirmed = UC}) -> handle_consuming_queue_down(QPid, State = #ch{consumer_mapping = ConsumerMapping, queue_consumers = QCons, - writer_pid = WriterPid}) -> + queue_names = QNames}) -> ConsumerTags = case dict:find(QPid, QCons) of error -> gb_sets:new(); {ok, CTags} -> CTags end, ConsumerMapping1 = gb_sets:fold(fun (CTag, CMap) -> - Cancel = #'basic.cancel'{consumer_tag = CTag, - nowait = true}, - ok = rabbit_writer:send_command(WriterPid, Cancel), + ok = send(#'basic.cancel'{consumer_tag = CTag, + nowait = true}, + State), + rabbit_event:notify( + consumer_deleted, + [{consumer_tag, CTag}, + {channel, self()}, + {queue, dict:fetch(QPid, QNames)}]), dict:erase(CTag, CMap) end, ConsumerMapping, ConsumerTags), State#ch{consumer_mapping = ConsumerMapping1, @@ -1147,14 +1239,20 @@ handle_consuming_queue_down(QPid, handle_delivering_queue_down(QPid, State = #ch{delivering_queues = DQ}) -> State#ch{delivering_queues = sets:del_element(QPid, DQ)}. +parse_credit_args(Arguments) -> + case rabbit_misc:table_lookup(Arguments, <<"x-credit">>) of + {table, T} -> case {rabbit_misc:table_lookup(T, <<"credit">>), + rabbit_misc:table_lookup(T, <<"drain">>)} of + {{long, Credit}, {boolean, Drain}} -> {Credit, Drain}; + _ -> none + end; + undefined -> none + end. + binding_action(Fun, ExchangeNameBin, DestinationType, DestinationNameBin, RoutingKey, Arguments, ReturnMethod, NoWait, State = #ch{virtual_host = VHostPath, conn_pid = ConnPid }) -> - %% FIXME: connection exception (!) on failure?? - %% (see rule named "failure" in spec-XML) - %% FIXME: don't allow binding to internal exchanges - - %% including the one named "" ! {DestinationName, ActualRoutingKey} = expand_binding(DestinationType, DestinationNameBin, RoutingKey, State), check_write_permitted(DestinationName, State), @@ -1172,19 +1270,17 @@ binding_action(Fun, ExchangeNameBin, DestinationType, DestinationNameBin, (_X, #exchange{}) -> ok end) of - {error, source_not_found} -> - rabbit_misc:not_found(ExchangeName); - {error, destination_not_found} -> - rabbit_misc:not_found(DestinationName); - {error, source_and_destination_not_found} -> - rabbit_misc:protocol_error( - not_found, "no ~s and no ~s", [rabbit_misc:rs(ExchangeName), - rabbit_misc:rs(DestinationName)]); + {error, {resources_missing, [{not_found, Name} | _]}} -> + rabbit_misc:not_found(Name); + {error, {resources_missing, [{absent, Q} | _]}} -> + rabbit_misc:absent(Q); {error, binding_not_found} -> rabbit_misc:protocol_error( not_found, "no binding ~s between ~s and ~s", [RoutingKey, rabbit_misc:rs(ExchangeName), rabbit_misc:rs(DestinationName)]); + {error, {binding_invalid, Fmt, Args}} -> + rabbit_misc:protocol_error(precondition_failed, Fmt, Args); {error, #amqp_error{} = Error} -> rabbit_misc:protocol_error(Error); ok -> return_ok(State, NoWait, ReturnMethod) @@ -1204,39 +1300,40 @@ basic_return(#basic_message{exchange_name = ExchangeName, Content). reject(DeliveryTag, Requeue, Multiple, - State = #ch{unacked_message_q = UAMQ, tx_status = TxStatus}) -> + State = #ch{unacked_message_q = UAMQ, tx = Tx}) -> {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple), State1 = State#ch{unacked_message_q = Remaining}, - {noreply, - case TxStatus of - none -> - reject(Requeue, Acked, State1#ch.limiter), - State1; - in_progress -> - State1#ch{uncommitted_nacks = - [{Requeue, Acked} | State1#ch.uncommitted_nacks]} - end}. - + {noreply, case Tx of + none -> reject(Requeue, Acked, State1#ch.limiter), + State1; + {Msgs, Acks} -> Acks1 = ack_cons(Requeue, Acked, Acks), + State1#ch{tx = {Msgs, Acks1}} + end}. + +%% NB: Acked is in youngest-first order reject(Requeue, Acked, Limiter) -> - ok = fold_per_queue( - fun (QPid, MsgIds, ok) -> - rabbit_amqqueue:reject(QPid, MsgIds, Requeue, self()) - end, ok, Acked), + foreach_per_queue( + fun (QPid, MsgIds) -> + rabbit_amqqueue:reject(QPid, MsgIds, Requeue, self()) + end, Acked), ok = notify_limiter(Limiter, Acked). record_sent(ConsumerTag, AckRequired, - Msg = {_QName, QPid, MsgId, Redelivered, _Message}, + Msg = {QName, QPid, MsgId, Redelivered, _Message}, State = #ch{unacked_message_q = UAMQ, next_tag = DeliveryTag, trace_state = TraceState}) -> - maybe_incr_stats([{QPid, 1}], case {ConsumerTag, AckRequired} of - {none, true} -> get; - {none, false} -> get_no_ack; - {_ , true} -> deliver; - {_ , false} -> deliver_no_ack - end, State), - maybe_incr_redeliver_stats(Redelivered, QPid, State), - rabbit_trace:tap_trace_out(Msg, TraceState), + ?INCR_STATS([{queue_stats, QName, 1}], case {ConsumerTag, AckRequired} of + {none, true} -> get; + {none, false} -> get_no_ack; + {_ , true} -> deliver; + {_ , false} -> deliver_no_ack + end, State), + case Redelivered of + true -> ?INCR_STATS([{queue_stats, QName, 1}], redeliver, State); + false -> ok + end, + rabbit_trace:tap_out(Msg, TraceState), UAMQ1 = case AckRequired of true -> queue:in({DeliveryTag, ConsumerTag, {QPid, MsgId}}, UAMQ); @@ -1244,41 +1341,61 @@ record_sent(ConsumerTag, AckRequired, end, State#ch{unacked_message_q = UAMQ1, next_tag = DeliveryTag + 1}. +%% NB: returns acks in youngest-first order collect_acks(Q, 0, true) -> - {queue:to_list(Q), queue:new()}; + {lists:reverse(queue:to_list(Q)), queue:new()}; collect_acks(Q, DeliveryTag, Multiple) -> - collect_acks([], queue:new(), Q, DeliveryTag, Multiple). + collect_acks([], [], Q, DeliveryTag, Multiple). collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) -> case queue:out(Q) of {{value, UnackedMsg = {CurrentDeliveryTag, _ConsumerTag, _Msg}}, QTail} -> if CurrentDeliveryTag == DeliveryTag -> - {[UnackedMsg | ToAcc], queue:join(PrefixAcc, QTail)}; + {[UnackedMsg | ToAcc], + case PrefixAcc of + [] -> QTail; + _ -> queue:join( + queue:from_list(lists:reverse(PrefixAcc)), + QTail) + end}; Multiple -> collect_acks([UnackedMsg | ToAcc], PrefixAcc, QTail, DeliveryTag, Multiple); true -> - collect_acks(ToAcc, queue:in(UnackedMsg, PrefixAcc), + collect_acks(ToAcc, [UnackedMsg | PrefixAcc], QTail, DeliveryTag, Multiple) end; {empty, _} -> - rabbit_misc:protocol_error( - precondition_failed, "unknown delivery tag ~w", [DeliveryTag]) + precondition_failed("unknown delivery tag ~w", [DeliveryTag]) end. -ack(Acked, State) -> - QIncs = fold_per_queue( - fun (QPid, MsgIds, L) -> - ok = rabbit_amqqueue:ack(QPid, MsgIds, self()), - [{QPid, length(MsgIds)} | L] - end, [], Acked), - ok = notify_limiter(State#ch.limiter, Acked), - maybe_incr_stats(QIncs, ack, State). - -new_tx(State) -> State#ch{uncommitted_message_q = queue:new(), - uncommitted_acks = [], - uncommitted_nacks = []}. +%% NB: Acked is in youngest-first order +ack(Acked, State = #ch{queue_names = QNames}) -> + foreach_per_queue( + fun (QPid, MsgIds) -> + ok = rabbit_amqqueue:ack(QPid, MsgIds, self()), + ?INCR_STATS(case dict:find(QPid, QNames) of + {ok, QName} -> Count = length(MsgIds), + [{queue_stats, QName, Count}]; + error -> [] + end, ack, State) + end, Acked), + ok = notify_limiter(State#ch.limiter, Acked). + +%% {Msgs, Acks} +%% +%% Msgs is a queue. +%% +%% Acks looks s.t. like this: +%% [{false,[5,4]},{true,[3]},{ack,[2,1]}, ...] +%% +%% Each element is a pair consisting of a tag and a list of +%% ack'ed/reject'ed msg ids. The tag is one of 'ack' (to ack), 'true' +%% (reject w requeue), 'false' (reject w/o requeue). The msg ids, as +%% well as the list overall, are in "most-recent (generally youngest) +%% ack first" order. +new_tx() -> {queue:new(), []}. notify_queues(State = #ch{state = closing}) -> {ok, State}; @@ -1288,24 +1405,26 @@ notify_queues(State = #ch{consumer_mapping = Consumers, sets:union(sets:from_list(consumer_queues(Consumers)), DQ)), {rabbit_amqqueue:notify_down_all(QPids, self()), State#ch{state = closing}}. -fold_per_queue(_F, Acc, []) -> - Acc; -fold_per_queue(F, Acc, [{_DTag, _CTag, {QPid, MsgId}}]) -> %% common case - F(QPid, [MsgId], Acc); -fold_per_queue(F, Acc, UAL) -> +foreach_per_queue(_F, []) -> + ok; +foreach_per_queue(F, [{_DTag, _CTag, {QPid, MsgId}}]) -> %% common case + F(QPid, [MsgId]); +%% NB: UAL should be in youngest-first order; the tree values will +%% then be in oldest-first order +foreach_per_queue(F, UAL) -> T = lists:foldl(fun ({_DTag, _CTag, {QPid, MsgId}}, T) -> rabbit_misc:gb_trees_cons(QPid, MsgId, T) end, gb_trees:empty(), UAL), - rabbit_misc:gb_trees_fold(F, Acc, T). - -enable_limiter(State = #ch{unacked_message_q = UAMQ, - limiter = Limiter}) -> - Limiter1 = rabbit_limiter:enable(Limiter, queue:len(UAMQ)), - ok = limit_queues(Limiter1, State), - Limiter1. - -limit_queues(Limiter, #ch{consumer_mapping = Consumers}) -> - rabbit_amqqueue:limit_all(consumer_queues(Consumers), self(), Limiter). + rabbit_misc:gb_trees_foreach(F, T). + +maybe_limit_queues(OldLimiter, NewLimiter, State) -> + case ((not rabbit_limiter:is_active(OldLimiter)) andalso + rabbit_limiter:is_active(NewLimiter)) of + true -> Queues = consumer_queues(State#ch.consumer_mapping), + rabbit_amqqueue:activate_limit_all(Queues, self()); + false -> ok + end, + State. consumer_queues(Consumers) -> lists:usort([QPid || @@ -1316,77 +1435,110 @@ consumer_queues(Consumers) -> %% messages sent in a response to a basic.get (identified by their %% 'none' consumer tag) notify_limiter(Limiter, Acked) -> - case rabbit_limiter:is_enabled(Limiter) of + %% optimisation: avoid the potentially expensive 'foldl' in the + %% common case. + case rabbit_limiter:is_prefetch_limited(Limiter) of false -> ok; true -> case lists:foldl(fun ({_, none, _}, Acc) -> Acc; - ({_, _, _}, Acc) -> Acc + 1 + ({_, _, _}, Acc) -> Acc + 1 end, 0, Acked) of 0 -> ok; Count -> rabbit_limiter:ack(Limiter, Count) end end. +deliver_to_queues({#delivery{message = #basic_message{exchange_name = XName}, + msg_seq_no = undefined, + mandatory = false}, + []}, State) -> %% optimisation + ?INCR_STATS([{exchange_stats, XName, 1}], publish, State), + State; deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{ exchange_name = XName}, msg_seq_no = MsgSeqNo}, - QNames}, State) -> - {RoutingRes, DeliveredQPids} = - rabbit_amqqueue:deliver_flow(rabbit_amqqueue:lookup(QNames), Delivery), - State1 = State#ch{queue_monitors = - pmon:monitor_all(DeliveredQPids, - State#ch.queue_monitors)}, - State2 = process_routing_result(RoutingRes, DeliveredQPids, - XName, MsgSeqNo, Message, State1), - maybe_incr_stats([{XName, 1} | - [{{QPid, XName}, 1} || - QPid <- DeliveredQPids]], publish, State2), - State2. - -process_routing_result(unroutable, _, XName, MsgSeqNo, Msg, State) -> - ok = basic_return(Msg, State, no_route), - maybe_incr_stats([{Msg#basic_message.exchange_name, 1}], - return_unroutable, State), - record_confirm(MsgSeqNo, XName, State); -process_routing_result(not_delivered, _, XName, MsgSeqNo, Msg, State) -> - ok = basic_return(Msg, State, no_consumers), - maybe_incr_stats([{XName, 1}], return_not_delivered, State), - record_confirm(MsgSeqNo, XName, State); -process_routing_result(routed, [], XName, MsgSeqNo, _, State) -> - record_confirm(MsgSeqNo, XName, State); -process_routing_result(routed, _, _, undefined, _, State) -> + DelQNames}, State = #ch{queue_names = QNames, + queue_monitors = QMons}) -> + Qs = rabbit_amqqueue:lookup(DelQNames), + {RoutingRes, DeliveredQPids} = rabbit_amqqueue:deliver_flow(Qs, Delivery), + %% The pmon:monitor_all/2 monitors all queues to which we + %% delivered. But we want to monitor even queues we didn't deliver + %% to, since we need their 'DOWN' messages to clean + %% queue_names. So we also need to monitor each QPid from + %% queues. But that only gets the masters (which is fine for + %% cleaning queue_names), so we need the union of both. + %% + %% ...and we need to add even non-delivered queues to queue_names + %% since alternative algorithms to update queue_names less + %% frequently would in fact be more expensive in the common case. + {QNames1, QMons1} = + lists:foldl(fun (#amqqueue{pid = QPid, name = QName}, + {QNames0, QMons0}) -> + {case dict:is_key(QPid, QNames0) of + true -> QNames0; + false -> dict:store(QPid, QName, QNames0) + end, pmon:monitor(QPid, QMons0)} + end, {QNames, pmon:monitor_all(DeliveredQPids, QMons)}, Qs), + State1 = process_routing_result(RoutingRes, DeliveredQPids, + XName, MsgSeqNo, Message, + State#ch{queue_names = QNames1, + queue_monitors = QMons1}), + ?INCR_STATS([{exchange_stats, XName, 1} | + [{queue_exchange_stats, {QName, XName}, 1} || + QPid <- DeliveredQPids, + {ok, QName} <- [dict:find(QPid, QNames1)]]], + publish, State1), + State1. + +process_routing_result(routed, _, _, undefined, _, State) -> State; -process_routing_result(routed, QPids, XName, MsgSeqNo, _, State) -> +process_routing_result(routed, [], XName, MsgSeqNo, _, State) -> + record_confirms([{MsgSeqNo, XName}], State); +process_routing_result(routed, QPids, XName, MsgSeqNo, _, State) -> State#ch{unconfirmed = dtree:insert(MsgSeqNo, QPids, XName, - State#ch.unconfirmed)}. + State#ch.unconfirmed)}; +process_routing_result(unroutable, _, XName, MsgSeqNo, Msg, State) -> + ok = basic_return(Msg, State, no_route), + ?INCR_STATS([{exchange_stats, XName, 1}], return_unroutable, State), + case MsgSeqNo of + undefined -> State; + _ -> record_confirms([{MsgSeqNo, XName}], State) + end. send_nacks([], State) -> State; -send_nacks(MXs, State = #ch{tx_status = none}) -> +send_nacks(_MXs, State = #ch{state = closing, + tx = none}) -> %% optimisation + State; +send_nacks(MXs, State = #ch{tx = none}) -> coalesce_and_send([MsgSeqNo || {MsgSeqNo, _} <- MXs], fun(MsgSeqNo, Multiple) -> #'basic.nack'{delivery_tag = MsgSeqNo, multiple = Multiple} end, State); +send_nacks(_MXs, State = #ch{state = closing}) -> %% optimisation + State#ch{tx = failed}; send_nacks(_, State) -> - maybe_complete_tx(State#ch{tx_status = failed}). + maybe_complete_tx(State#ch{tx = failed}). -send_confirms(State = #ch{tx_status = none, confirmed = []}) -> +send_confirms(State = #ch{tx = none, confirmed = []}) -> State; -send_confirms(State = #ch{tx_status = none, confirmed = C}) -> +send_confirms(State = #ch{tx = none, confirmed = C}) -> MsgSeqNos = - lists:foldl(fun ({MsgSeqNo, XName}, MSNs) -> - maybe_incr_stats([{XName, 1}], confirm, State), - [MsgSeqNo | MSNs] - end, [], lists:append(C)), + lists:foldl( + fun ({MsgSeqNo, XName}, MSNs) -> + ?INCR_STATS([{exchange_stats, XName, 1}], confirm, State), + [MsgSeqNo | MSNs] + end, [], lists:append(C)), send_confirms(MsgSeqNos, State#ch{confirmed = []}); send_confirms(State) -> maybe_complete_tx(State). send_confirms([], State) -> State; -send_confirms([MsgSeqNo], State = #ch{writer_pid = WriterPid}) -> - ok = rabbit_writer:send_command(WriterPid, - #'basic.ack'{delivery_tag = MsgSeqNo}), +send_confirms(_Cs, State = #ch{state = closing}) -> %% optimisation + State; +send_confirms([MsgSeqNo], State) -> + ok = send(#'basic.ack'{delivery_tag = MsgSeqNo}, State), State; send_confirms(Cs, State) -> coalesce_and_send(Cs, fun(MsgSeqNo, Multiple) -> @@ -1394,8 +1546,7 @@ send_confirms(Cs, State) -> multiple = Multiple} end, State). -coalesce_and_send(MsgSeqNos, MkMsgFun, - State = #ch{writer_pid = WriterPid, unconfirmed = UC}) -> +coalesce_and_send(MsgSeqNos, MkMsgFun, State = #ch{unconfirmed = UC}) -> SMsgSeqNos = lists:usort(MsgSeqNos), CutOff = case dtree:is_empty(UC) of true -> lists:last(SMsgSeqNos) + 1; @@ -1404,14 +1555,17 @@ coalesce_and_send(MsgSeqNos, MkMsgFun, {Ms, Ss} = lists:splitwith(fun(X) -> X < CutOff end, SMsgSeqNos), case Ms of [] -> ok; - _ -> ok = rabbit_writer:send_command( - WriterPid, MkMsgFun(lists:last(Ms), true)) + _ -> ok = send(MkMsgFun(lists:last(Ms), true), State) end, - [ok = rabbit_writer:send_command( - WriterPid, MkMsgFun(SeqNo, false)) || SeqNo <- Ss], + [ok = send(MkMsgFun(SeqNo, false), State) || SeqNo <- Ss], State. -maybe_complete_tx(State = #ch{tx_status = in_progress}) -> +ack_cons(Tag, Acked, [{Tag, Acks} | L]) -> [{Tag, Acked ++ Acks} | L]; +ack_cons(Tag, Acked, Acks) -> [{Tag, Acked} | Acks]. + +ack_len(Acks) -> lists:sum([length(L) || {ack, L} <- Acks]). + +maybe_complete_tx(State = #ch{tx = {_, _}}) -> State; maybe_complete_tx(State = #ch{unconfirmed = UC}) -> case dtree:is_empty(UC) of @@ -1419,16 +1573,16 @@ maybe_complete_tx(State = #ch{unconfirmed = UC}) -> true -> complete_tx(State#ch{confirmed = []}) end. -complete_tx(State = #ch{tx_status = committing}) -> - ok = rabbit_writer:send_command(State#ch.writer_pid, #'tx.commit_ok'{}), - State#ch{tx_status = in_progress}; -complete_tx(State = #ch{tx_status = failed}) -> - {noreply, State1} = send_exception( +complete_tx(State = #ch{tx = committing}) -> + ok = send(#'tx.commit_ok'{}, State), + State#ch{tx = new_tx()}; +complete_tx(State = #ch{tx = failed}) -> + {noreply, State1} = handle_exception( rabbit_misc:amqp_error( precondition_failed, "partial tx completion", [], 'tx.commit'), State), - State1#ch{tx_status = in_progress}. + State1#ch{tx = new_tx()}. infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. @@ -1437,21 +1591,18 @@ i(connection, #ch{conn_pid = ConnPid}) -> ConnPid; i(number, #ch{channel = Channel}) -> Channel; i(user, #ch{user = User}) -> User#user.username; i(vhost, #ch{virtual_host = VHost}) -> VHost; -i(transactional, #ch{tx_status = TE}) -> TE =/= none; +i(transactional, #ch{tx = Tx}) -> Tx =/= none; i(confirm, #ch{confirm_enabled = CE}) -> CE; i(name, State) -> name(State); -i(consumer_count, #ch{consumer_mapping = ConsumerMapping}) -> - dict:size(ConsumerMapping); -i(messages_unconfirmed, #ch{unconfirmed = UC}) -> - dtree:size(UC); -i(messages_unacknowledged, #ch{unacked_message_q = UAMQ}) -> - queue:len(UAMQ); -i(messages_uncommitted, #ch{uncommitted_message_q = TMQ}) -> - queue:len(TMQ); -i(acks_uncommitted, #ch{uncommitted_acks = TAL}) -> - length(TAL); +i(consumer_count, #ch{consumer_mapping = CM}) -> dict:size(CM); +i(messages_unconfirmed, #ch{unconfirmed = UC}) -> dtree:size(UC); +i(messages_unacknowledged, #ch{unacked_message_q = UAMQ}) -> queue:len(UAMQ); +i(messages_uncommitted, #ch{tx = {Msgs, _Acks}}) -> queue:len(Msgs); +i(messages_uncommitted, #ch{}) -> 0; +i(acks_uncommitted, #ch{tx = {_Msgs, Acks}}) -> ack_len(Acks); +i(acks_uncommitted, #ch{}) -> 0; i(prefetch_count, #ch{limiter = Limiter}) -> - rabbit_limiter:get_limit(Limiter); + rabbit_limiter:get_prefetch_limit(Limiter); i(client_flow_blocked, #ch{limiter = Limiter}) -> rabbit_limiter:is_blocked(Limiter); i(Item, _) -> @@ -1460,26 +1611,11 @@ i(Item, _) -> name(#ch{conn_name = ConnName, channel = Channel}) -> list_to_binary(rabbit_misc:format("~s (~p)", [ConnName, Channel])). -maybe_incr_redeliver_stats(true, QPid, State) -> - maybe_incr_stats([{QPid, 1}], redeliver, State); -maybe_incr_redeliver_stats(_, _, _State) -> - ok. - -maybe_incr_stats(QXIncs, Measure, State) -> - case rabbit_event:stats_level(State, #ch.stats_timer) of - fine -> [incr_stats(QX, Inc, Measure) || {QX, Inc} <- QXIncs]; - _ -> ok - end. - -incr_stats({_, _} = QX, Inc, Measure) -> - update_measures(queue_exchange_stats, QX, Inc, Measure); -incr_stats(QPid, Inc, Measure) when is_pid(QPid) -> - update_measures(queue_stats, QPid, Inc, Measure); -incr_stats(X, Inc, Measure) -> - update_measures(exchange_stats, X, Inc, Measure). +incr_stats(Incs, Measure) -> + [update_measures(Type, Key, Inc, Measure) || {Type, Key, Inc} <- Incs]. -update_measures(Type, QX, Inc, Measure) -> - Measures = case get({Type, QX}) of +update_measures(Type, Key, Inc, Measure) -> + Measures = case get({Type, Key}) of undefined -> []; D -> D end, @@ -1487,31 +1623,29 @@ update_measures(Type, QX, Inc, Measure) -> error -> 0; {ok, C} -> C end, - put({Type, QX}, - orddict:store(Measure, Cur + Inc, Measures)). + put({Type, Key}, orddict:store(Measure, Cur + Inc, Measures)). emit_stats(State) -> emit_stats(State, []). emit_stats(State, Extra) -> - CoarseStats = infos(?STATISTICS_KEYS, State), + Coarse = infos(?STATISTICS_KEYS, State), case rabbit_event:stats_level(State, #ch.stats_timer) of - coarse -> - rabbit_event:notify(channel_stats, Extra ++ CoarseStats); - fine -> - FineStats = - [{channel_queue_stats, - [{QPid, Stats} || {{queue_stats, QPid}, Stats} <- get()]}, - {channel_exchange_stats, - [{X, Stats} || {{exchange_stats, X}, Stats} <- get()]}, - {channel_queue_exchange_stats, - [{QX, Stats} || - {{queue_exchange_stats, QX}, Stats} <- get()]}], - rabbit_event:notify(channel_stats, - Extra ++ CoarseStats ++ FineStats) + coarse -> rabbit_event:notify(channel_stats, Extra ++ Coarse); + fine -> Fine = [{channel_queue_stats, + [{QName, Stats} || + {{queue_stats, QName}, Stats} <- get()]}, + {channel_exchange_stats, + [{XName, Stats} || + {{exchange_stats, XName}, Stats} <- get()]}, + {channel_queue_exchange_stats, + [{QX, Stats} || + {{queue_exchange_stats, QX}, Stats} <- get()]}], + rabbit_event:notify(channel_stats, Extra ++ Coarse ++ Fine) end. -erase_queue_stats(QPid) -> - erase({queue_stats, QPid}), +erase_queue_stats(QName) -> + erase({queue_stats, QName}), [erase({queue_exchange_stats, QX}) || - {{queue_exchange_stats, QX = {QPid0, _}}, _} <- get(), QPid =:= QPid0]. + {{queue_exchange_stats, QX = {QName0, _}}, _} <- get(), + QName0 =:= QName]. diff --git a/src/rabbit_channel_sup.erl b/src/rabbit_channel_sup.erl index bcb83851..df2e80ca 100644 --- a/src/rabbit_channel_sup.erl +++ b/src/rabbit_channel_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_channel_sup). @@ -58,7 +58,7 @@ start_link({tcp, Sock, Channel, FrameMax, ReaderPid, ConnName, Protocol, User, {channel, {rabbit_channel, start_link, [Channel, ReaderPid, WriterPid, ReaderPid, ConnName, Protocol, User, VHost, Capabilities, Collector, - rabbit_limiter:make_token(LimiterPid)]}, + LimiterPid]}, intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}), {ok, AState} = rabbit_command_assembler:init(Protocol), {ok, SupPid, {ChannelPid, AState}}; @@ -72,7 +72,7 @@ start_link({direct, Channel, ClientChannelPid, ConnPid, ConnName, Protocol, {channel, {rabbit_channel, start_link, [Channel, ClientChannelPid, ClientChannelPid, ConnPid, ConnName, Protocol, User, VHost, Capabilities, Collector, - rabbit_limiter:make_token(LimiterPid)]}, + LimiterPid]}, intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}), {ok, SupPid, {ChannelPid, none}}. @@ -83,7 +83,7 @@ init(Type) -> child_specs({tcp, Sock, Channel, FrameMax, ReaderPid, Protocol}) -> [{writer, {rabbit_writer, start_link, - [Sock, Channel, FrameMax, Protocol, ReaderPid]}, + [Sock, Channel, FrameMax, Protocol, ReaderPid, true]}, intrinsic, ?MAX_WAIT, worker, [rabbit_writer]} | child_specs(direct)]; child_specs(direct) -> [{limiter, {rabbit_limiter, start_link, []}, diff --git a/src/rabbit_channel_sup_sup.erl b/src/rabbit_channel_sup_sup.erl index 995c41fb..e2c255db 100644 --- a/src/rabbit_channel_sup_sup.erl +++ b/src/rabbit_channel_sup_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_channel_sup_sup). @@ -43,6 +43,6 @@ start_channel(Pid, Args) -> %%---------------------------------------------------------------------------- init([]) -> - {ok, {{simple_one_for_one_terminate, 0, 1}, + {ok, {{simple_one_for_one, 0, 1}, [{channel_sup, {rabbit_channel_sup, start_link, []}, temporary, infinity, supervisor, [rabbit_channel_sup]}]}}. diff --git a/src/rabbit_client_sup.erl b/src/rabbit_client_sup.erl index c508f1b9..843bb615 100644 --- a/src/rabbit_client_sup.erl +++ b/src/rabbit_client_sup.erl @@ -10,15 +10,15 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_client_sup). -behaviour(supervisor2). --export([start_link/1, start_link/2]). +-export([start_link/1, start_link/2, start_link_worker/2]). -export([init/1]). @@ -32,6 +32,8 @@ rabbit_types:ok_pid_or_error()). -spec(start_link/2 :: ({'local', atom()}, rabbit_types:mfargs()) -> rabbit_types:ok_pid_or_error()). +-spec(start_link_worker/2 :: ({'local', atom()}, rabbit_types:mfargs()) -> + rabbit_types:ok_pid_or_error()). -endif. @@ -43,6 +45,13 @@ start_link(Callback) -> start_link(SupName, Callback) -> supervisor2:start_link(SupName, ?MODULE, Callback). +start_link_worker(SupName, Callback) -> + supervisor2:start_link(SupName, ?MODULE, {Callback, worker}). + init({M,F,A}) -> - {ok, {{simple_one_for_one_terminate, 0, 1}, - [{client, {M,F,A}, temporary, infinity, supervisor, [M]}]}}. + {ok, {{simple_one_for_one, 0, 1}, + [{client, {M,F,A}, temporary, infinity, supervisor, [M]}]}}; +init({{M,F,A}, worker}) -> + {ok, {{simple_one_for_one, 0, 1}, + [{client, {M,F,A}, temporary, ?MAX_WAIT, worker, [M]}]}}. + diff --git a/src/rabbit_command_assembler.erl b/src/rabbit_command_assembler.erl index adf6e417..4095ccf1 100644 --- a/src/rabbit_command_assembler.erl +++ b/src/rabbit_command_assembler.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_command_assembler). diff --git a/src/rabbit_connection_sup.erl b/src/rabbit_connection_sup.erl index 12a532b6..fee377e7 100644 --- a/src/rabbit_connection_sup.erl +++ b/src/rabbit_connection_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_connection_sup). @@ -42,16 +42,20 @@ start_link() -> SupPid, {collector, {rabbit_queue_collector, start_link, []}, intrinsic, ?MAX_WAIT, worker, [rabbit_queue_collector]}), - {ok, ChannelSupSupPid} = + %% We need to get channels in the hierarchy here so they close + %% before the reader. But for 1.0 readers we can't start the real + %% ch_sup_sup (because we don't know if we will be 0-9-1 or 1.0) - + %% so we add another supervisor into the hierarchy. + {ok, ChannelSup3Pid} = supervisor2:start_child( SupPid, - {channel_sup_sup, {rabbit_channel_sup_sup, start_link, []}, - intrinsic, infinity, supervisor, [rabbit_channel_sup_sup]}), + {channel_sup3, {rabbit_intermediate_sup, start_link, []}, + intrinsic, infinity, supervisor, [rabbit_intermediate_sup]}), {ok, ReaderPid} = supervisor2:start_child( SupPid, {reader, {rabbit_reader, start_link, - [ChannelSupSupPid, Collector, + [ChannelSup3Pid, Collector, rabbit_heartbeat:start_heartbeat_fun(SupPid)]}, intrinsic, ?MAX_WAIT, worker, [rabbit_reader]}), {ok, SupPid, ReaderPid}. diff --git a/src/rabbit_control.erl b/src/rabbit_control_main.erl index 0c3ac966..6f36f99d 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control_main.erl @@ -10,14 +10,15 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% --module(rabbit_control). +-module(rabbit_control_main). -include("rabbit.hrl"). --export([start/0, stop/0, action/5]). +-export([start/0, stop/0, parse_arguments/2, action/5, + sync_queue/1, cancel_sync_queue/1]). -define(RPC_TIMEOUT, infinity). -define(EXTERNAL_CHECK_INTERVAL, 1000). @@ -25,10 +26,18 @@ -define(QUIET_OPT, "-q"). -define(NODE_OPT, "-n"). -define(VHOST_OPT, "-p"). +-define(PRIORITY_OPT, "--priority"). +-define(APPLY_TO_OPT, "--apply-to"). +-define(RAM_OPT, "--ram"). +-define(OFFLINE_OPT, "--offline"). -define(QUIET_DEF, {?QUIET_OPT, flag}). -define(NODE_DEF(Node), {?NODE_OPT, {option, Node}}). -define(VHOST_DEF, {?VHOST_OPT, {option, "/"}}). +-define(PRIORITY_DEF, {?PRIORITY_OPT, {option, "0"}}). +-define(APPLY_TO_DEF, {?APPLY_TO_OPT, {option, "all"}}). +-define(RAM_DEF, {?RAM_OPT, flag}). +-define(OFFLINE_DEF, {?OFFLINE_OPT, flag}). -define(GLOBAL_DEFS(Node), [?QUIET_DEF, ?NODE_DEF(Node)]). @@ -41,9 +50,13 @@ force_reset, rotate_logs, - cluster, - force_cluster, + {join_cluster, [?RAM_DEF]}, + change_cluster_node_type, + update_cluster_nodes, + {forget_cluster_node, [?OFFLINE_DEF]}, cluster_status, + {sync_queue, [?VHOST_DEF]}, + {cancel_sync_queue, [?VHOST_DEF]}, add_user, delete_user, @@ -60,9 +73,13 @@ {list_permissions, [?VHOST_DEF]}, list_user_permissions, - set_parameter, - clear_parameter, - list_parameters, + {set_parameter, [?VHOST_DEF]}, + {clear_parameter, [?VHOST_DEF]}, + {list_parameters, [?VHOST_DEF]}, + + {set_policy, [?VHOST_DEF, ?PRIORITY_DEF, ?APPLY_TO_DEF]}, + {clear_policy, [?VHOST_DEF]}, + {list_policies, [?VHOST_DEF]}, {list_queues, [?VHOST_DEF]}, {list_exchanges, [?VHOST_DEF]}, @@ -92,7 +109,9 @@ {"Bindings", rabbit_binding, info_all, info_keys}, {"Consumers", rabbit_amqqueue, consumers_all, consumer_info_keys}, {"Permissions", rabbit_auth_backend_internal, list_vhost_permissions, - vhost_perms_info_keys}]). + vhost_perms_info_keys}, + {"Policies", rabbit_policy, list_formatted, info_keys}, + {"Parameters", rabbit_runtime_parameters, list_formatted, info_keys}]). %%---------------------------------------------------------------------------- @@ -113,19 +132,13 @@ start() -> {ok, [[NodeStr|_]|_]} = init:get_argument(nodename), {Command, Opts, Args} = - case rabbit_misc:parse_arguments(?COMMANDS, ?GLOBAL_DEFS(NodeStr), - init:get_plain_arguments()) - of + case parse_arguments(init:get_plain_arguments(), NodeStr) of {ok, Res} -> Res; no_command -> print_error("could not recognise command", []), usage() end, - Opts1 = [case K of - ?NODE_OPT -> {?NODE_OPT, rabbit_nodes:make(V)}; - _ -> {K, V} - end || {K, V} <- Opts], - Quiet = proplists:get_bool(?QUIET_OPT, Opts1), - Node = proplists:get_value(?NODE_OPT, Opts1), + Quiet = proplists:get_bool(?QUIET_OPT, Opts), + Node = proplists:get_value(?NODE_OPT, Opts), Inform = case Quiet of true -> fun (_Format, _Args1) -> ok end; false -> fun (Format, Args1) -> @@ -147,6 +160,12 @@ start() -> false -> io:format("...done.~n") end, rabbit_misc:quit(0); + {ok, Info} -> + case Quiet of + true -> ok; + false -> io:format("...done (~p).~n", [Info]) + end, + rabbit_misc:quit(0); {'EXIT', {function_clause, [{?MODULE, action, _} | _]}} -> %% < R15 PrintInvalidCommandError(), usage(); @@ -156,6 +175,11 @@ start() -> {'EXIT', {badarg, _}} -> print_error("invalid parameter: ~p", [Args]), usage(); + {error, {Problem, Reason}} when is_atom(Problem), is_binary(Reason) -> + %% We handle this common case specially to avoid ~p since + %% that has i18n issues + print_error("~s: ~s", [Problem, Reason]), + rabbit_misc:quit(2); {error, Reason} -> print_error("~p", [Reason]), rabbit_misc:quit(2); @@ -185,11 +209,11 @@ print_report(Node, {Descr, Module, InfoFun, KeysFun}, VHostArg) -> print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg). print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg) -> - case Results = rpc_call(Node, Module, InfoFun, VHostArg) of - [_|_] -> InfoItems = rpc_call(Node, Module, KeysFun, []), - display_row([atom_to_list(I) || I <- InfoItems]), - display_info_list(Results, InfoItems); - _ -> ok + case rpc_call(Node, Module, InfoFun, VHostArg) of + [_|_] = Results -> InfoItems = rpc_call(Node, Module, KeysFun, []), + display_row([atom_to_list(I) || I <- InfoItems]), + display_info_list(Results, InfoItems); + _ -> ok end, io:nl(). @@ -205,6 +229,19 @@ usage() -> io:format("~s", [rabbit_ctl_usage:usage()]), rabbit_misc:quit(1). +parse_arguments(CmdLine, NodeStr) -> + case rabbit_misc:parse_arguments( + ?COMMANDS, ?GLOBAL_DEFS(NodeStr), CmdLine) of + {ok, {Cmd, Opts0, Args}} -> + Opts = [case K of + ?NODE_OPT -> {?NODE_OPT, rabbit_nodes:make(V)}; + _ -> {K, V} + end || {K, V} <- Opts0], + {ok, {Cmd, Opts, Args}}; + E -> + E + end. + %%---------------------------------------------------------------------------- action(stop, Node, Args, _Opts, Inform) -> @@ -234,21 +271,50 @@ action(force_reset, Node, [], _Opts, Inform) -> Inform("Forcefully resetting node ~p", [Node]), call(Node, {rabbit_mnesia, force_reset, []}); -action(cluster, Node, ClusterNodeSs, _Opts, Inform) -> - ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), - Inform("Clustering node ~p with ~p", - [Node, ClusterNodes]), - rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]); +action(join_cluster, Node, [ClusterNodeS], Opts, Inform) -> + ClusterNode = list_to_atom(ClusterNodeS), + NodeType = case proplists:get_bool(?RAM_OPT, Opts) of + true -> ram; + false -> disc + end, + Inform("Clustering node ~p with ~p", [Node, ClusterNode]), + rpc_call(Node, rabbit_mnesia, join_cluster, [ClusterNode, NodeType]); + +action(change_cluster_node_type, Node, ["ram"], _Opts, Inform) -> + Inform("Turning ~p into a ram node", [Node]), + rpc_call(Node, rabbit_mnesia, change_cluster_node_type, [ram]); +action(change_cluster_node_type, Node, [Type], _Opts, Inform) + when Type =:= "disc" orelse Type =:= "disk" -> + Inform("Turning ~p into a disc node", [Node]), + rpc_call(Node, rabbit_mnesia, change_cluster_node_type, [disc]); + +action(update_cluster_nodes, Node, [ClusterNodeS], _Opts, Inform) -> + ClusterNode = list_to_atom(ClusterNodeS), + Inform("Updating cluster nodes for ~p from ~p", [Node, ClusterNode]), + rpc_call(Node, rabbit_mnesia, update_cluster_nodes, [ClusterNode]); + +action(forget_cluster_node, Node, [ClusterNodeS], Opts, Inform) -> + ClusterNode = list_to_atom(ClusterNodeS), + RemoveWhenOffline = proplists:get_bool(?OFFLINE_OPT, Opts), + Inform("Removing node ~p from cluster", [ClusterNode]), + rpc_call(Node, rabbit_mnesia, forget_cluster_node, + [ClusterNode, RemoveWhenOffline]); + +action(sync_queue, Node, [Q], Opts, Inform) -> + VHost = proplists:get_value(?VHOST_OPT, Opts), + QName = rabbit_misc:r(list_to_binary(VHost), queue, list_to_binary(Q)), + Inform("Synchronising ~s", [rabbit_misc:rs(QName)]), + rpc_call(Node, rabbit_control_main, sync_queue, [QName]); -action(force_cluster, Node, ClusterNodeSs, _Opts, Inform) -> - ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), - Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)", - [Node, ClusterNodes]), - rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]); +action(cancel_sync_queue, Node, [Q], Opts, Inform) -> + VHost = proplists:get_value(?VHOST_OPT, Opts), + QName = rabbit_misc:r(list_to_binary(VHost), queue, list_to_binary(Q)), + Inform("Stopping synchronising ~s", [rabbit_misc:rs(QName)]), + rpc_call(Node, rabbit_control_main, cancel_sync_queue, [QName]); action(wait, Node, [PidFile], _Opts, Inform) -> Inform("Waiting for ~p", [Node]), - wait_for_application(Node, PidFile, rabbit, Inform); + wait_for_application(Node, PidFile, rabbit_and_plugins, Inform); action(wait, Node, [PidFile, App], _Opts, Inform) -> Inform("Waiting for ~p on ~p", [App, Node]), wait_for_application(Node, PidFile, list_to_atom(App), Inform); @@ -352,7 +418,7 @@ action(list_bindings, Node, Args, Opts, Inform) -> action(list_connections, Node, Args, _Opts, Inform) -> Inform("Listing connections", []), - ArgAtoms = default_if_empty(Args, [user, peer_address, peer_port, state]), + ArgAtoms = default_if_empty(Args, [user, peer_host, peer_port, state]), display_info_list(rpc_call(Node, rabbit_networking, connection_info_all, [ArgAtoms]), ArgAtoms); @@ -409,50 +475,85 @@ action(list_permissions, Node, [], Opts, Inform) -> list_vhost_permissions, [VHost]}), rabbit_auth_backend_internal:vhost_perms_info_keys()); -action(set_parameter, Node, [Component, Key, Value], _Opts, Inform) -> +action(set_parameter, Node, [Component, Key, Value], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), Inform("Setting runtime parameter ~p for component ~p to ~p", [Key, Component, Value]), rpc_call(Node, rabbit_runtime_parameters, parse_set, - [list_to_binary(Component), list_to_binary(Key), Value]); + [VHostArg, list_to_binary(Component), list_to_binary(Key), Value]); -action(clear_parameter, Node, [Component, Key], _Opts, Inform) -> +action(clear_parameter, Node, [Component, Key], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), Inform("Clearing runtime parameter ~p for component ~p", [Key, Component]), - rpc_call(Node, rabbit_runtime_parameters, clear, [list_to_binary(Component), + rpc_call(Node, rabbit_runtime_parameters, clear, [VHostArg, + list_to_binary(Component), list_to_binary(Key)]); -action(list_parameters, Node, Args = [], _Opts, Inform) -> +action(list_parameters, Node, [], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), Inform("Listing runtime parameters", []), display_info_list( - rpc_call(Node, rabbit_runtime_parameters, list_formatted, Args), + rpc_call(Node, rabbit_runtime_parameters, list_formatted, [VHostArg]), rabbit_runtime_parameters:info_keys()); +action(set_policy, Node, [Key, Pattern, Defn], Opts, Inform) -> + Msg = "Setting policy ~p for pattern ~p to ~p with priority ~p", + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + PriorityArg = proplists:get_value(?PRIORITY_OPT, Opts), + ApplyToArg = list_to_binary(proplists:get_value(?APPLY_TO_OPT, Opts)), + Inform(Msg, [Key, Pattern, Defn, PriorityArg]), + rpc_call( + Node, rabbit_policy, parse_set, + [VHostArg, list_to_binary(Key), Pattern, Defn, PriorityArg, ApplyToArg]); + +action(clear_policy, Node, [Key], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + Inform("Clearing policy ~p", [Key]), + rpc_call(Node, rabbit_policy, delete, [VHostArg, list_to_binary(Key)]); + +action(list_policies, Node, [], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + Inform("Listing policies", []), + display_info_list(rpc_call(Node, rabbit_policy, list_formatted, [VHostArg]), + rabbit_policy:info_keys()); + action(report, Node, _Args, _Opts, Inform) -> - io:format("Reporting server status on ~p~n~n", [erlang:universaltime()]), + Inform("Reporting server status on ~p~n~n", [erlang:universaltime()]), [begin ok = action(Action, N, [], [], Inform), io:nl() end || - N <- unsafe_rpc(Node, rabbit_mnesia, running_clustered_nodes, []), + N <- unsafe_rpc(Node, rabbit_mnesia, cluster_nodes, [running]), Action <- [status, cluster_status, environment]], VHosts = unsafe_rpc(Node, rabbit_vhost, list, []), [print_report(Node, Q) || Q <- ?GLOBAL_QUERIES], [print_report(Node, Q, [V]) || Q <- ?VHOST_QUERIES, V <- VHosts], - io:format("End of server status report~n"), ok; action(eval, Node, [Expr], _Opts, _Inform) -> case erl_scan:string(Expr) of {ok, Scanned, _} -> case erl_parse:parse_exprs(Scanned) of - {ok, Parsed} -> - {value, Value, _} = unsafe_rpc( - Node, erl_eval, exprs, [Parsed, []]), - io:format("~p~n", [Value]), - ok; - {error, E} -> - {error_string, format_parse_error(E)} + {ok, Parsed} -> {value, Value, _} = + unsafe_rpc( + Node, erl_eval, exprs, [Parsed, []]), + io:format("~p~n", [Value]), + ok; + {error, E} -> {error_string, format_parse_error(E)} end; {error, E, _} -> {error_string, format_parse_error(E)} end. +format_parse_error({_Line, Mod, Err}) -> lists:flatten(Mod:format_error(Err)). + +sync_queue(Q) -> + rabbit_amqqueue:with( + Q, fun(#amqqueue{pid = QPid}) -> rabbit_amqqueue:sync_mirrors(QPid) end). + +cancel_sync_queue(Q) -> + rabbit_amqqueue:with( + Q, fun(#amqqueue{pid = QPid}) -> + rabbit_amqqueue:cancel_sync_mirrors(QPid) + end). + %%---------------------------------------------------------------------------- wait_for_application(Node, PidFile, Application, Inform) -> @@ -460,12 +561,22 @@ wait_for_application(Node, PidFile, Application, Inform) -> Inform("pid is ~s", [Pid]), wait_for_application(Node, Pid, Application). +wait_for_application(Node, Pid, rabbit_and_plugins) -> + wait_for_startup(Node, Pid); wait_for_application(Node, Pid, Application) -> + while_process_is_alive( + Node, Pid, fun() -> rabbit_nodes:is_running(Node, Application) end). + +wait_for_startup(Node, Pid) -> + while_process_is_alive( + Node, Pid, fun() -> rpc:call(Node, rabbit, await_startup, []) =:= ok end). + +while_process_is_alive(Node, Pid, Activity) -> case process_up(Pid) of - true -> case rabbit_nodes:is_running(Node, Application) of + true -> case Activity() of true -> ok; false -> timer:sleep(?EXTERNAL_CHECK_INTERVAL), - wait_for_application(Node, Pid, Application) + while_process_is_alive(Node, Pid, Activity) end; false -> {error, process_not_running} end. @@ -480,12 +591,14 @@ wait_for_process_death(Pid) -> read_pid_file(PidFile, Wait) -> case {file:read_file(PidFile), Wait} of {{ok, Bin}, _} -> - S = string:strip(binary_to_list(Bin), right, $\n), - try list_to_integer(S) + S = binary_to_list(Bin), + {match, [PidS]} = re:run(S, "[^\\s]+", + [{capture, all, list}]), + try list_to_integer(PidS) catch error:badarg -> exit({error, {garbage_in_pid_file, PidFile}}) end, - S; + PidS; {{error, enoent}, true} -> timer:sleep(?EXTERNAL_CHECK_INTERVAL), read_pid_file(PidFile, Wait); @@ -497,12 +610,11 @@ read_pid_file(PidFile, Wait) -> % rpc:call(os, getpid, []) at this point process_up(Pid) -> with_os([{unix, fun () -> - system("ps -p " ++ Pid - ++ " >/dev/null 2>&1") =:= 0 + run_ps(Pid) =:= 0 end}, {win32, fun () -> - Res = os:cmd("tasklist /nh /fi \"pid eq " ++ - Pid ++ "\" 2>&1"), + Cmd = "tasklist /nh /fi \"pid eq " ++ Pid ++ "\" ", + Res = rabbit_misc:os_cmd(Cmd ++ "2>&1"), case re:run(Res, "erl\\.exe", [{capture, none}]) of match -> true; _ -> false @@ -516,18 +628,17 @@ with_os(Handlers) -> Handler -> Handler() end. -% Like system(3) -system(Cmd) -> - ShCmd = "sh -c '" ++ escape_quotes(Cmd) ++ "'", - Port = erlang:open_port({spawn, ShCmd}, [exit_status,nouse_stdio]), - receive {Port, {exit_status, Status}} -> Status end. - -% Escape the quotes in a shell command so that it can be used in "sh -c 'cmd'" -escape_quotes(Cmd) -> - lists:flatten(lists:map(fun ($') -> "'\\''"; (Ch) -> Ch end, Cmd)). +run_ps(Pid) -> + Port = erlang:open_port({spawn, "ps -p " ++ Pid}, + [exit_status, {line, 16384}, + use_stdio, stderr_to_stdout]), + exit_loop(Port). -format_parse_error({_Line, Mod, Err}) -> - lists:flatten(Mod:format_error(Err)). +exit_loop(Port) -> + receive + {Port, {exit_status, Rc}} -> Rc; + {Port, _} -> exit_loop(Port) + end. %%---------------------------------------------------------------------------- @@ -541,7 +652,7 @@ display_info_list(Results, InfoItemKeys) when is_list(Results) -> fun (Result) -> display_row( [format_info_item(proplists:get_value(X, Result)) || X <- InfoItemKeys]) - end, Results), + end, lists:sort(Results)), ok; display_info_list(Other, _) -> Other. diff --git a/src/rabbit_direct.erl b/src/rabbit_direct.erl index c07ad832..a7ee3276 100644 --- a/src/rabbit_direct.erl +++ b/src/rabbit_direct.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_direct). @@ -31,16 +31,18 @@ -spec(force_event_refresh/0 :: () -> 'ok'). -spec(list/0 :: () -> [pid()]). -spec(list_local/0 :: () -> [pid()]). --spec(connect/5 :: (rabbit_types:username(), rabbit_types:vhost(), - rabbit_types:protocol(), pid(), +-spec(connect/5 :: ((rabbit_types:username() | rabbit_types:user() | + {rabbit_types:username(), rabbit_types:password()}), + rabbit_types:vhost(), rabbit_types:protocol(), pid(), rabbit_event:event_props()) -> - {'ok', {rabbit_types:user(), - rabbit_framing:amqp_table()}}). + rabbit_types:ok_or_error2( + {rabbit_types:user(), rabbit_framing:amqp_table()}, + 'broker_not_found_on_node' | 'auth_failure' | + 'access_refused')). -spec(start_channel/9 :: (rabbit_channel:channel_number(), pid(), pid(), string(), rabbit_types:protocol(), rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(), pid()) -> {'ok', pid()}). - -spec(disconnect/2 :: (pid(), rabbit_event:event_props()) -> 'ok'). -endif. @@ -60,32 +62,42 @@ list_local() -> pg_local:get_members(rabbit_direct). list() -> - rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:cluster_nodes(running), rabbit_direct, list_local, []). %%---------------------------------------------------------------------------- +connect(User = #user{}, VHost, Protocol, Pid, Infos) -> + try rabbit_access_control:check_vhost_access(User, VHost) of + ok -> ok = pg_local:join(rabbit_direct, Pid), + rabbit_event:notify(connection_created, Infos), + {ok, {User, rabbit_reader:server_properties(Protocol)}} + catch + exit:#amqp_error{name = access_refused} -> + {error, access_refused} + end; + +connect({Username, Password}, VHost, Protocol, Pid, Infos) -> + connect0(fun () -> rabbit_access_control:check_user_pass_login( + Username, Password) end, + VHost, Protocol, Pid, Infos); + connect(Username, VHost, Protocol, Pid, Infos) -> + connect0(fun () -> rabbit_access_control:check_user_login( + Username, []) end, + VHost, Protocol, Pid, Infos). + +connect0(AuthFun, VHost, Protocol, Pid, Infos) -> case rabbit:is_running() of - true -> - case rabbit_access_control:check_user_login(Username, []) of - {ok, User} -> - try rabbit_access_control:check_vhost_access(User, VHost) of - ok -> ok = pg_local:join(rabbit_direct, Pid), - rabbit_event:notify(connection_created, Infos), - {ok, {User, - rabbit_reader:server_properties(Protocol)}} - catch - exit:#amqp_error{name = access_refused} -> - {error, access_refused} - end; - {refused, _Msg, _Args} -> - {error, auth_failure} - end; - false -> - {error, broker_not_found_on_node} + true -> case AuthFun() of + {ok, User} -> connect(User, VHost, Protocol, Pid, + Infos); + {refused, _M, _A} -> {error, auth_failure} + end; + false -> {error, broker_not_found_on_node} end. + start_channel(Number, ClientChannelPid, ConnPid, ConnName, Protocol, User, VHost, Capabilities, Collector) -> {ok, _, {ChannelPid, _}} = diff --git a/src/rabbit_disk_monitor.erl b/src/rabbit_disk_monitor.erl index d9e8e8e4..5aaa1b2d 100644 --- a/src/rabbit_disk_monitor.erl +++ b/src/rabbit_disk_monitor.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_disk_monitor). @@ -27,10 +27,11 @@ set_check_interval/1, get_disk_free/0]). -define(SERVER, ?MODULE). --define(DEFAULT_DISK_CHECK_INTERVAL, 60000). +-define(DEFAULT_DISK_CHECK_INTERVAL, 10000). -record(state, {dir, limit, + actual, timeout, timer, alarmed @@ -106,8 +107,8 @@ handle_call({set_check_interval, Timeout}, _From, State) -> {ok, cancel} = timer:cancel(State#state.timer), {reply, ok, State#state{timeout = Timeout, timer = start_timer(Timeout)}}; -handle_call(get_disk_free, _From, State = #state { dir = Dir }) -> - {reply, get_disk_free(Dir), State}; +handle_call(get_disk_free, _From, State = #state { actual = Actual }) -> + {reply, Actual, State}; handle_call(_Request, _From, State) -> {noreply, State}. @@ -137,7 +138,7 @@ dir() -> rabbit_mnesia:dir(). set_disk_limits(State, Limit) -> State1 = State#state { limit = Limit }, rabbit_log:info("Disk free limit set to ~pMB~n", - [trunc(interpret_limit(Limit) / 1048576)]), + [trunc(interpret_limit(Limit) / 1000000)]), internal_update(State1). internal_update(State = #state { limit = Limit, @@ -148,15 +149,15 @@ internal_update(State = #state { limit = Limit, NewAlarmed = CurrentFreeBytes < LimitBytes, case {Alarmed, NewAlarmed} of {false, true} -> - emit_update_info("exceeded", CurrentFreeBytes, LimitBytes), - alarm_handler:set_alarm({{resource_limit, disk, node()}, []}); + emit_update_info("insufficient", CurrentFreeBytes, LimitBytes), + rabbit_alarm:set_alarm({{resource_limit, disk, node()}, []}); {true, false} -> - emit_update_info("below limit", CurrentFreeBytes, LimitBytes), - alarm_handler:clear_alarm({resource_limit, disk, node()}); + emit_update_info("sufficient", CurrentFreeBytes, LimitBytes), + rabbit_alarm:clear_alarm({resource_limit, disk, node()}); _ -> ok end, - State #state {alarmed = NewAlarmed}. + State #state {alarmed = NewAlarmed, actual = CurrentFreeBytes}. get_disk_free(Dir) -> get_disk_free(Dir, os:type()). @@ -167,9 +168,9 @@ get_disk_free(Dir, {unix, Sun}) get_disk_free(Dir, {unix, _}) -> parse_free_unix(rabbit_misc:os_cmd("/bin/df -kP " ++ Dir)); get_disk_free(Dir, {win32, _}) -> - parse_free_win32(os:cmd("dir /-C /W \"" ++ Dir ++ [$"])); -get_disk_free(_, _) -> - unknown. + parse_free_win32(rabbit_misc:os_cmd("dir /-C /W \"" ++ Dir ++ [$"])); +get_disk_free(_, Platform) -> + {unknown, Platform}. parse_free_unix(CommandResult) -> [_, Stats | _] = string:tokens(CommandResult, "\n"), @@ -187,10 +188,10 @@ interpret_limit({mem_relative, R}) -> interpret_limit(L) -> L. -emit_update_info(State, CurrentFree, Limit) -> +emit_update_info(StateStr, CurrentFree, Limit) -> rabbit_log:info( - "Disk free space limit now ~s. Free bytes:~p Limit:~p~n", - [State, CurrentFree, Limit]). + "Disk free space ~s. Free bytes:~p Limit:~p~n", + [StateStr, CurrentFree, Limit]). start_timer(Timeout) -> {ok, TRef} = timer:send_interval(Timeout, update), diff --git a/src/rabbit_error_logger.erl b/src/rabbit_error_logger.erl index f1672f4e..17ed8563 100644 --- a/src/rabbit_error_logger.erl +++ b/src/rabbit_error_logger.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_error_logger). @@ -22,7 +22,7 @@ -behaviour(gen_event). --export([boot/0]). +-export([start/0, stop/0]). -export([init/1, terminate/2, code_change/3, handle_call/2, handle_event/2, handle_info/2]). @@ -31,16 +31,23 @@ -ifdef(use_specs). --spec(boot/0 :: () -> 'ok'). +-spec(start/0 :: () -> 'ok'). +-spec(stop/0 :: () -> 'ok'). -endif. %%---------------------------------------------------------------------------- -boot() -> +start() -> {ok, DefaultVHost} = application:get_env(default_vhost), ok = error_logger:add_report_handler(?MODULE, [DefaultVHost]). +stop() -> + terminated_ok = error_logger:delete_report_handler(rabbit_error_logger), + ok. + +%%---------------------------------------------------------------------------- + init([DefaultVHost]) -> #exchange{} = rabbit_exchange:declare( rabbit_misc:r(DefaultVHost, exchange, ?LOG_EXCH_NAME), @@ -81,7 +88,7 @@ publish1(RoutingKey, Format, Data, LogExch) -> %% second resolution, not millisecond. Timestamp = rabbit_misc:now_ms() div 1000, {ok, _RoutingRes, _DeliveredQPids} = - rabbit_basic:publish(LogExch, RoutingKey, false, false, + rabbit_basic:publish(LogExch, RoutingKey, #'P_basic'{content_type = <<"text/plain">>, timestamp = Timestamp}, list_to_binary(io_lib:format(Format, Data))), diff --git a/src/rabbit_error_logger_file_h.erl b/src/rabbit_error_logger_file_h.erl index 042ab23c..d59641b0 100644 --- a/src/rabbit_error_logger_file_h.erl +++ b/src/rabbit_error_logger_file_h.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_error_logger_file_h). @@ -76,6 +76,9 @@ init_file(File, PrevHandler) -> Error -> Error end. +%% filter out "application: foo; exited: stopped; type: temporary" +handle_event({info_report, _, {_, std_info, _}}, State) -> + {ok, State}; handle_event(Event, State) -> error_logger_file_h:handle_event(Event, State). diff --git a/src/rabbit_event.erl b/src/rabbit_event.erl index 3f1b20fe..a713d76b 100644 --- a/src/rabbit_event.erl +++ b/src/rabbit_event.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_event). @@ -19,8 +19,8 @@ -include("rabbit.hrl"). -export([start_link/0]). --export([init_stats_timer/2, ensure_stats_timer/3, stop_stats_timer/2]). --export([reset_stats_timer/2]). +-export([init_stats_timer/2, init_disabled_stats_timer/2, + ensure_stats_timer/3, stop_stats_timer/2, reset_stats_timer/2]). -export([stats_level/2, if_enabled/3]). -export([notify/2, notify_if/3]). @@ -51,6 +51,7 @@ -spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(init_stats_timer/2 :: (container(), pos()) -> container()). +-spec(init_disabled_stats_timer/2 :: (container(), pos()) -> container()). -spec(ensure_stats_timer/3 :: (container(), pos(), term()) -> container()). -spec(stop_stats_timer/2 :: (container(), pos()) -> container()). -spec(reset_stats_timer/2 :: (container(), pos()) -> container()). @@ -90,10 +91,13 @@ start_link() -> init_stats_timer(C, P) -> {ok, StatsLevel} = application:get_env(rabbit, collect_statistics), - {ok, Interval} = application:get_env(rabbit, collect_statistics_interval), + {ok, Interval} = application:get_env(rabbit, collect_statistics_interval), setelement(P, C, #state{level = StatsLevel, interval = Interval, timer = undefined}). +init_disabled_stats_timer(C, P) -> + setelement(P, C, #state{level = none, interval = 0, timer = undefined}). + ensure_stats_timer(C, P, Msg) -> case element(P, C) of #state{level = Level, interval = Interval, timer = undefined} = State @@ -106,18 +110,18 @@ ensure_stats_timer(C, P, Msg) -> stop_stats_timer(C, P) -> case element(P, C) of - #state{level = Level, timer = TRef} = State - when Level =/= none andalso TRef =/= undefined -> - erlang:cancel_timer(TRef), - setelement(P, C, State#state{timer = undefined}); + #state{timer = TRef} = State when TRef =/= undefined -> + case erlang:cancel_timer(TRef) of + false -> C; + _ -> setelement(P, C, State#state{timer = undefined}) + end; #state{} -> C end. reset_stats_timer(C, P) -> case element(P, C) of - #state{timer = TRef} = State - when TRef =/= undefined -> + #state{timer = TRef} = State when TRef =/= undefined -> setelement(P, C, State#state{timer = undefined}); #state{} -> C @@ -137,8 +141,6 @@ notify_if(true, Type, Props) -> notify(Type, Props); notify_if(false, _Type, _Props) -> ok. notify(Type, Props) -> - %% TODO: switch to os:timestamp() when we drop support for - %% Erlang/OTP < R13B01 gen_event:notify(?MODULE, #event{type = Type, props = Props, - timestamp = now()}). + timestamp = os:timestamp()}). diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl index 910a89b4..49952a4d 100644 --- a/src/rabbit_exchange.erl +++ b/src/rabbit_exchange.erl @@ -10,21 +10,21 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_exchange). -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([recover/0, callback/3, declare/6, +-export([recover/0, policy_changed/2, callback/4, declare/6, assert_equivalence/6, assert_args_equivalence/2, check_type/1, - lookup/1, lookup_or_die/1, list/1, update_scratch/2, + lookup/1, lookup_or_die/1, list/1, lookup_scratch/2, update_scratch/3, info_keys/0, info/1, info/2, info_all/1, info_all/2, - route/2, delete/2]). + route/2, delete/2, validate_binding/2]). %% these must be run inside a mnesia tx --export([maybe_auto_delete/1, serial/1, peek_serial/1]). +-export([maybe_auto_delete/1, serial/1, peek_serial/1, update/2]). %%---------------------------------------------------------------------------- @@ -37,7 +37,11 @@ -type(fun_name() :: atom()). -spec(recover/0 :: () -> [name()]). --spec(callback/3:: (rabbit_types:exchange(), fun_name(), [any()]) -> 'ok'). +-spec(callback/4:: + (rabbit_types:exchange(), fun_name(), + fun((boolean()) -> non_neg_integer()) | atom(), [any()]) -> 'ok'). +-spec(policy_changed/2 :: + (rabbit_types:exchange(), rabbit_types:exchange()) -> 'ok'). -spec(declare/6 :: (name(), type(), boolean(), boolean(), boolean(), rabbit_framing:amqp_table()) @@ -58,7 +62,14 @@ (name()) -> rabbit_types:exchange() | rabbit_types:channel_exit()). -spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:exchange()]). --spec(update_scratch/2 :: (name(), fun((any()) -> any())) -> 'ok'). +-spec(lookup_scratch/2 :: (name(), atom()) -> + rabbit_types:ok(term()) | + rabbit_types:error('not_found')). +-spec(update_scratch/3 :: (name(), atom(), fun((any()) -> any())) -> 'ok'). +-spec(update/2 :: + (name(), + fun((rabbit_types:exchange()) -> rabbit_types:exchange())) + -> not_found | rabbit_types:exchange()). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). -spec(info/1 :: (rabbit_types:exchange()) -> rabbit_types:infos()). -spec(info/2 :: @@ -73,17 +84,22 @@ (name(), boolean())-> 'ok' | rabbit_types:error('not_found') | rabbit_types:error('in_use')). +-spec(validate_binding/2 :: + (rabbit_types:exchange(), rabbit_types:binding()) + -> rabbit_types:ok_or_error({'binding_invalid', string(), [any()]})). -spec(maybe_auto_delete/1:: (rabbit_types:exchange()) -> 'not_deleted' | {'deleted', rabbit_binding:deletions()}). --spec(serial/1 :: (rabbit_types:exchange()) -> 'none' | pos_integer()). +-spec(serial/1 :: (rabbit_types:exchange()) -> + fun((boolean()) -> 'none' | pos_integer())). -spec(peek_serial/1 :: (name()) -> pos_integer() | 'undefined'). -endif. %%---------------------------------------------------------------------------- --define(INFO_KEYS, [name, type, durable, auto_delete, internal, arguments]). +-define(INFO_KEYS, [name, type, durable, auto_delete, internal, arguments, + policy]). recover() -> Xs = rabbit_misc:table_filter( @@ -95,21 +111,60 @@ recover() -> true -> store(X); false -> ok end, - rabbit_exchange:callback(X, create, [map_create_tx(Tx), X]) + callback(X, create, map_create_tx(Tx), [X]) end, rabbit_durable_exchange), + report_missing_decorators(Xs), [XName || #exchange{name = XName} <- Xs]. -callback(#exchange{type = XType}, Fun, Args) -> - apply(type_to_module(XType), Fun, Args). +report_missing_decorators(Xs) -> + Mods = lists:usort(lists:append([rabbit_exchange_decorator:select(raw, D) || + #exchange{decorators = D} <- Xs])), + case [M || M <- Mods, code:which(M) =:= non_existing] of + [] -> ok; + M -> rabbit_log:warning("Missing exchange decorators: ~p~n", [M]) + end. + +callback(X = #exchange{type = XType, + decorators = Decorators}, Fun, Serial0, Args) -> + Serial = if is_function(Serial0) -> Serial0; + is_atom(Serial0) -> fun (_Bool) -> Serial0 end + end, + [ok = apply(M, Fun, [Serial(M:serialise_events(X)) | Args]) || + M <- rabbit_exchange_decorator:select(all, Decorators)], + Module = type_to_module(XType), + apply(Module, Fun, [Serial(Module:serialise_events()) | Args]). + +policy_changed(X = #exchange{type = XType, + decorators = Decorators}, + X1 = #exchange{decorators = Decorators1}) -> + D = rabbit_exchange_decorator:select(all, Decorators), + D1 = rabbit_exchange_decorator:select(all, Decorators1), + DAll = lists:usort(D ++ D1), + [ok = M:policy_changed(X, X1) || M <- [type_to_module(XType) | DAll]], + ok. + +serialise_events(X = #exchange{type = Type, decorators = Decorators}) -> + lists:any(fun (M) -> M:serialise_events(X) end, + rabbit_exchange_decorator:select(all, Decorators)) + orelse (type_to_module(Type)):serialise_events(). + +serial(#exchange{name = XName} = X) -> + Serial = case serialise_events(X) of + true -> next_serial(XName); + false -> none + end, + fun (true) -> Serial; + (false) -> none + end. declare(XName, Type, Durable, AutoDelete, Internal, Args) -> - X = #exchange{name = XName, - type = Type, - durable = Durable, - auto_delete = AutoDelete, - internal = Internal, - arguments = Args}, + X = rabbit_policy:set(#exchange{name = XName, + type = Type, + durable = Durable, + auto_delete = AutoDelete, + internal = Internal, + arguments = Args}), XT = type_to_module(Type), %% We want to upset things if it isn't ok ok = XT:validate(X), @@ -129,7 +184,7 @@ declare(XName, Type, Durable, AutoDelete, Internal, Args) -> end end, fun ({new, Exchange}, Tx) -> - ok = XT:create(map_create_tx(Tx), Exchange), + ok = callback(X, create, map_create_tx(Tx), [Exchange]), rabbit_event:notify_if(not Tx, exchange_created, info(Exchange)), Exchange; ({existing, Exchange}, _Tx) -> @@ -141,13 +196,7 @@ declare(XName, Type, Durable, AutoDelete, Internal, Args) -> map_create_tx(true) -> transaction; map_create_tx(false) -> none. -store(X = #exchange{name = Name, type = Type}) -> - ok = mnesia:write(rabbit_exchange, X, write), - case (type_to_module(Type)):serialise_events() of - true -> S = #exchange_serial{name = Name, next = 1}, - ok = mnesia:write(rabbit_exchange_serial, S, write); - false -> ok - end. +store(X) -> ok = mnesia:write(rabbit_exchange, X, write). %% Used with binaries sent over the wire; the type may not exist. check_type(TypeBin) -> @@ -200,23 +249,53 @@ list(VHostPath) -> rabbit_exchange, #exchange{name = rabbit_misc:r(VHostPath, exchange), _ = '_'}). -update_scratch(Name, Fun) -> +lookup_scratch(Name, App) -> + case lookup(Name) of + {ok, #exchange{scratches = undefined}} -> + {error, not_found}; + {ok, #exchange{scratches = Scratches}} -> + case orddict:find(App, Scratches) of + {ok, Value} -> {ok, Value}; + error -> {error, not_found} + end; + {error, not_found} -> + {error, not_found} + end. + +update_scratch(Name, App, Fun) -> rabbit_misc:execute_mnesia_transaction( fun() -> - case mnesia:wread({rabbit_exchange, Name}) of - [X = #exchange{durable = Durable, scratch = Scratch}] -> - X1 = X#exchange{scratch = Fun(Scratch)}, - ok = mnesia:write(rabbit_exchange, X1, write), - case Durable of - true -> ok = mnesia:write(rabbit_durable_exchange, - X1, write); - _ -> ok - end; - [] -> - ok - end + update(Name, + fun(X = #exchange{scratches = Scratches0}) -> + Scratches1 = case Scratches0 of + undefined -> orddict:new(); + _ -> Scratches0 + end, + Scratch = case orddict:find(App, Scratches1) of + {ok, S} -> S; + error -> undefined + end, + Scratches2 = orddict:store( + App, Fun(Scratch), Scratches1), + X#exchange{scratches = Scratches2} + end), + ok end). +update(Name, Fun) -> + case mnesia:wread({rabbit_exchange, Name}) of + [X = #exchange{durable = Durable}] -> + X1 = Fun(X), + ok = mnesia:write(rabbit_exchange, X1, write), + case Durable of + true -> ok = mnesia:write(rabbit_durable_exchange, X1, write); + _ -> ok + end, + X1; + [] -> + not_found + end. + info_keys() -> ?INFO_KEYS. map(VHostPath, F) -> @@ -232,6 +311,10 @@ i(durable, #exchange{durable = Durable}) -> Durable; i(auto_delete, #exchange{auto_delete = AutoDelete}) -> AutoDelete; i(internal, #exchange{internal = Internal}) -> Internal; i(arguments, #exchange{arguments = Arguments}) -> Arguments; +i(policy, X) -> case rabbit_policy:name(X) of + none -> ''; + Policy -> Policy + end; i(Item, _) -> throw({bad_argument, Item}). info(X = #exchange{}) -> infos(?INFO_KEYS, X). @@ -242,58 +325,67 @@ info_all(VHostPath) -> map(VHostPath, fun (X) -> info(X) end). info_all(VHostPath, Items) -> map(VHostPath, fun (X) -> info(X, Items) end). -%% Optimisation -route(#exchange{name = #resource{name = <<"">>, virtual_host = VHost}}, - #delivery{message = #basic_message{routing_keys = RKs}}) -> - [rabbit_misc:r(VHost, queue, RK) || RK <- lists:usort(RKs)]; - -route(X = #exchange{name = XName}, Delivery) -> - route1(Delivery, {queue:from_list([X]), XName, []}). - -route1(Delivery, {WorkList, SeenXs, QNames}) -> - case queue:out(WorkList) of - {empty, _WorkList} -> - lists:usort(QNames); - {{value, X = #exchange{type = Type}}, WorkList1} -> - DstNames = process_alternate( - X, ((type_to_module(Type)):route(X, Delivery))), - route1(Delivery, - lists:foldl(fun process_route/2, {WorkList1, SeenXs, QNames}, - DstNames)) +route(#exchange{name = #resource{virtual_host = VHost, name = RName} = XName, + decorators = Decorators} = X, + #delivery{message = #basic_message{routing_keys = RKs}} = Delivery) -> + case {RName, rabbit_exchange_decorator:select(route, Decorators)} of + {<<"">>, []} -> + %% Optimisation + [rabbit_misc:r(VHost, queue, RK) || RK <- lists:usort(RKs)]; + {_, SelectedDecorators} -> + lists:usort(route1(Delivery, SelectedDecorators, {[X], XName, []})) end. -process_alternate(#exchange{arguments = []}, Results) -> %% optimisation - Results; +route1(_, _, {[], _, QNames}) -> + QNames; +route1(Delivery, Decorators, + {[X = #exchange{type = Type} | WorkList], SeenXs, QNames}) -> + ExchangeDests = (type_to_module(Type)):route(X, Delivery), + DecorateDests = process_decorators(X, Decorators, Delivery), + AlternateDests = process_alternate(X, ExchangeDests), + route1(Delivery, Decorators, + lists:foldl(fun process_route/2, {WorkList, SeenXs, QNames}, + AlternateDests ++ DecorateDests ++ ExchangeDests)). + +process_alternate(#exchange{arguments = []}, _Results) -> %% optimisation + []; process_alternate(#exchange{name = XName, arguments = Args}, []) -> case rabbit_misc:r_arg(XName, exchange, Args, <<"alternate-exchange">>) of undefined -> []; AName -> [AName] end; -process_alternate(_X, Results) -> - Results. +process_alternate(_X, _Results) -> + []. + +process_decorators(_, [], _) -> %% optimisation + []; +process_decorators(X, Decorators, Delivery) -> + lists:append([Decorator:route(X, Delivery) || Decorator <- Decorators]). process_route(#resource{kind = exchange} = XName, {_WorkList, XName, _QNames} = Acc) -> Acc; process_route(#resource{kind = exchange} = XName, {WorkList, #resource{kind = exchange} = SeenX, QNames}) -> - {case lookup(XName) of - {ok, X} -> queue:in(X, WorkList); - {error, not_found} -> WorkList - end, gb_sets:from_list([SeenX, XName]), QNames}; + {cons_if_present(XName, WorkList), + gb_sets:from_list([SeenX, XName]), QNames}; process_route(#resource{kind = exchange} = XName, {WorkList, SeenXs, QNames} = Acc) -> case gb_sets:is_element(XName, SeenXs) of true -> Acc; - false -> {case lookup(XName) of - {ok, X} -> queue:in(X, WorkList); - {error, not_found} -> WorkList - end, gb_sets:add_element(XName, SeenXs), QNames} + false -> {cons_if_present(XName, WorkList), + gb_sets:add_element(XName, SeenXs), QNames} end; process_route(#resource{kind = queue} = QName, {WorkList, SeenXs, QNames}) -> {WorkList, SeenXs, [QName | QNames]}. +cons_if_present(XName, L) -> + case lookup(XName) of + {ok, X} -> [X | L]; + {error, not_found} -> L + end. + call_with_exchange(XName, Fun) -> rabbit_misc:execute_mnesia_tx_with_tail( fun () -> case mnesia:read({rabbit_exchange, XName}) of @@ -320,6 +412,10 @@ delete(XName, IfUnused) -> end end). +validate_binding(X = #exchange{type = XType}, Binding) -> + Module = type_to_module(XType), + Module:validate_binding(X, Binding). + maybe_auto_delete(#exchange{auto_delete = false}) -> not_deleted; maybe_auto_delete(#exchange{auto_delete = true} = X) -> @@ -335,34 +431,33 @@ conditional_delete(X = #exchange{name = XName}) -> end. unconditional_delete(X = #exchange{name = XName}) -> - ok = mnesia:delete({rabbit_durable_exchange, XName}), + %% this 'guarded' delete prevents unnecessary writes to the mnesia + %% disk log + case mnesia:wread({rabbit_durable_exchange, XName}) of + [] -> ok; + [_] -> ok = mnesia:delete({rabbit_durable_exchange, XName}) + end, ok = mnesia:delete({rabbit_exchange, XName}), ok = mnesia:delete({rabbit_exchange_serial, XName}), Bindings = rabbit_binding:remove_for_source(XName), {deleted, X, Bindings, rabbit_binding:remove_for_destination(XName)}. -serial(#exchange{name = XName, type = Type}) -> - case (type_to_module(Type)):serialise_events() of - true -> next_serial(XName); - false -> none - end. - next_serial(XName) -> - [#exchange_serial{next = Serial}] = - mnesia:read(rabbit_exchange_serial, XName, write), + Serial = peek_serial(XName, write), ok = mnesia:write(rabbit_exchange_serial, #exchange_serial{name = XName, next = Serial + 1}, write), Serial. -peek_serial(XName) -> - case mnesia:read({rabbit_exchange_serial, XName}) of +peek_serial(XName) -> peek_serial(XName, read). + +peek_serial(XName, LockType) -> + case mnesia:read(rabbit_exchange_serial, XName, LockType) of [#exchange_serial{next = Serial}] -> Serial; - _ -> undefined + _ -> 1 end. invalid_module(T) -> - rabbit_log:warning( - "Could not find exchange type ~s.~n", [T]), + rabbit_log:warning("Could not find exchange type ~s.~n", [T]), put({xtype_to_module, T}, rabbit_exchange_type_invalid), rabbit_exchange_type_invalid. diff --git a/src/rabbit_exchange_decorator.erl b/src/rabbit_exchange_decorator.erl new file mode 100644 index 00000000..505998b9 --- /dev/null +++ b/src/rabbit_exchange_decorator.erl @@ -0,0 +1,106 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_exchange_decorator). + +-include("rabbit.hrl"). + +-export([select/2, set/1]). + +%% This is like an exchange type except that: +%% +%% 1) It applies to all exchanges as soon as it is installed, therefore +%% 2) It is not allowed to affect validation, so no validate/1 or +%% assert_args_equivalence/2 +%% +%% It's possible in the future we might make decorators +%% able to manipulate messages as they are published. + +-ifdef(use_specs). + +-type(tx() :: 'transaction' | 'none'). +-type(serial() :: pos_integer() | tx()). + +-callback description() -> [proplists:property()]. + +%% Should Rabbit ensure that all binding events that are +%% delivered to an individual exchange can be serialised? (they +%% might still be delivered out of order, but there'll be a +%% serial number). +-callback serialise_events(rabbit_types:exchange()) -> boolean(). + +%% called after declaration and recovery +-callback create(tx(), rabbit_types:exchange()) -> 'ok'. + +%% called after exchange (auto)deletion. +-callback delete(tx(), rabbit_types:exchange(), [rabbit_types:binding()]) -> + 'ok'. + +%% called when the policy attached to this exchange changes. +-callback policy_changed(rabbit_types:exchange(), rabbit_types:exchange()) -> + 'ok'. + +%% called after a binding has been added or recovered +-callback add_binding(serial(), rabbit_types:exchange(), + rabbit_types:binding()) -> 'ok'. + +%% called after bindings have been deleted. +-callback remove_bindings(serial(), rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'. + +%% Allows additional destinations to be added to the routing decision. +-callback route(rabbit_types:exchange(), rabbit_types:delivery()) -> + [rabbit_amqqueue:name() | rabbit_exchange:name()]. + +%% Whether the decorator wishes to receive callbacks for the exchange +%% none:no callbacks, noroute:all callbacks except route, all:all callbacks +-callback active_for(rabbit_types:exchange()) -> 'none' | 'noroute' | 'all'. + +-else. + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [{description, 0}, {serialise_events, 1}, {create, 2}, {delete, 3}, + {policy_changed, 2}, {add_binding, 3}, {remove_bindings, 3}, + {route, 2}, {active_for, 1}]; +behaviour_info(_Other) -> + undefined. + +-endif. + +%%---------------------------------------------------------------------------- + +%% select a subset of active decorators +select(all, {Route, NoRoute}) -> filter(Route ++ NoRoute); +select(route, {Route, _NoRoute}) -> filter(Route); +select(raw, {Route, NoRoute}) -> Route ++ NoRoute. + +filter(Modules) -> + [M || M <- Modules, code:which(M) =/= non_existing]. + +set(X) -> + Decs = lists:foldl(fun (D, {Route, NoRoute}) -> + ActiveFor = D:active_for(X), + {cons_if_eq(all, ActiveFor, D, Route), + cons_if_eq(noroute, ActiveFor, D, NoRoute)} + end, {[], []}, list()), + X#exchange{decorators = Decs}. + +list() -> [M || {_, M} <- rabbit_registry:lookup_all(exchange_decorator)]. + +cons_if_eq(Select, Select, Item, List) -> [Item | List]; +cons_if_eq(_Select, _Other, _Item, List) -> List. diff --git a/src/rabbit_exchange_type.erl b/src/rabbit_exchange_type.erl index 1027570c..ce7a436b 100644 --- a/src/rabbit_exchange_type.erl +++ b/src/rabbit_exchange_type.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_exchange_type). @@ -21,7 +21,7 @@ -type(tx() :: 'transaction' | 'none'). -type(serial() :: pos_integer() | tx()). --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% Should Rabbit ensure that all binding events that are %% delivered to an individual exchange can be serialised? (they @@ -37,6 +37,10 @@ %% called BEFORE declaration, to check args etc; may exit with #amqp_error{} -callback validate(rabbit_types:exchange()) -> 'ok'. +%% called BEFORE declaration, to check args etc +-callback validate_binding(rabbit_types:exchange(), rabbit_types:binding()) -> + rabbit_types:ok_or_error({'binding_invalid', string(), [any()]}). + %% called after declaration and recovery -callback create(tx(), rabbit_types:exchange()) -> 'ok'. @@ -44,6 +48,10 @@ -callback delete(tx(), rabbit_types:exchange(), [rabbit_types:binding()]) -> 'ok'. +%% called when the policy attached to this exchange changes. +-callback policy_changed(rabbit_types:exchange(), rabbit_types:exchange()) -> + 'ok'. + %% called after a binding has been added or recovered -callback add_binding(serial(), rabbit_types:exchange(), rabbit_types:binding()) -> 'ok'. @@ -54,8 +62,8 @@ %% called when comparing exchanges for equivalence - should return ok or %% exit with #amqp_error{} --callback assert_args_equivalence (rabbit_types:exchange(), - rabbit_framing:amqp_table()) -> +-callback assert_args_equivalence(rabbit_types:exchange(), + rabbit_framing:amqp_table()) -> 'ok' | rabbit_types:connection_exit(). -else. @@ -63,7 +71,8 @@ -export([behaviour_info/1]). behaviour_info(callbacks) -> - [{description, 0}, {serialise_events, 0}, {route, 2}, {validate, 1}, + [{description, 0}, {serialise_events, 0}, {route, 2}, + {validate, 1}, {validate_binding, 2}, {policy_changed, 2}, {create, 2}, {delete, 3}, {add_binding, 3}, {remove_bindings, 3}, {assert_args_equivalence, 2}]; behaviour_info(_Other) -> diff --git a/src/rabbit_exchange_type_direct.erl b/src/rabbit_exchange_type_direct.erl index cdec1cb9..52704ab6 100644 --- a/src/rabbit_exchange_type_direct.erl +++ b/src/rabbit_exchange_type_direct.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_exchange_type_direct). @@ -20,8 +20,9 @@ -behaviour(rabbit_exchange_type). -export([description/0, serialise_events/0, route/2]). --export([validate/1, create/2, delete/3, - add_binding/3, remove_bindings/3, assert_args_equivalence/2]). +-export([validate/1, validate_binding/2, + create/2, delete/3, policy_changed/2, add_binding/3, + remove_bindings/3, assert_args_equivalence/2]). -rabbit_boot_step({?MODULE, [{description, "exchange type direct"}, @@ -31,8 +32,7 @@ {enables, kernel_ready}]}). description() -> - [{name, <<"direct">>}, - {description, <<"AMQP direct exchange, as per the AMQP specification">>}]. + [{description, <<"AMQP direct exchange, as per the AMQP specification">>}]. serialise_events() -> false. @@ -41,8 +41,10 @@ route(#exchange{name = Name}, rabbit_router:match_routing_key(Name, Routes). validate(_X) -> ok. +validate_binding(_X, _B) -> ok. create(_Tx, _X) -> ok. delete(_Tx, _X, _Bs) -> ok. +policy_changed(_X1, _X2) -> ok. add_binding(_Tx, _X, _B) -> ok. remove_bindings(_Tx, _X, _Bs) -> ok. assert_args_equivalence(X, Args) -> diff --git a/src/rabbit_exchange_type_fanout.erl b/src/rabbit_exchange_type_fanout.erl index a64f2c29..068472bb 100644 --- a/src/rabbit_exchange_type_fanout.erl +++ b/src/rabbit_exchange_type_fanout.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_exchange_type_fanout). @@ -20,7 +20,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, serialise_events/0, route/2]). --export([validate/1, create/2, delete/3, add_binding/3, +-export([validate/1, validate_binding/2, + create/2, delete/3, policy_changed/2, add_binding/3, remove_bindings/3, assert_args_equivalence/2]). -rabbit_boot_step({?MODULE, @@ -31,8 +32,7 @@ {enables, kernel_ready}]}). description() -> - [{name, <<"fanout">>}, - {description, <<"AMQP fanout exchange, as per the AMQP specification">>}]. + [{description, <<"AMQP fanout exchange, as per the AMQP specification">>}]. serialise_events() -> false. @@ -40,8 +40,10 @@ route(#exchange{name = Name}, _Delivery) -> rabbit_router:match_routing_key(Name, ['_']). validate(_X) -> ok. +validate_binding(_X, _B) -> ok. create(_Tx, _X) -> ok. delete(_Tx, _X, _Bs) -> ok. +policy_changed(_X1, _X2) -> ok. add_binding(_Tx, _X, _B) -> ok. remove_bindings(_Tx, _X, _Bs) -> ok. assert_args_equivalence(X, Args) -> diff --git a/src/rabbit_exchange_type_headers.erl b/src/rabbit_exchange_type_headers.erl index 61917d8f..baec9c29 100644 --- a/src/rabbit_exchange_type_headers.erl +++ b/src/rabbit_exchange_type_headers.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_exchange_type_headers). @@ -21,7 +21,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, serialise_events/0, route/2]). --export([validate/1, create/2, delete/3, add_binding/3, +-export([validate/1, validate_binding/2, + create/2, delete/3, policy_changed/2, add_binding/3, remove_bindings/3, assert_args_equivalence/2]). -rabbit_boot_step({?MODULE, @@ -37,8 +38,7 @@ -endif. description() -> - [{name, <<"headers">>}, - {description, <<"AMQP headers exchange, as per the AMQP specification">>}]. + [{description, <<"AMQP headers exchange, as per the AMQP specification">>}]. serialise_events() -> false. @@ -51,14 +51,26 @@ route(#exchange{name = Name}, rabbit_router:match_bindings( Name, fun (#binding{args = Spec}) -> headers_match(Spec, Headers) end). -default_headers_match_kind() -> all. +validate_binding(_X, #binding{args = Args}) -> + case rabbit_misc:table_lookup(Args, <<"x-match">>) of + {longstr, <<"all">>} -> ok; + {longstr, <<"any">>} -> ok; + {longstr, Other} -> {error, + {binding_invalid, + "Invalid x-match field value ~p; " + "expected all or any", [Other]}}; + {Type, Other} -> {error, + {binding_invalid, + "Invalid x-match field type ~p (value ~p); " + "expected longstr", [Type, Other]}}; + undefined -> ok %% [0] + end. +%% [0] spec is vague on whether it can be omitted but in practice it's +%% useful to allow people to do this -parse_x_match(<<"all">>) -> all; -parse_x_match(<<"any">>) -> any; -parse_x_match(Other) -> - rabbit_log:warning("Invalid x-match field value ~p; expected all or any", - [Other]), - default_headers_match_kind(). +parse_x_match({longstr, <<"all">>}) -> all; +parse_x_match({longstr, <<"any">>}) -> any; +parse_x_match(_) -> all. %% legacy; we didn't validate %% Horrendous matching algorithm. Depends for its merge-like %% (linear-time) behaviour on the lists:keysort @@ -69,17 +81,9 @@ parse_x_match(Other) -> %% In other words: REQUIRES BOTH PATTERN AND DATA TO BE SORTED ASCENDING BY KEY. %% !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! %% -headers_match(Pattern, Data) -> - MatchKind = case lists:keysearch(<<"x-match">>, 1, Pattern) of - {value, {_, longstr, MK}} -> parse_x_match(MK); - {value, {_, Type, MK}} -> - rabbit_log:warning("Invalid x-match field type ~p " - "(value ~p); expected longstr", - [Type, MK]), - default_headers_match_kind(); - _ -> default_headers_match_kind() - end, - headers_match(Pattern, Data, true, false, MatchKind). +headers_match(Args, Data) -> + MK = parse_x_match(rabbit_misc:table_lookup(Args, <<"x-match">>)), + headers_match(Args, Data, true, false, MK). headers_match([], _Data, AllMatch, _AnyMatch, all) -> AllMatch; @@ -116,6 +120,7 @@ headers_match([{PK, PT, PV} | PRest], [{DK, DT, DV} | DRest], validate(_X) -> ok. create(_Tx, _X) -> ok. delete(_Tx, _X, _Bs) -> ok. +policy_changed(_X1, _X2) -> ok. add_binding(_Tx, _X, _B) -> ok. remove_bindings(_Tx, _X, _Bs) -> ok. assert_args_equivalence(X, Args) -> diff --git a/src/rabbit_exchange_type_invalid.erl b/src/rabbit_exchange_type_invalid.erl index 82d27960..84bb2182 100644 --- a/src/rabbit_exchange_type_invalid.erl +++ b/src/rabbit_exchange_type_invalid.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_exchange_type_invalid). @@ -20,17 +20,21 @@ -behaviour(rabbit_exchange_type). -export([description/0, serialise_events/0, route/2]). --export([validate/1, create/2, delete/3, - add_binding/3, remove_bindings/3, assert_args_equivalence/2]). +-export([validate/1, validate_binding/2, + create/2, delete/3, policy_changed/2, add_binding/3, + remove_bindings/3, assert_args_equivalence/2]). description() -> - [{name, <<"invalid">>}, - {description, + [{description, <<"Dummy exchange type, to be used when the intended one is not found.">> }]. serialise_events() -> false. +-ifdef(use_specs). +-spec(route/2 :: (rabbit_types:exchange(), rabbit_types:delivery()) + -> no_return()). +-endif. route(#exchange{name = Name, type = Type}, _) -> rabbit_misc:protocol_error( precondition_failed, @@ -38,8 +42,10 @@ route(#exchange{name = Name, type = Type}, _) -> [rabbit_misc:rs(Name), Type]). validate(_X) -> ok. +validate_binding(_X, _B) -> ok. create(_Tx, _X) -> ok. delete(_Tx, _X, _Bs) -> ok. +policy_changed(_X1, _X2) -> ok. add_binding(_Tx, _X, _B) -> ok. remove_bindings(_Tx, _X, _Bs) -> ok. assert_args_equivalence(X, Args) -> diff --git a/src/rabbit_exchange_type_topic.erl b/src/rabbit_exchange_type_topic.erl index 3160fdf4..8ba29deb 100644 --- a/src/rabbit_exchange_type_topic.erl +++ b/src/rabbit_exchange_type_topic.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_exchange_type_topic). @@ -21,7 +21,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, serialise_events/0, route/2]). --export([validate/1, create/2, delete/3, add_binding/3, +-export([validate/1, validate_binding/2, + create/2, delete/3, policy_changed/2, add_binding/3, remove_bindings/3, assert_args_equivalence/2]). -rabbit_boot_step({?MODULE, @@ -34,8 +35,7 @@ %%---------------------------------------------------------------------------- description() -> - [{name, <<"topic">>}, - {description, <<"AMQP topic exchange, as per the AMQP specification">>}]. + [{description, <<"AMQP topic exchange, as per the AMQP specification">>}]. serialise_events() -> false. @@ -48,6 +48,7 @@ route(#exchange{name = X}, end || RKey <- Routes]). validate(_X) -> ok. +validate_binding(_X, _B) -> ok. create(_Tx, _X) -> ok. delete(transaction, #exchange{name = X}, _Bs) -> @@ -58,6 +59,8 @@ delete(transaction, #exchange{name = X}, _Bs) -> delete(none, _Exchange, _Bs) -> ok. +policy_changed(_X1, _X2) -> ok. + add_binding(transaction, _Exchange, Binding) -> internal_add_binding(Binding); add_binding(none, _Exchange, _Binding) -> diff --git a/src/rabbit_file.erl b/src/rabbit_file.erl index 59df14f3..4cf314ca 100644 --- a/src/rabbit_file.erl +++ b/src/rabbit_file.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2011-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2011-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_file). @@ -24,6 +24,8 @@ -export([rename/2, delete/1, recursive_delete/1, recursive_copy/2]). -export([lock_file/1]). +-define(TMP_EXT, ".tmp"). + %%---------------------------------------------------------------------------- -ifdef(use_specs). @@ -102,9 +104,12 @@ read_file_info(File) -> with_fhc_handle(fun () -> prim_file:read_file_info(File) end). with_fhc_handle(Fun) -> - ok = file_handle_cache:obtain(), + with_fhc_handle(1, Fun). + +with_fhc_handle(N, Fun) -> + ok = file_handle_cache:obtain(N), try Fun() - after ok = file_handle_cache:release() + after ok = file_handle_cache:release(N) end. read_term_file(File) -> @@ -133,29 +138,17 @@ write_term_file(File, Terms) -> write_file(Path, Data) -> write_file(Path, Data, []). -%% write_file/3 and make_binary/1 are both based on corresponding -%% functions in the kernel/file.erl module of the Erlang R14B02 -%% release, which is licensed under the EPL. That implementation of -%% write_file/3 does not do an fsync prior to closing the file, hence -%% the existence of this version. APIs are otherwise identical. write_file(Path, Data, Modes) -> Modes1 = [binary, write | (Modes -- [binary, write])], case make_binary(Data) of - Bin when is_binary(Bin) -> - with_fhc_handle( - fun () -> case prim_file:open(Path, Modes1) of - {ok, Hdl} -> try prim_file:write(Hdl, Bin) of - ok -> prim_file:sync(Hdl); - {error, _} = E -> E - after - prim_file:close(Hdl) - end; - {error, _} = E -> E - end - end); - {error, _} = E -> E + Bin when is_binary(Bin) -> write_file1(Path, Bin, Modes1); + {error, _} = E -> E end. +%% make_binary/1 is based on the corresponding function in the +%% kernel/file.erl module of the Erlang R14B02 release, which is +%% licensed under the EPL. + make_binary(Bin) when is_binary(Bin) -> Bin; make_binary(List) -> @@ -165,7 +158,41 @@ make_binary(List) -> {error, Reason} end. +write_file1(Path, Bin, Modes) -> + try + with_synced_copy(Path, Modes, + fun (Hdl) -> + ok = prim_file:write(Hdl, Bin) + end) + catch + error:{badmatch, Error} -> Error; + _:{error, Error} -> {error, Error} + end. + +with_synced_copy(Path, Modes, Fun) -> + case lists:member(append, Modes) of + true -> + {error, append_not_supported, Path}; + false -> + with_fhc_handle( + fun () -> + Bak = Path ++ ?TMP_EXT, + case prim_file:open(Bak, Modes) of + {ok, Hdl} -> + try + Result = Fun(Hdl), + ok = prim_file:rename(Bak, Path), + ok = prim_file:sync(Hdl), + Result + after + prim_file:close(Hdl) + end; + {error, _} = E -> E + end + end) + end. +%% TODO the semantics of this function are rather odd. But see bug 25021. append_file(File, Suffix) -> case read_file_info(File) of {ok, FInfo} -> append_file(File, FInfo#file_info.size, Suffix); @@ -183,9 +210,11 @@ append_file(File, 0, Suffix) -> end end); append_file(File, _, Suffix) -> - case with_fhc_handle(fun () -> prim_file:read_file(File) end) of - {ok, Data} -> write_file([File, Suffix], Data, [append]); - Error -> Error + case with_fhc_handle(2, fun () -> + file:copy(File, {[File, Suffix], [append]}) + end) of + {ok, _BytesCopied} -> ok; + Error -> Error end. ensure_parent_dirs_exist(Filename) -> diff --git a/src/rabbit_framing.erl b/src/rabbit_framing.erl index a79188ab..51aaa999 100644 --- a/src/rabbit_framing.erl +++ b/src/rabbit_framing.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% %% TODO auto-generate diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl index ba0cb04f..70d1f0c1 100644 --- a/src/rabbit_guid.erl +++ b/src/rabbit_guid.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_guid). @@ -63,6 +63,7 @@ update_disk_serial() -> Filename = filename(), Serial = case rabbit_file:read_term_file(Filename) of {ok, [Num]} -> Num; + {ok, []} -> 0; %% [1] {error, enoent} -> 0; {error, Reason} -> throw({error, {cannot_read_serial_file, Filename, Reason}}) @@ -73,6 +74,10 @@ update_disk_serial() -> throw({error, {cannot_write_serial_file, Filename, Reason1}}) end, Serial. +%% [1] a couple of users have reported startup failures due to the +%% file being empty, presumably as a result of filesystem +%% corruption. While rabbit doesn't cope with that in general, in this +%% specific case we can be more accommodating. %% Generate an un-hashed guid. fresh() -> @@ -104,8 +109,6 @@ advance_blocks({B1, B2, B3, B4}, I) -> B5 = erlang:phash2({B1, I}, 4294967296), {{(B2 bxor B5), (B3 bxor B5), (B4 bxor B5), B5}, I+1}. -blocks_to_binary({B1, B2, B3, B4}) -> <<B1:32, B2:32, B3:32, B4:32>>. - %% generate a GUID. This function should be used when performance is a %% priority and predictability is not an issue. Otherwise use %% gen_secure/0. @@ -114,14 +117,15 @@ gen() -> %% time we need a new guid we rotate them producing a new hash %% with the aid of the counter. Look at the comments in %% advance_blocks/2 for details. - {BS, I} = case get(guid) of - undefined -> <<B1:32, B2:32, B3:32, B4:32>> = - erlang:md5(term_to_binary(fresh())), - {{B1,B2,B3,B4}, 0}; - {BS0, I0} -> advance_blocks(BS0, I0) - end, - put(guid, {BS, I}), - blocks_to_binary(BS). + case get(guid) of + undefined -> <<B1:32, B2:32, B3:32, B4:32>> = Res = + erlang:md5(term_to_binary(fresh())), + put(guid, {{B1, B2, B3, B4}, 0}), + Res; + {BS, I} -> {{B1, B2, B3, B4}, _} = S = advance_blocks(BS, I), + put(guid, S), + <<B1:32, B2:32, B3:32, B4:32>> + end. %% generate a non-predictable GUID. %% @@ -144,11 +148,7 @@ gen_secure() -> %% employs base64url encoding, which is safer in more contexts than %% plain base64. string(G, Prefix) -> - Prefix ++ "-" ++ lists:foldl(fun ($\+, Acc) -> [$\- | Acc]; - ($\/, Acc) -> [$\_ | Acc]; - ($\=, Acc) -> Acc; - (Chr, Acc) -> [Chr | Acc] - end, [], base64:encode_to_string(G)). + Prefix ++ "-" ++ rabbit_misc:base64url(G). binary(G, Prefix) -> list_to_binary(string(G, Prefix)). diff --git a/src/rabbit_heartbeat.erl b/src/rabbit_heartbeat.erl index 80b4e768..fac74edb 100644 --- a/src/rabbit_heartbeat.erl +++ b/src/rabbit_heartbeat.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_heartbeat). @@ -19,6 +19,8 @@ -export([start_heartbeat_sender/3, start_heartbeat_receiver/3, start_heartbeat_fun/1, pause_monitor/1, resume_monitor/1]). +-export([system_continue/3, system_terminate/4, system_code_change/4]). + -include("rabbit.hrl"). %%---------------------------------------------------------------------------- @@ -51,6 +53,10 @@ -spec(pause_monitor/1 :: (heartbeaters()) -> 'ok'). -spec(resume_monitor/1 :: (heartbeaters()) -> 'ok'). +-spec(system_code_change/4 :: (_,_,_,_) -> {'ok',_}). +-spec(system_continue/3 :: (_,_,{_, _}) -> any()). +-spec(system_terminate/4 :: (_,_,_,_) -> none()). + -endif. %%---------------------------------------------------------------------------- @@ -59,21 +65,15 @@ start_heartbeat_sender(Sock, TimeoutSec, SendFun) -> %% the 'div 2' is there so that we don't end up waiting for nearly %% 2 * TimeoutSec before sending a heartbeat in the boundary case %% where the last message was sent just after a heartbeat. - heartbeater( - {Sock, TimeoutSec * 1000 div 2, send_oct, 0, - fun () -> - SendFun(), - continue - end}). + heartbeater({Sock, TimeoutSec * 1000 div 2, send_oct, 0, + fun () -> SendFun(), continue end}). start_heartbeat_receiver(Sock, TimeoutSec, ReceiveFun) -> %% we check for incoming data every interval, and time out after %% two checks with no change. As a result we will time out between %% 2 and 3 intervals after the last data has been received. - heartbeater({Sock, TimeoutSec * 1000, recv_oct, 1, fun () -> - ReceiveFun(), - stop - end}). + heartbeater({Sock, TimeoutSec * 1000, recv_oct, 1, + fun () -> ReceiveFun(), stop end}). start_heartbeat_fun(SupPid) -> fun (Sock, SendTimeoutSec, SendFun, ReceiveTimeoutSec, ReceiveFun) -> @@ -88,17 +88,20 @@ start_heartbeat_fun(SupPid) -> {Sender, Receiver} end. -pause_monitor({_Sender, none}) -> - ok; -pause_monitor({_Sender, Receiver}) -> - Receiver ! pause, - ok. +pause_monitor({_Sender, none}) -> ok; +pause_monitor({_Sender, Receiver}) -> Receiver ! pause, ok. + +resume_monitor({_Sender, none}) -> ok; +resume_monitor({_Sender, Receiver}) -> Receiver ! resume, ok. + +system_continue(_Parent, Deb, {Params, State}) -> + heartbeater(Params, Deb, State). + +system_terminate(Reason, _Parent, _Deb, _State) -> + exit(Reason). -resume_monitor({_Sender, none}) -> - ok; -resume_monitor({_Sender, Receiver}) -> - Receiver ! resume, - ok. +system_code_change(Misc, _Module, _OldVsn, _Extra) -> + {ok, Misc}. %%---------------------------------------------------------------------------- start_heartbeater(0, _SupPid, _Sock, _TimeoutFun, _Name, _Callback) -> @@ -106,24 +109,29 @@ start_heartbeater(0, _SupPid, _Sock, _TimeoutFun, _Name, _Callback) -> start_heartbeater(TimeoutSec, SupPid, Sock, TimeoutFun, Name, Callback) -> supervisor2:start_child( SupPid, {Name, - {rabbit_heartbeat, Callback, - [Sock, TimeoutSec, TimeoutFun]}, + {rabbit_heartbeat, Callback, [Sock, TimeoutSec, TimeoutFun]}, transient, ?MAX_WAIT, worker, [rabbit_heartbeat]}). heartbeater(Params) -> - {ok, proc_lib:spawn_link(fun () -> heartbeater(Params, {0, 0}) end)}. + Deb = sys:debug_options([]), + {ok, proc_lib:spawn_link(fun () -> heartbeater(Params, Deb, {0, 0}) end)}. heartbeater({Sock, TimeoutMillisec, StatName, Threshold, Handler} = Params, - {StatVal, SameCount}) -> - Recurse = fun (V) -> heartbeater(Params, V) end, + Deb, {StatVal, SameCount} = State) -> + Recurse = fun (State1) -> heartbeater(Params, Deb, State1) end, + System = fun (From, Req) -> + sys:handle_system_msg( + Req, From, self(), ?MODULE, Deb, {Params, State}) + end, receive pause -> receive - resume -> - Recurse({0, 0}); - Other -> - exit({unexpected_message, Other}) + resume -> Recurse({0, 0}); + {system, From, Req} -> System(From, Req); + Other -> exit({unexpected_message, Other}) end; + {system, From, Req} -> + System(From, Req); Other -> exit({unexpected_message, Other}) after TimeoutMillisec -> diff --git a/src/rabbit_intermediate_sup.erl b/src/rabbit_intermediate_sup.erl new file mode 100644 index 00000000..a9381f20 --- /dev/null +++ b/src/rabbit_intermediate_sup.erl @@ -0,0 +1,39 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2013-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_intermediate_sup). + +-behaviour(supervisor2). + +-export([start_link/0]). + +-export([init/1]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). +-endif. + +%%---------------------------------------------------------------------------- + +start_link() -> + supervisor2:start_link(?MODULE, []). + +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, {{one_for_one, 10, 10}, []}}. diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl index 2b15498e..12a13c00 100644 --- a/src/rabbit_limiter.erl +++ b/src/rabbit_limiter.erl @@ -10,46 +10,169 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% +%% The purpose of the limiter is to stem the flow of messages from +%% queues to channels, in order to act upon various protocol-level +%% flow control mechanisms, specifically AMQP 0-9-1's basic.qos +%% prefetch_count and channel.flow, and AMQP 1.0's link (aka consumer) +%% credit mechanism. +%% +%% Each channel has an associated limiter process, created with +%% start_link/1, which it passes to queues on consumer creation with +%% rabbit_amqqueue:basic_consume/9, and rabbit_amqqueue:basic_get/4. +%% The latter isn't strictly necessary, since basic.get is not +%% subject to limiting, but it means that whenever a queue knows about +%% a channel, it also knows about its limiter, which is less fiddly. +%% +%% The limiter process holds state that is, in effect, shared between +%% the channel and all queues from which the channel is +%% consuming. Essentially all these queues are competing for access to +%% a single, limited resource - the ability to deliver messages via +%% the channel - and it is the job of the limiter process to mediate +%% that access. +%% +%% The limiter process is separate from the channel process for two +%% reasons: separation of concerns, and efficiency. Channels can get +%% very busy, particularly if they are also dealing with publishes. +%% With a separate limiter process all the aforementioned access +%% mediation can take place without touching the channel. +%% +%% For efficiency, both the channel and the queues keep some local +%% state, initialised from the limiter pid with new/1 and client/1, +%% respectively. In particular this allows them to avoid any +%% interaction with the limiter process when it is 'inactive', i.e. no +%% protocol-level flow control is taking place. +%% +%% This optimisation does come at the cost of some complexity though: +%% when a limiter becomes active, the channel needs to inform all its +%% consumer queues of this change in status. It does this by invoking +%% rabbit_amqqueue:activate_limit_all/2. Note that there is no inverse +%% transition, i.e. once a queue has been told about an active +%% limiter, it is not subsequently told when that limiter becomes +%% inactive. In practice it is rare for that to happen, though we +%% could optimise this case in the future. +%% +%% In addition, the consumer credit bookkeeping is local to queues, so +%% it is not necessary to store information about it in the limiter +%% process. But for abstraction we hide it from the queue behind the +%% limiter API, and it therefore becomes part of the queue local +%% state. +%% +%% The interactions with the limiter are as follows: +%% +%% 1. Channels tell the limiter about basic.qos prefetch counts - +%% that's what the limit_prefetch/3, unlimit_prefetch/1, +%% is_prefetch_limited/1, get_prefetch_limit/1 API functions are +%% about - and channel.flow blocking - that's what block/1, +%% unblock/1 and is_blocked/1 are for. They also tell the limiter +%% queue state (via the queue) about consumer credit changes - +%% that's what credit/4 is for. +%% +%% 2. Queues also tell the limiter queue state about the queue +%% becoming empty (via drained/1) and consumers leaving (via +%% forget_consumer/2). +%% +%% 3. Queues register with the limiter - this happens as part of +%% activate/1. +%% +%% 4. The limiter process maintains an internal counter of 'messages +%% sent but not yet acknowledged', called the 'volume'. +%% +%% 5. Queues ask the limiter for permission (with can_send/3) whenever +%% they want to deliver a message to a channel. The limiter checks +%% whether a) the channel isn't blocked by channel.flow, b) the +%% volume has not yet reached the prefetch limit, and c) whether +%% the consumer has enough credit. If so it increments the volume +%% and tells the queue to proceed. Otherwise it marks the queue as +%% requiring notification (see below) and tells the queue not to +%% proceed. +%% +%% 6. A queue that has been told to proceed (by the return value of +%% can_send/3) sends the message to the channel. Conversely, a +%% queue that has been told not to proceed, will not attempt to +%% deliver that message, or any future messages, to the +%% channel. This is accomplished by can_send/3 capturing the +%% outcome in the local state, where it can be accessed with +%% is_suspended/1. +%% +%% 7. When a channel receives an ack it tells the limiter (via ack/2) +%% how many messages were ack'ed. The limiter process decrements +%% the volume and if it falls below the prefetch_count then it +%% notifies (through rabbit_amqqueue:resume/2) all the queues +%% requiring notification, i.e. all those that had a can_send/3 +%% request denied. +%% +%% 8. Upon receipt of such a notification, queues resume delivery to +%% the channel, i.e. they will once again start asking limiter, as +%% described in (5). +%% +%% 9. When a queue has no more consumers associated with a particular +%% channel, it deactivates use of the limiter with deactivate/1, +%% which alters the local state such that no further interactions +%% with the limiter process take place until a subsequent +%% activate/1. + -module(rabbit_limiter). -behaviour(gen_server2). +-export([start_link/0]). +%% channel API +-export([new/1, limit_prefetch/3, unlimit_prefetch/1, block/1, unblock/1, + is_prefetch_limited/1, is_blocked/1, is_active/1, + get_prefetch_limit/1, ack/2, pid/1]). +%% queue API +-export([client/1, activate/1, can_send/3, resume/1, deactivate/1, + is_suspended/1, is_consumer_blocked/2, credit/4, drained/1, + forget_consumer/2]). +%% callbacks -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, - handle_info/2, prioritise_call/3]). --export([start_link/0, make_token/0, make_token/1, is_enabled/1, enable/2, - disable/1]). --export([limit/2, can_send/3, ack/2, register/2, unregister/2]). --export([get_limit/1, block/1, unblock/1, is_blocked/1]). + handle_info/2, prioritise_call/4]). %%---------------------------------------------------------------------------- --record(token, {pid, enabled}). +-record(lstate, {pid, prefetch_limited, blocked}). +-record(qstate, {pid, state, credits}). -ifdef(use_specs). --export_type([token/0]). - --opaque(token() :: #token{}). +-type(lstate() :: #lstate{pid :: pid(), + prefetch_limited :: boolean(), + blocked :: boolean()}). +-type(qstate() :: #qstate{pid :: pid(), + state :: 'dormant' | 'active' | 'suspended'}). -spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). --spec(make_token/0 :: () -> token()). --spec(make_token/1 :: ('undefined' | pid()) -> token()). --spec(is_enabled/1 :: (token()) -> boolean()). --spec(enable/2 :: (token(), non_neg_integer()) -> token()). --spec(disable/1 :: (token()) -> token()). --spec(limit/2 :: (token(), non_neg_integer()) -> 'ok' | {'disabled', token()}). --spec(can_send/3 :: (token(), pid(), boolean()) -> boolean()). --spec(ack/2 :: (token(), non_neg_integer()) -> 'ok'). --spec(register/2 :: (token(), pid()) -> 'ok'). --spec(unregister/2 :: (token(), pid()) -> 'ok'). --spec(get_limit/1 :: (token()) -> non_neg_integer()). --spec(block/1 :: (token()) -> 'ok'). --spec(unblock/1 :: (token()) -> 'ok' | {'disabled', token()}). --spec(is_blocked/1 :: (token()) -> boolean()). +-spec(new/1 :: (pid()) -> lstate()). + +-spec(limit_prefetch/3 :: (lstate(), non_neg_integer(), non_neg_integer()) + -> lstate()). +-spec(unlimit_prefetch/1 :: (lstate()) -> lstate()). +-spec(block/1 :: (lstate()) -> lstate()). +-spec(unblock/1 :: (lstate()) -> lstate()). +-spec(is_prefetch_limited/1 :: (lstate()) -> boolean()). +-spec(is_blocked/1 :: (lstate()) -> boolean()). +-spec(is_active/1 :: (lstate()) -> boolean()). +-spec(get_prefetch_limit/1 :: (lstate()) -> non_neg_integer()). +-spec(ack/2 :: (lstate(), non_neg_integer()) -> 'ok'). +-spec(pid/1 :: (lstate()) -> pid()). + +-spec(client/1 :: (pid()) -> qstate()). +-spec(activate/1 :: (qstate()) -> qstate()). +-spec(can_send/3 :: (qstate(), boolean(), rabbit_types:ctag()) -> + {'continue' | 'suspend', qstate()}). +-spec(resume/1 :: (qstate()) -> qstate()). +-spec(deactivate/1 :: (qstate()) -> qstate()). +-spec(is_suspended/1 :: (qstate()) -> boolean()). +-spec(is_consumer_blocked/2 :: (qstate(), rabbit_types:ctag()) -> boolean()). +-spec(credit/4 :: (qstate(), rabbit_types:ctag(), non_neg_integer(), boolean()) + -> qstate()). +-spec(drained/1 :: (qstate()) + -> {[{rabbit_types:ctag(), non_neg_integer()}], qstate()}). +-spec(forget_consumer/2 :: (qstate(), rabbit_types:ctag()) -> qstate()). -endif. @@ -64,120 +187,183 @@ %% notified of a change in the limit or volume that may allow it to %% deliver more messages via the limiter's channel. +-record(credit, {credit = 0, drain = false}). + %%---------------------------------------------------------------------------- %% API %%---------------------------------------------------------------------------- start_link() -> gen_server2:start_link(?MODULE, [], []). -make_token() -> make_token(undefined). -make_token(Pid) -> #token{pid = Pid, enabled = false}. +new(Pid) -> + %% this a 'call' to ensure that it is invoked at most once. + ok = gen_server:call(Pid, {new, self()}), + #lstate{pid = Pid, prefetch_limited = false, blocked = false}. -is_enabled(#token{enabled = Enabled}) -> Enabled. +limit_prefetch(L, PrefetchCount, UnackedCount) when PrefetchCount > 0 -> + ok = gen_server:call(L#lstate.pid, + {limit_prefetch, PrefetchCount, UnackedCount}), + L#lstate{prefetch_limited = true}. -enable(#token{pid = Pid} = Token, Volume) -> - gen_server2:call(Pid, {enable, Token, self(), Volume}, infinity). +unlimit_prefetch(L) -> + ok = gen_server:call(L#lstate.pid, unlimit_prefetch), + L#lstate{prefetch_limited = false}. -disable(#token{pid = Pid} = Token) -> - gen_server2:call(Pid, {disable, Token}, infinity). +block(L) -> + ok = gen_server:call(L#lstate.pid, block), + L#lstate{blocked = true}. -limit(Limiter, PrefetchCount) -> - maybe_call(Limiter, {limit, PrefetchCount, Limiter}, ok). +unblock(L) -> + ok = gen_server:call(L#lstate.pid, unblock), + L#lstate{blocked = false}. -%% Ask the limiter whether the queue can deliver a message without -%% breaching a limit. Note that we don't use maybe_call here in order -%% to avoid always going through with_exit_handler/2, even when the -%% limiter is disabled. -can_send(#token{pid = Pid, enabled = true}, QPid, AckRequired) -> - rabbit_misc:with_exit_handler( - fun () -> true end, - fun () -> - gen_server2:call(Pid, {can_send, QPid, AckRequired}, infinity) - end); -can_send(_, _, _) -> - true. +is_prefetch_limited(#lstate{prefetch_limited = Limited}) -> Limited. + +is_blocked(#lstate{blocked = Blocked}) -> Blocked. -%% Let the limiter know that the channel has received some acks from a -%% consumer -ack(Limiter, Count) -> maybe_cast(Limiter, {ack, Count}). +is_active(L) -> is_prefetch_limited(L) orelse is_blocked(L). -register(Limiter, QPid) -> maybe_cast(Limiter, {register, QPid}). +get_prefetch_limit(#lstate{prefetch_limited = false}) -> 0; +get_prefetch_limit(L) -> gen_server:call(L#lstate.pid, get_prefetch_limit). -unregister(Limiter, QPid) -> maybe_cast(Limiter, {unregister, QPid}). +ack(#lstate{prefetch_limited = false}, _AckCount) -> ok; +ack(L, AckCount) -> gen_server:cast(L#lstate.pid, {ack, AckCount}). -get_limit(Limiter) -> +pid(#lstate{pid = Pid}) -> Pid. + +client(Pid) -> #qstate{pid = Pid, state = dormant, credits = gb_trees:empty()}. + +activate(L = #qstate{state = dormant}) -> + ok = gen_server:cast(L#qstate.pid, {register, self()}), + L#qstate{state = active}; +activate(L) -> L. + +can_send(L = #qstate{pid = Pid, state = State, credits = Credits}, + AckRequired, CTag) -> + case is_consumer_blocked(L, CTag) of + false -> case (State =/= active orelse + safe_call(Pid, {can_send, self(), AckRequired}, true)) of + true -> {continue, L#qstate{ + credits = record_send_q(CTag, Credits)}}; + false -> {suspend, L#qstate{state = suspended}} + end; + true -> {suspend, L} + end. + +safe_call(Pid, Msg, ExitValue) -> rabbit_misc:with_exit_handler( - fun () -> 0 end, - fun () -> maybe_call(Limiter, get_limit, 0) end). + fun () -> ExitValue end, + fun () -> gen_server2:call(Pid, Msg, infinity) end). + +resume(L = #qstate{state = suspended}) -> + L#qstate{state = active}; +resume(L) -> L. + +deactivate(L = #qstate{state = dormant}) -> L; +deactivate(L) -> + ok = gen_server:cast(L#qstate.pid, {unregister, self()}), + L#qstate{state = dormant}. + +is_suspended(#qstate{state = suspended}) -> true; +is_suspended(#qstate{}) -> false. + +is_consumer_blocked(#qstate{credits = Credits}, CTag) -> + case gb_trees:lookup(CTag, Credits) of + {value, #credit{credit = C}} when C > 0 -> false; + {value, #credit{}} -> true; + none -> false + end. -block(Limiter) -> - maybe_call(Limiter, block, ok). +credit(Limiter = #qstate{credits = Credits}, CTag, Credit, Drain) -> + Limiter#qstate{credits = update_credit(CTag, Credit, Drain, Credits)}. -unblock(Limiter) -> - maybe_call(Limiter, {unblock, Limiter}, ok). +drained(Limiter = #qstate{credits = Credits}) -> + {CTagCredits, Credits2} = + rabbit_misc:gb_trees_fold( + fun (CTag, #credit{credit = C, drain = true}, {Acc, Creds0}) -> + {[{CTag, C} | Acc], update_credit(CTag, 0, false, Creds0)}; + (_CTag, #credit{credit = _C, drain = false}, {Acc, Creds0}) -> + {Acc, Creds0} + end, {[], Credits}, Credits), + {CTagCredits, Limiter#qstate{credits = Credits2}}. -is_blocked(Limiter) -> - maybe_call(Limiter, is_blocked, false). +forget_consumer(Limiter = #qstate{credits = Credits}, CTag) -> + Limiter#qstate{credits = gb_trees:delete_any(CTag, Credits)}. + +%%---------------------------------------------------------------------------- +%% Queue-local code +%%---------------------------------------------------------------------------- + +%% We want to do all the AMQP 1.0-ish link level credit calculations +%% in the queue (to do them elsewhere introduces a ton of +%% races). However, it's a big chunk of code that is conceptually very +%% linked to the limiter concept. So we get the queue to hold a bit of +%% state for us (#qstate.credits), and maintain a fiction that the +%% limiter is making the decisions... + +record_send_q(CTag, Credits) -> + case gb_trees:lookup(CTag, Credits) of + {value, #credit{credit = Credit, drain = Drain}} -> + update_credit(CTag, Credit - 1, Drain, Credits); + none -> + Credits + end. + +update_credit(CTag, Credit, Drain, Credits) -> + %% Using up all credit implies no need to send a 'drained' event + Drain1 = Drain andalso Credit > 0, + gb_trees:enter(CTag, #credit{credit = Credit, drain = Drain1}, Credits). %%---------------------------------------------------------------------------- %% gen_server callbacks %%---------------------------------------------------------------------------- -init([]) -> - {ok, #lim{}}. +init([]) -> {ok, #lim{}}. + +prioritise_call(get_prefetch_limit, _From, _Len, _State) -> 9; +prioritise_call(_Msg, _From, _Len, _State) -> 0. + +handle_call({new, ChPid}, _From, State = #lim{ch_pid = undefined}) -> + {reply, ok, State#lim{ch_pid = ChPid}}; -prioritise_call(get_limit, _From, _State) -> 9; -prioritise_call(_Msg, _From, _State) -> 0. +handle_call({limit_prefetch, PrefetchCount, UnackedCount}, _From, + State = #lim{prefetch_count = 0}) -> + {reply, ok, maybe_notify(State, State#lim{prefetch_count = PrefetchCount, + volume = UnackedCount})}; +handle_call({limit_prefetch, PrefetchCount, _UnackedCount}, _From, State) -> + {reply, ok, maybe_notify(State, State#lim{prefetch_count = PrefetchCount})}; + +handle_call(unlimit_prefetch, _From, State) -> + {reply, ok, maybe_notify(State, State#lim{prefetch_count = 0, + volume = 0})}; + +handle_call(block, _From, State) -> + {reply, ok, State#lim{blocked = true}}; + +handle_call(unblock, _From, State) -> + {reply, ok, maybe_notify(State, State#lim{blocked = false})}; + +handle_call(get_prefetch_limit, _From, + State = #lim{prefetch_count = PrefetchCount}) -> + {reply, PrefetchCount, State}; handle_call({can_send, QPid, _AckRequired}, _From, State = #lim{blocked = true}) -> {reply, false, limit_queue(QPid, State)}; handle_call({can_send, QPid, AckRequired}, _From, State = #lim{volume = Volume}) -> - case limit_reached(State) of + case prefetch_limit_reached(State) of true -> {reply, false, limit_queue(QPid, State)}; false -> {reply, true, State#lim{volume = if AckRequired -> Volume + 1; true -> Volume end}} - end; - -handle_call(get_limit, _From, State = #lim{prefetch_count = PrefetchCount}) -> - {reply, PrefetchCount, State}; - -handle_call({limit, PrefetchCount, Token}, _From, State) -> - case maybe_notify(State, State#lim{prefetch_count = PrefetchCount}) of - {cont, State1} -> - {reply, ok, State1}; - {stop, State1} -> - {reply, {disabled, Token#token{enabled = false}}, State1} - end; - -handle_call(block, _From, State) -> - {reply, ok, State#lim{blocked = true}}; - -handle_call({unblock, Token}, _From, State) -> - case maybe_notify(State, State#lim{blocked = false}) of - {cont, State1} -> - {reply, ok, State1}; - {stop, State1} -> - {reply, {disabled, Token#token{enabled = false}}, State1} - end; - -handle_call(is_blocked, _From, State) -> - {reply, blocked(State), State}; - -handle_call({enable, Token, Channel, Volume}, _From, State) -> - {reply, Token#token{enabled = true}, - State#lim{ch_pid = Channel, volume = Volume}}; -handle_call({disable, Token}, _From, State) -> - {reply, Token#token{enabled = false}, State}. + end. handle_cast({ack, Count}, State = #lim{volume = Volume}) -> NewVolume = if Volume == 0 -> 0; true -> Volume - Count end, - {cont, State1} = maybe_notify(State, State#lim{volume = NewVolume}), - {noreply, State1}; + {noreply, maybe_notify(State, State#lim{volume = NewVolume})}; handle_cast({register, QPid}, State) -> {noreply, remember_queue(QPid, State)}; @@ -199,27 +385,13 @@ code_change(_, State, _) -> %%---------------------------------------------------------------------------- maybe_notify(OldState, NewState) -> - case (limit_reached(OldState) orelse blocked(OldState)) andalso - not (limit_reached(NewState) orelse blocked(NewState)) of - true -> NewState1 = notify_queues(NewState), - {case NewState1#lim.prefetch_count of - 0 -> stop; - _ -> cont - end, NewState1}; - false -> {cont, NewState} + case (prefetch_limit_reached(OldState) orelse blocked(OldState)) andalso + not (prefetch_limit_reached(NewState) orelse blocked(NewState)) of + true -> notify_queues(NewState); + false -> NewState end. -maybe_call(#token{pid = Pid, enabled = true}, Call, _Default) -> - gen_server2:call(Pid, Call, infinity); -maybe_call(_, _Call, Default) -> - Default. - -maybe_cast(#token{pid = Pid, enabled = true}, Cast) -> - gen_server2:cast(Pid, Cast); -maybe_cast(_, _Call) -> - ok. - -limit_reached(#lim{prefetch_count = Limit, volume = Volume}) -> +prefetch_limit_reached(#lim{prefetch_count = Limit, volume = Volume}) -> Limit =/= 0 andalso Volume >= Limit. blocked(#lim{blocked = Blocked}) -> Blocked. @@ -231,10 +403,9 @@ remember_queue(QPid, State = #lim{queues = Queues}) -> true -> State end. -forget_queue(QPid, State = #lim{ch_pid = ChPid, queues = Queues}) -> +forget_queue(QPid, State = #lim{queues = Queues}) -> case orddict:find(QPid, Queues) of {ok, {MRef, _}} -> true = erlang:demonitor(MRef), - ok = rabbit_amqqueue:unblock(QPid, ChPid), State#lim{queues = orddict:erase(QPid, Queues)}; error -> State end. @@ -251,13 +422,13 @@ notify_queues(State = #lim{ch_pid = ChPid, queues = Queues}) -> end, {[], Queues}, Queues), case length(QList) of 0 -> ok; - 1 -> ok = rabbit_amqqueue:unblock(hd(QList), ChPid); %% common case + 1 -> ok = rabbit_amqqueue:resume(hd(QList), ChPid); %% common case L -> %% We randomly vary the position of queues in the list, %% thus ensuring that each queue has an equal chance of %% being notified first. {L1, L2} = lists:split(random:uniform(L), QList), - [[ok = rabbit_amqqueue:unblock(Q, ChPid) || Q <- L3] + [[ok = rabbit_amqqueue:resume(Q, ChPid) || Q <- L3] || L3 <- [L2, L1]], ok end, diff --git a/src/rabbit_log.erl b/src/rabbit_log.erl index a6b4eeb0..2e3a1bbb 100644 --- a/src/rabbit_log.erl +++ b/src/rabbit_log.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_log). @@ -40,18 +40,20 @@ -spec(log/3 :: (category(), level(), string()) -> 'ok'). -spec(log/4 :: (category(), level(), string(), [any()]) -> 'ok'). --spec(info/1 :: (string()) -> 'ok'). --spec(info/2 :: (string(), [any()]) -> 'ok'). + +-spec(info/1 :: (string()) -> 'ok'). +-spec(info/2 :: (string(), [any()]) -> 'ok'). -spec(warning/1 :: (string()) -> 'ok'). -spec(warning/2 :: (string(), [any()]) -> 'ok'). --spec(error/1 :: (string()) -> 'ok'). --spec(error/2 :: (string(), [any()]) -> 'ok'). +-spec(error/1 :: (string()) -> 'ok'). +-spec(error/2 :: (string(), [any()]) -> 'ok'). -endif. %%---------------------------------------------------------------------------- start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + log(Category, Level, Fmt) -> log(Category, Level, Fmt, []). log(Category, Level, Fmt, Args) when is_list(Args) -> diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index f22ad874..b8d8023e 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% @@ -43,17 +43,6 @@ -define(DEFAULT_UPDATE_INTERVAL, 2500). -define(TABLE_NAME, ?MODULE). -%% Because we have a feedback loop here, we need to ensure that we -%% have some space for when the queues don't quite respond as fast as -%% we would like, or when there is buffering going on in other parts -%% of the system. In short, we aim to stay some distance away from -%% when the memory alarms will go off, which cause backpressure (of -%% some sort) on producers. Note that all other Thresholds are -%% relative to this scaling. --define(MEMORY_LIMIT_SCALING, 0.4). - --define(LIMIT_THRESHOLD, 0.5). %% don't limit queues when mem use is < this - %% If all queues are pushed to disk (duration 0), then the sum of %% their reported lengths will be 0. If memory then becomes available, %% unless we manually intervene, the sum will remain 0, and the queues @@ -207,7 +196,9 @@ internal_update(State = #state { queue_durations = Durations, desired_duration = DesiredDurationAvg, queue_duration_sum = Sum, queue_duration_count = Count }) -> - MemoryLimit = ?MEMORY_LIMIT_SCALING * vm_memory_monitor:get_memory_limit(), + {ok, LimitThreshold} = + application:get_env(rabbit, vm_memory_high_watermark_paging_ratio), + MemoryLimit = vm_memory_monitor:get_memory_limit(), MemoryRatio = case MemoryLimit > 0.0 of true -> erlang:memory(total) / MemoryLimit; false -> infinity @@ -215,7 +206,7 @@ internal_update(State = #state { queue_durations = Durations, DesiredDurationAvg1 = if MemoryRatio =:= infinity -> 0.0; - MemoryRatio < ?LIMIT_THRESHOLD orelse Count == 0 -> + MemoryRatio < LimitThreshold orelse Count == 0 -> infinity; MemoryRatio < ?SUM_INC_THRESHOLD -> ((Sum + ?SUM_INC_AMOUNT) / Count) / MemoryRatio; diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 17e2ffb4..f54e9bd1 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2010-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2010-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_mirror_queue_coordinator). @@ -33,16 +33,14 @@ gm, monitors, death_fun, - length_fun + depth_fun }). --define(ONE_SECOND, 1000). - -ifdef(use_specs). -spec(start_link/4 :: (rabbit_types:amqqueue(), pid() | 'undefined', rabbit_mirror_queue_master:death_fun(), - rabbit_mirror_queue_master:length_fun()) -> + rabbit_mirror_queue_master:depth_fun()) -> rabbit_types:ok_pid_or_error()). -spec(get_gm/1 :: (pid()) -> pid()). -spec(ensure_monitoring/2 :: (pid(), [pid()]) -> 'ok'). @@ -103,19 +101,25 @@ %% channel during a publish, only some of the mirrors may receive that %% publish. As a result of this problem, the messages broadcast over %% the gm contain published content, and thus slaves can operate -%% successfully on messages that they only receive via the gm. The key -%% purpose of also sending messages directly from the channels to the -%% slaves is that without this, in the event of the death of the -%% master, messages could be lost until a suitable slave is promoted. +%% successfully on messages that they only receive via the gm. +%% +%% The key purpose of also sending messages directly from the channels +%% to the slaves is that without this, in the event of the death of +%% the master, messages could be lost until a suitable slave is +%% promoted. However, that is not the only reason. A slave cannot send +%% confirms for a message until it has seen it from the +%% channel. Otherwise, it might send a confirm to a channel for a +%% message that it might *never* receive from that channel. This can +%% happen because new slaves join the gm ring (and thus receive +%% messages from the master) before inserting themselves in the +%% queue's mnesia record (which is what channels look at for routing). +%% As it turns out, channels will simply ignore such bogus confirms, +%% but relying on that would introduce a dangerously tight coupling. %% -%% However, that is not the only reason. For example, if confirms are -%% in use, then there is no guarantee that every slave will see the -%% delivery with the same msg_seq_no. As a result, the slaves have to -%% wait until they've seen both the publish via gm, and the publish -%% via the channel before they have enough information to be able to -%% perform the publish to their own bq, and subsequently issue the -%% confirm, if necessary. Either form of publish can arrive first, and -%% a slave can be upgraded to the master at any point during this +%% Hence the slaves have to wait until they've seen both the publish +%% via gm, and the publish via the channel before they issue the +%% confirm. Either form of publish can arrive first, and a slave can +%% be upgraded to the master at any point during this %% process. Confirms continue to be issued correctly, however. %% %% Because the slave is a full process, it impersonates parts of the @@ -134,25 +138,31 @@ %% gm should be processed as normal, but fetches which are for %% messages the slave has never seen should be ignored. Similarly, %% acks for messages the slave never fetched should be -%% ignored. Eventually, as the master is consumed from, the messages -%% at the head of the queue which were there before the slave joined -%% will disappear, and the slave will become fully synced with the -%% state of the master. The detection of the sync-status of a slave is -%% done entirely based on length: if the slave and the master both -%% agree on the length of the queue after the fetch of the head of the -%% queue (or a 'set_length' results in a slave having to drop some -%% messages from the head of its queue), then the queues must be in -%% sync. The only other possibility is that the slave's queue is -%% shorter, and thus the fetch should be ignored. In case slaves are -%% joined to an empty queue which only goes on to receive publishes, -%% they start by asking the master to broadcast its length. This is -%% enough for slaves to always be able to work out when their head -%% does not differ from the master (and is much simpler and cheaper -%% than getting the master to hang on to the guid of the msg at the -%% head of its queue). When a slave is promoted to a master, it -%% unilaterally broadcasts its length, in order to solve the problem -%% of length requests from new slaves being unanswered by a dead -%% master. +%% ignored. Similarly, we don't republish rejected messages that we +%% haven't seen. Eventually, as the master is consumed from, the +%% messages at the head of the queue which were there before the slave +%% joined will disappear, and the slave will become fully synced with +%% the state of the master. +%% +%% The detection of the sync-status is based on the depth of the BQs, +%% where the depth is defined as the sum of the length of the BQ (as +%% per BQ:len) and the messages pending an acknowledgement. When the +%% depth of the slave is equal to the master's, then the slave is +%% synchronised. We only store the difference between the two for +%% simplicity. Comparing the length is not enough since we need to +%% take into account rejected messages which will make it back into +%% the master queue but can't go back in the slave, since we don't +%% want "holes" in the slave queue. Note that the depth, and the +%% length likewise, must always be shorter on the slave - we assert +%% that in various places. In case slaves are joined to an empty queue +%% which only goes on to receive publishes, they start by asking the +%% master to broadcast its depth. This is enough for slaves to always +%% be able to work out when their head does not differ from the master +%% (and is much simpler and cheaper than getting the master to hang on +%% to the guid of the msg at the head of its queue). When a slave is +%% promoted to a master, it unilaterally broadcasts its depth, in +%% order to solve the problem of depth requests from new slaves being +%% unanswered by a dead master. %% %% Obviously, due to the async nature of communication across gm, the %% slaves can fall behind. This does not matter from a sync pov: if @@ -212,20 +222,19 @@ %% sender_death message to all the slaves, saying the sender has %% died. Once the slaves receive the sender_death message, they know %% that they're not going to receive any more instructions from the gm -%% regarding that sender, thus they throw away any publications from -%% the sender pending publication instructions. However, it is -%% possible that the coordinator receives the DOWN and communicates -%% that to the master before the master has finished receiving and -%% processing publishes from the sender. This turns out not to be a -%% problem: the sender has actually died, and so will not need to -%% receive confirms or other feedback, and should further messages be -%% "received" from the sender, the master will ask the coordinator to -%% set up a new monitor, and will continue to process the messages -%% normally. Slaves may thus receive publishes via gm from previously -%% declared "dead" senders, but again, this is fine: should the slave -%% have just thrown out the message it had received directly from the -%% sender (due to receiving a sender_death message via gm), it will be -%% able to cope with the publication purely from the master via gm. +%% regarding that sender. However, it is possible that the coordinator +%% receives the DOWN and communicates that to the master before the +%% master has finished receiving and processing publishes from the +%% sender. This turns out not to be a problem: the sender has actually +%% died, and so will not need to receive confirms or other feedback, +%% and should further messages be "received" from the sender, the +%% master will ask the coordinator to set up a new monitor, and +%% will continue to process the messages normally. Slaves may thus +%% receive publishes via gm from previously declared "dead" senders, +%% but again, this is fine: should the slave have just thrown out the +%% message it had received directly from the sender (due to receiving +%% a sender_death message via gm), it will be able to cope with the +%% publication purely from the master via gm. %% %% When a slave receives a DOWN message for a sender, if it has not %% received the sender_death message from the master via gm already, @@ -293,15 +302,15 @@ %% if they have no mirrored content at all. This is not surprising: to %% achieve anything more sophisticated would require the master and %% recovering slave to be able to check to see whether they agree on -%% the last seen state of the queue: checking length alone is not +%% the last seen state of the queue: checking depth alone is not %% sufficient in this case. %% %% For more documentation see the comments in bug 23554. %% %%---------------------------------------------------------------------------- -start_link(Queue, GM, DeathFun, LengthFun) -> - gen_server2:start_link(?MODULE, [Queue, GM, DeathFun, LengthFun], []). +start_link(Queue, GM, DeathFun, DepthFun) -> + gen_server2:start_link(?MODULE, [Queue, GM, DeathFun, DepthFun], []). get_gm(CPid) -> gen_server2:call(CPid, get_gm, infinity). @@ -313,10 +322,12 @@ ensure_monitoring(CPid, Pids) -> %% gen_server %% --------------------------------------------------------------------------- -init([#amqqueue { name = QueueName } = Q, GM, DeathFun, LengthFun]) -> +init([#amqqueue { name = QueueName } = Q, GM, DeathFun, DepthFun]) -> GM1 = case GM of undefined -> - {ok, GM2} = gm:start_link(QueueName, ?MODULE, [self()]), + {ok, GM2} = gm:start_link( + QueueName, ?MODULE, [self()], + fun rabbit_misc:execute_mnesia_transaction/1), receive {joined, GM2, _Members} -> ok end, @@ -325,12 +336,11 @@ init([#amqqueue { name = QueueName } = Q, GM, DeathFun, LengthFun]) -> true = link(GM), GM end, - ensure_gm_heartbeat(), {ok, #state { q = Q, gm = GM1, monitors = pmon:new(), death_fun = DeathFun, - length_fun = LengthFun }, + depth_fun = DepthFun }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -340,7 +350,7 @@ handle_call(get_gm, _From, State = #state { gm = GM }) -> handle_cast({gm_deaths, Deaths}, State = #state { q = #amqqueue { name = QueueName, pid = MPid } }) when node(MPid) =:= node() -> - case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of + case rabbit_mirror_queue_misc:remove_from_queue(QueueName, MPid, Deaths) of {ok, MPid, DeadPids} -> rabbit_mirror_queue_misc:report_deaths(MPid, true, QueueName, DeadPids), @@ -349,17 +359,15 @@ handle_cast({gm_deaths, Deaths}, {stop, normal, State} end; -handle_cast(request_length, State = #state { length_fun = LengthFun }) -> - ok = LengthFun(), +handle_cast(request_depth, State = #state { depth_fun = DepthFun }) -> + ok = DepthFun(), noreply(State); handle_cast({ensure_monitoring, Pids}, State = #state { monitors = Mons }) -> - noreply(State #state { monitors = pmon:monitor_all(Pids, Mons) }). + noreply(State #state { monitors = pmon:monitor_all(Pids, Mons) }); -handle_info(send_gm_heartbeat, State = #state { gm = GM }) -> - gm:broadcast(GM, heartbeat), - ensure_gm_heartbeat(), - noreply(State); +handle_cast({delete_and_terminate, Reason}, State) -> + {stop, Reason, State}. handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, State = #state { monitors = Mons, @@ -396,12 +404,13 @@ members_changed([_CPid], _Births, []) -> members_changed([CPid], _Births, Deaths) -> ok = gen_server2:cast(CPid, {gm_deaths, Deaths}). -handle_msg([_CPid], _From, heartbeat) -> - ok; -handle_msg([CPid], _From, request_length = Msg) -> +handle_msg([CPid], _From, request_depth = Msg) -> ok = gen_server2:cast(CPid, Msg); handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) -> ok = gen_server2:cast(CPid, Msg); +handle_msg([CPid], _From, {delete_and_terminate, _Reason} = Msg) -> + ok = gen_server2:cast(CPid, Msg), + {stop, {shutdown, ring_shutdown}}; handle_msg([_CPid], _From, _Msg) -> ok. @@ -414,6 +423,3 @@ noreply(State) -> reply(Reply, State) -> {reply, Reply, State, hibernate}. - -ensure_gm_heartbeat() -> - erlang:send_after(?ONE_SECOND, self(), send_gm_heartbeat). diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 4e71cc43..3abd81f5 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -10,59 +10,67 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2010-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2010-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_mirror_queue_master). -export([init/3, terminate/2, delete_and_terminate/2, - purge/1, publish/4, publish_delivered/5, fetch/2, ack/2, - requeue/2, len/1, is_empty/1, drain_confirmed/1, dropwhile/3, - set_ram_duration_target/2, ram_duration/1, + purge/1, purge_acks/1, publish/5, publish_delivered/4, + discard/3, fetch/2, drop/2, ack/2, requeue/2, ackfold/4, fold/3, + len/1, is_empty/1, depth/1, drain_confirmed/1, + dropwhile/2, fetchwhile/4, set_ram_duration_target/2, ram_duration/1, needs_timeout/1, timeout/1, handle_pre_hibernate/1, - status/1, invoke/3, is_duplicate/2, discard/3, fold/3]). + status/1, invoke/3, is_duplicate/2]). -export([start/1, stop/0]). --export([promote_backing_queue_state/6, sender_death_fun/0, length_fun/0]). +-export([promote_backing_queue_state/8, sender_death_fun/0, depth_fun/0]). + +-export([init_with_existing_bq/3, stop_mirroring/1, sync_mirrors/3]). -behaviour(rabbit_backing_queue). -include("rabbit.hrl"). --record(state, { gm, +-record(state, { name, + gm, coordinator, backing_queue, backing_queue_state, - set_delivered, seen_status, confirmed, - ack_msg_id, known_senders }). -ifdef(use_specs). --export_type([death_fun/0, length_fun/0]). +-export_type([death_fun/0, depth_fun/0, stats_fun/0]). -type(death_fun() :: fun ((pid()) -> 'ok')). --type(length_fun() :: fun (() -> 'ok')). --type(master_state() :: #state { gm :: pid(), +-type(depth_fun() :: fun (() -> 'ok')). +-type(stats_fun() :: fun ((any()) -> 'ok')). +-type(master_state() :: #state { name :: rabbit_amqqueue:name(), + gm :: pid(), coordinator :: pid(), backing_queue :: atom(), backing_queue_state :: any(), - set_delivered :: non_neg_integer(), seen_status :: dict(), confirmed :: [rabbit_guid:guid()], - ack_msg_id :: dict(), known_senders :: set() }). --spec(promote_backing_queue_state/6 :: - (pid(), atom(), any(), pid(), dict(), [pid()]) -> master_state()). +-spec(promote_backing_queue_state/8 :: + (rabbit_amqqueue:name(), pid(), atom(), any(), pid(), [any()], dict(), + [pid()]) -> master_state()). -spec(sender_death_fun/0 :: () -> death_fun()). --spec(length_fun/0 :: () -> length_fun()). +-spec(depth_fun/0 :: () -> depth_fun()). +-spec(init_with_existing_bq/3 :: (rabbit_types:amqqueue(), atom(), any()) -> + master_state()). +-spec(stop_mirroring/1 :: (master_state()) -> {atom(), any()}). +-spec(sync_mirrors/3 :: (stats_fun(), stats_fun(), master_state()) -> + {'ok', master_state()} | {stop, any(), master_state()}). -endif. @@ -82,41 +90,79 @@ stop() -> %% Same as start/1. exit({not_valid_for_generic_backing_queue, ?MODULE}). -init(#amqqueue { name = QName, mirror_nodes = MNodes } = Q, Recover, - AsyncCallback) -> - {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( - Q, undefined, sender_death_fun(), length_fun()), - GM = rabbit_mirror_queue_coordinator:get_gm(CPid), - MNodes1 = - (case MNodes of - all -> rabbit_mnesia:all_clustered_nodes(); - undefined -> []; - _ -> MNodes - end) -- [node()], - [rabbit_mirror_queue_misc:add_mirror(QName, Node) || Node <- MNodes1], +init(Q, Recover, AsyncCallback) -> {ok, BQ} = application:get_env(backing_queue_module), BQS = BQ:init(Q, Recover, AsyncCallback), - ok = gm:broadcast(GM, {length, BQ:len(BQS)}), - #state { gm = GM, + State = #state{gm = GM} = init_with_existing_bq(Q, BQ, BQS), + ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}), + State. + +init_with_existing_bq(Q = #amqqueue{name = QName}, BQ, BQS) -> + {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( + Q, undefined, sender_death_fun(), depth_fun()), + GM = rabbit_mirror_queue_coordinator:get_gm(CPid), + Self = self(), + ok = rabbit_misc:execute_mnesia_transaction( + fun () -> + [Q1 = #amqqueue{gm_pids = GMPids}] + = mnesia:read({rabbit_queue, QName}), + ok = rabbit_amqqueue:store_queue( + Q1#amqqueue{gm_pids = [{GM, Self} | GMPids]}) + end), + {_MNode, SNodes} = rabbit_mirror_queue_misc:suggested_queue_nodes(Q), + rabbit_mirror_queue_misc:add_mirrors(QName, SNodes), + #state { name = QName, + gm = GM, coordinator = CPid, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = 0, seen_status = dict:new(), confirmed = [], - ack_msg_id = dict:new(), known_senders = sets:new() }. +stop_mirroring(State = #state { coordinator = CPid, + backing_queue = BQ, + backing_queue_state = BQS }) -> + unlink(CPid), + stop_all_slaves(shutdown, State), + {BQ, BQS}. + +sync_mirrors(HandleInfo, EmitStats, + State = #state { name = QName, + gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + Log = fun (Fmt, Params) -> + rabbit_log:info("Synchronising ~s: " ++ Fmt ++ "~n", + [rabbit_misc:rs(QName) | Params]) + end, + Log("~p messages to synchronise", [BQ:len(BQS)]), + {ok, #amqqueue{slave_pids = SPids}} = rabbit_amqqueue:lookup(QName), + Ref = make_ref(), + Syncer = rabbit_mirror_queue_sync:master_prepare(Ref, Log, SPids), + gm:broadcast(GM, {sync_start, Ref, Syncer, SPids}), + S = fun(BQSN) -> State#state{backing_queue_state = BQSN} end, + case rabbit_mirror_queue_sync:master_go( + Syncer, Ref, Log, HandleInfo, EmitStats, BQ, BQS) of + {shutdown, R, BQS1} -> {stop, R, S(BQS1)}; + {sync_died, R, BQS1} -> Log("~p", [R]), + {ok, S(BQS1)}; + {already_synced, BQS1} -> {ok, S(BQS1)}; + {ok, BQS1} -> Log("complete", []), + {ok, S(BQS1)} + end. + terminate({shutdown, dropped} = Reason, - State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> %% Backing queue termination - this node has been explicitly %% dropped. Normally, non-durable queues would be tidied up on %% startup, but there's a possibility that we will be added back %% in without this node being restarted. Thus we must do the full %% blown delete_and_terminate now, but only locally: we do not %% broadcast delete_and_terminate. - State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS), - set_delivered = 0 }; + State#state{backing_queue_state = BQ:delete_and_terminate(Reason, BQS)}; + terminate(Reason, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> %% Backing queue termination. The queue is going down but @@ -124,63 +170,77 @@ terminate(Reason, %% node. Thus just let some other slave take over. State #state { backing_queue_state = BQ:terminate(Reason, BQS) }. -delete_and_terminate(Reason, State = #state { gm = GM, - backing_queue = BQ, +delete_and_terminate(Reason, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> + stop_all_slaves(Reason, State), + State#state{backing_queue_state = BQ:delete_and_terminate(Reason, BQS)}. + +stop_all_slaves(Reason, #state{name = QName, gm = GM}) -> + {ok, #amqqueue{slave_pids = SPids}} = rabbit_amqqueue:lookup(QName), + MRefs = [erlang:monitor(process, SPid) || SPid <- SPids], ok = gm:broadcast(GM, {delete_and_terminate, Reason}), - State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS), - set_delivered = 0 }. + [receive {'DOWN', MRef, process, _Pid, _Info} -> ok end || MRef <- MRefs], + %% Normally when we remove a slave another slave or master will + %% notice and update Mnesia. But we just removed them all, and + %% have stopped listening ourselves. So manually clean up. + rabbit_misc:execute_mnesia_transaction( + fun () -> + [Q] = mnesia:read({rabbit_queue, QName}), + rabbit_mirror_queue_misc:store_updated_slaves( + Q #amqqueue { gm_pids = [], slave_pids = [] }) + end), + ok = gm:forget_group(QName). purge(State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {set_length, 0, false}), + ok = gm:broadcast(GM, {drop, 0, BQ:len(BQS), false}), {Count, BQS1} = BQ:purge(BQS), - {Count, State #state { backing_queue_state = BQS1, - set_delivered = 0 }}. + {Count, State #state { backing_queue_state = BQS1 }}. + +purge_acks(_State) -> exit({not_implemented, {?MODULE, purge_acks}}). -publish(Msg = #basic_message { id = MsgId }, MsgProps, ChPid, +publish(Msg = #basic_message { id = MsgId }, MsgProps, IsDelivered, ChPid, State = #state { gm = GM, seen_status = SS, backing_queue = BQ, backing_queue_state = BQS }) -> false = dict:is_key(MsgId, SS), %% ASSERTION - ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}), - BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + ok = gm:broadcast(GM, {publish, ChPid, MsgProps, Msg}), + BQS1 = BQ:publish(Msg, MsgProps, IsDelivered, ChPid, BQS), ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }). -publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, +publish_delivered(Msg = #basic_message { id = MsgId }, MsgProps, ChPid, State = #state { gm = GM, seen_status = SS, backing_queue = BQ, - backing_queue_state = BQS, - ack_msg_id = AM }) -> + backing_queue_state = BQS }) -> false = dict:is_key(MsgId, SS), %% ASSERTION - %% Must use confirmed_broadcast here in order to guarantee that - %% all slaves are forced to interpret this publish_delivered at - %% the same point, especially if we die and a slave is promoted. - ok = gm:confirmed_broadcast( - GM, {publish, {true, AckRequired}, ChPid, MsgProps, Msg}), - {AckTag, BQS1} = - BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), - AM1 = maybe_store_acktag(AckTag, MsgId, AM), - {AckTag, - ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1, - ack_msg_id = AM1 })}. - -dropwhile(Pred, AckRequired, - State = #state{gm = GM, - backing_queue = BQ, - set_delivered = SetDelivered, - backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {publish_delivered, ChPid, MsgProps, Msg}), + {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, BQS), + State1 = State #state { backing_queue_state = BQS1 }, + {AckTag, ensure_monitoring(ChPid, State1)}. + +discard(MsgId, ChPid, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + seen_status = SS }) -> + false = dict:is_key(MsgId, SS), %% ASSERTION + ok = gm:broadcast(GM, {discard, ChPid, MsgId}), + ensure_monitoring(ChPid, State #state { backing_queue_state = + BQ:discard(MsgId, ChPid, BQS) }). + +dropwhile(Pred, State = #state{backing_queue = BQ, + backing_queue_state = BQS }) -> Len = BQ:len(BQS), - {Msgs, BQS1} = BQ:dropwhile(Pred, AckRequired, BQS), - Len1 = BQ:len(BQS1), - ok = gm:broadcast(GM, {set_length, Len1, AckRequired}), - Dropped = Len - Len1, - SetDelivered1 = lists:max([0, SetDelivered - Dropped]), - {Msgs, State #state { backing_queue_state = BQS1, - set_delivered = SetDelivered1 } }. + {Next, BQS1} = BQ:dropwhile(Pred, BQS), + {Next, drop(Len, false, State #state { backing_queue_state = BQS1 })}. + +fetchwhile(Pred, Fun, Acc, State = #state{backing_queue = BQ, + backing_queue_state = BQS }) -> + Len = BQ:len(BQS), + {Next, Acc1, BQS1} = BQ:fetchwhile(Pred, Fun, Acc, BQS), + {Next, Acc1, drop(Len, true, State #state { backing_queue_state = BQS1 })}. drain_confirmed(State = #state { backing_queue = BQ, backing_queue_state = BQS, @@ -212,43 +272,33 @@ drain_confirmed(State = #state { backing_queue = BQ, seen_status = SS1, confirmed = [] }}. -fetch(AckRequired, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - set_delivered = SetDelivered, - ack_msg_id = AM }) -> +fetch(AckRequired, State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> {Result, BQS1} = BQ:fetch(AckRequired, BQS), State1 = State #state { backing_queue_state = BQS1 }, - case Result of - empty -> - {Result, State1}; - {#basic_message { id = MsgId } = Message, IsDelivered, AckTag, - Remaining} -> - ok = gm:broadcast(GM, {fetch, AckRequired, MsgId, Remaining}), - IsDelivered1 = IsDelivered orelse SetDelivered > 0, - SetDelivered1 = lists:max([0, SetDelivered - 1]), - AM1 = maybe_store_acktag(AckTag, MsgId, AM), - {{Message, IsDelivered1, AckTag, Remaining}, - State1 #state { set_delivered = SetDelivered1, - ack_msg_id = AM1 }} - end. + {Result, case Result of + empty -> State1; + {_MsgId, _IsDelivered, AckTag} -> drop_one(AckTag, State1) + end}. + +drop(AckRequired, State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + {Result, BQS1} = BQ:drop(AckRequired, BQS), + State1 = State #state { backing_queue_state = BQS1 }, + {Result, case Result of + empty -> State1; + {_MsgId, AckTag} -> drop_one(AckTag, State1) + end}. ack(AckTags, State = #state { gm = GM, backing_queue = BQ, - backing_queue_state = BQS, - ack_msg_id = AM }) -> + backing_queue_state = BQS }) -> {MsgIds, BQS1} = BQ:ack(AckTags, BQS), case MsgIds of [] -> ok; _ -> ok = gm:broadcast(GM, {ack, MsgIds}) end, - AM1 = lists:foldl(fun dict:erase/2, AM, AckTags), - {MsgIds, State #state { backing_queue_state = BQS1, - ack_msg_id = AM1 }}. - -fold(MsgFun, State = #state { backing_queue = BQ, - backing_queue_state = BQS }, AckTags) -> - State #state { backing_queue_state = BQ:fold(MsgFun, BQS, AckTags) }. + {MsgIds, State #state { backing_queue_state = BQS1 }}. requeue(AckTags, State = #state { gm = GM, backing_queue = BQ, @@ -257,12 +307,25 @@ requeue(AckTags, State = #state { gm = GM, ok = gm:broadcast(GM, {requeue, MsgIds}), {MsgIds, State #state { backing_queue_state = BQS1 }}. +ackfold(MsgFun, Acc, State = #state { backing_queue = BQ, + backing_queue_state = BQS }, AckTags) -> + {Acc1, BQS1} = BQ:ackfold(MsgFun, Acc, BQS, AckTags), + {Acc1, State #state { backing_queue_state = BQS1 }}. + +fold(Fun, Acc, State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + {Result, BQS1} = BQ:fold(Fun, Acc, BQS), + {Result, State #state { backing_queue_state = BQS1 }}. + len(#state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:len(BQS). is_empty(#state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:is_empty(BQS). +depth(#state { backing_queue = BQ, backing_queue_state = BQS }) -> + BQ:depth(BQS). + set_ram_duration_target(Target, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = @@ -319,8 +382,9 @@ is_duplicate(Message = #basic_message { id = MsgId }, %% immediately after calling is_duplicate). The msg is %% invalid. We will not see this again, nor will we be %% further involved in confirming this message, so erase. - {published, State #state { seen_status = dict:erase(MsgId, SS) }}; - {ok, confirmed} -> + {true, State #state { seen_status = dict:erase(MsgId, SS) }}; + {ok, Disposition} + when Disposition =:= confirmed %% It got published when we were a slave via gm, and %% confirmed some time after that (maybe even after %% promotion), but before we received the publish from the @@ -329,47 +393,31 @@ is_duplicate(Message = #basic_message { id = MsgId }, %% need to confirm now. As above, amqqueue_process will %% have the entry for the msg_id_to_channel mapping added %% immediately after calling is_duplicate/2. - {published, State #state { seen_status = dict:erase(MsgId, SS), - confirmed = [MsgId | Confirmed] }}; - {ok, discarded} -> - %% Don't erase from SS here because discard/2 is about to - %% be called and we need to be able to detect this case - {discarded, State} - end. - -discard(Msg = #basic_message { id = MsgId }, ChPid, - State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - seen_status = SS }) -> - %% It's a massive error if we get told to discard something that's - %% already been published or published-and-confirmed. To do that - %% would require non FIFO access. Hence we should not find - %% 'published' or 'confirmed' in this dict:find. - case dict:find(MsgId, SS) of - error -> - ok = gm:broadcast(GM, {discard, ChPid, Msg}), - State #state { backing_queue_state = BQ:discard(Msg, ChPid, BQS), - seen_status = dict:erase(MsgId, SS) }; - {ok, discarded} -> - State + orelse Disposition =:= discarded -> + %% Message was discarded while we were a slave. Confirm now. + %% As above, amqqueue_process will have the entry for the + %% msg_id_to_channel mapping. + {true, State #state { seen_status = dict:erase(MsgId, SS), + confirmed = [MsgId | Confirmed] }} end. %% --------------------------------------------------------------------------- %% Other exported functions %% --------------------------------------------------------------------------- -promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, KS) -> - Len = BQ:len(BQS), - ok = gm:broadcast(GM, {length, Len}), - #state { gm = GM, +promote_backing_queue_state(QName, CPid, BQ, BQS, GM, AckTags, Seen, KS) -> + {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), + Len = BQ:len(BQS1), + Depth = BQ:depth(BQS1), + true = Len == Depth, %% ASSERTION: everything must have been requeued + ok = gm:broadcast(GM, {depth, Depth}), + #state { name = QName, + gm = GM, coordinator = CPid, backing_queue = BQ, - backing_queue_state = BQS, - set_delivered = Len, - seen_status = SeenStatus, + backing_queue_state = BQS1, + seen_status = Seen, confirmed = [], - ack_msg_id = dict:new(), known_senders = sets:from_list(KS) }. sender_death_fun() -> @@ -384,7 +432,7 @@ sender_death_fun() -> end) end. -length_fun() -> +depth_fun() -> Self = self(), fun () -> rabbit_amqqueue:run_backing_queue( @@ -392,15 +440,30 @@ length_fun() -> fun (?MODULE, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {length, BQ:len(BQS)}), + ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}), State end) end. -maybe_store_acktag(undefined, _MsgId, AM) -> - AM; -maybe_store_acktag(AckTag, MsgId, AM) -> - dict:store(AckTag, MsgId, AM). +%% --------------------------------------------------------------------------- +%% Helpers +%% --------------------------------------------------------------------------- + +drop_one(AckTag, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {drop, BQ:len(BQS), 1, AckTag =/= undefined}), + State. + +drop(PrevLen, AckRequired, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + Len = BQ:len(BQS), + case PrevLen - Len of + 0 -> State; + Dropped -> ok = gm:broadcast(GM, {drop, Len, Dropped, AckRequired}), + State + end. ensure_monitoring(ChPid, State = #state { coordinator = CPid, known_senders = KS }) -> diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index 180677fe..eded0411 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -10,33 +10,52 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2010-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2010-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_mirror_queue_misc). +-behaviour(rabbit_policy_validator). --export([remove_from_queue/2, on_node_up/0, - drop_mirror/2, drop_mirror/3, add_mirror/2, add_mirror/3, - report_deaths/4]). +-export([remove_from_queue/3, on_node_up/0, add_mirrors/2, add_mirror/2, + report_deaths/4, store_updated_slaves/1, suggested_queue_nodes/1, + is_mirrored/1, update_mirrors/2, validate_policy/1]). + +%% for testing only +-export([module/1]). -include("rabbit.hrl"). +-rabbit_boot_step({?MODULE, + [{description, "HA policy validation"}, + {mfa, {rabbit_registry, register, + [policy_validator, <<"ha-mode">>, ?MODULE]}}, + {mfa, {rabbit_registry, register, + [policy_validator, <<"ha-params">>, ?MODULE]}}, + {mfa, {rabbit_registry, register, + [policy_validator, <<"ha-sync-mode">>, ?MODULE]}}, + {requires, rabbit_registry}, + {enables, recovery}]}). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(remove_from_queue/2 :: - (rabbit_amqqueue:name(), [pid()]) +-spec(remove_from_queue/3 :: + (rabbit_amqqueue:name(), pid(), [pid()]) -> {'ok', pid(), [pid()]} | {'error', 'not_found'}). -spec(on_node_up/0 :: () -> 'ok'). --spec(drop_mirror/2 :: - (rabbit_amqqueue:name(), node()) -> rabbit_types:ok_or_error(any())). +-spec(add_mirrors/2 :: (rabbit_amqqueue:name(), [node()]) -> 'ok'). -spec(add_mirror/2 :: - (rabbit_amqqueue:name(), node()) -> rabbit_types:ok_or_error(any())). --spec(add_mirror/3 :: - (rabbit_types:vhost(), binary(), atom()) - -> rabbit_types:ok_or_error(any())). + (rabbit_amqqueue:name(), node()) -> + {'ok', atom()} | rabbit_types:error(any())). +-spec(store_updated_slaves/1 :: (rabbit_types:amqqueue()) -> + rabbit_types:amqqueue()). +-spec(suggested_queue_nodes/1 :: (rabbit_types:amqqueue()) -> + {node(), [node()]}). +-spec(is_mirrored/1 :: (rabbit_types:amqqueue()) -> boolean()). +-spec(update_mirrors/2 :: + (rabbit_types:amqqueue(), rabbit_types:amqqueue()) -> 'ok'). -endif. @@ -50,29 +69,35 @@ %% slave (now master) receives messages it's not ready for (for %% example, new consumers). %% Returns {ok, NewMPid, DeadPids} -remove_from_queue(QueueName, DeadPids) -> - DeadNodes = [node(DeadPid) || DeadPid <- DeadPids], +remove_from_queue(QueueName, Self, DeadGMPids) -> rabbit_misc:execute_mnesia_transaction( fun () -> %% Someone else could have deleted the queue before we %% get here. case mnesia:read({rabbit_queue, QueueName}) of [] -> {error, not_found}; - [Q = #amqqueue { pid = QPid, - slave_pids = SPids }] -> - [QPid1 | SPids1] = Alive = - [Pid || Pid <- [QPid | SPids], - not lists:member(node(Pid), DeadNodes)], + [Q = #amqqueue { pid = QPid, + slave_pids = SPids, + gm_pids = GMPids }] -> + {Dead, GMPids1} = lists:partition( + fun ({GM, _}) -> + lists:member(GM, DeadGMPids) + end, GMPids), + DeadPids = [Pid || {_GM, Pid} <- Dead], + Alive = [QPid | SPids] -- DeadPids, + {QPid1, SPids1} = promote_slave(Alive), case {{QPid, SPids}, {QPid1, SPids1}} of {Same, Same} -> + GMPids = GMPids1, %% ASSERTION {ok, QPid1, []}; - _ when QPid =:= QPid1 orelse node(QPid1) =:= node() -> + _ when QPid =:= QPid1 orelse QPid1 =:= Self -> %% Either master hasn't changed, so %% we're ok to update mnesia; or we have %% become the master. - Q1 = Q #amqqueue { pid = QPid1, - slave_pids = SPids1 }, - ok = rabbit_amqqueue:store_queue(Q1), + store_updated_slaves( + Q #amqqueue { pid = QPid1, + slave_pids = SPids1, + gm_pids = GMPids1 }), {ok, QPid1, [QPid | SPids] -- Alive}; _ -> %% Master has changed, and we're not it, @@ -85,32 +110,41 @@ remove_from_queue(QueueName, DeadPids) -> end). on_node_up() -> - Qs = + QNames = rabbit_misc:execute_mnesia_transaction( fun () -> mnesia:foldl( - fun (#amqqueue { mirror_nodes = undefined }, QsN) -> - QsN; - (#amqqueue { name = QName, - mirror_nodes = all }, QsN) -> - [QName | QsN]; - (#amqqueue { name = QName, - mirror_nodes = MNodes }, QsN) -> - case lists:member(node(), MNodes) of - true -> [QName | QsN]; - false -> QsN + fun (Q = #amqqueue{name = QName, + pid = Pid, + slave_pids = SPids}, QNames0) -> + %% We don't want to pass in the whole + %% cluster - we don't want a situation + %% where starting one node causes us to + %% decide to start a mirror on another + PossibleNodes0 = [node(P) || P <- [Pid | SPids]], + PossibleNodes = + case lists:member(node(), PossibleNodes0) of + true -> PossibleNodes0; + false -> [node() | PossibleNodes0] + end, + {_MNode, SNodes} = suggested_queue_nodes( + Q, PossibleNodes), + case lists:member(node(), SNodes) of + true -> [QName | QNames0]; + false -> QNames0 end end, [], rabbit_queue) end), - [add_mirror(Q, node()) || Q <- Qs], + [add_mirror(QName, node()) || QName <- QNames], ok. -drop_mirror(VHostPath, QueueName, MirrorNode) -> - drop_mirror(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). +drop_mirrors(QName, Nodes) -> + [drop_mirror(QName, Node) || Node <- Nodes], + ok. -drop_mirror(Queue, MirrorNode) -> - if_mirrored_queue( - Queue, +drop_mirror(QName, MirrorNode) -> + rabbit_amqqueue:with( + QName, fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids }) -> case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of [] -> @@ -122,48 +156,54 @@ drop_mirror(Queue, MirrorNode) -> "Dropping queue mirror on node ~p for ~s~n", [MirrorNode, rabbit_misc:rs(Name)]), exit(Pid, {shutdown, dropped}), - ok + {ok, dropped} end end). -add_mirror(VHostPath, QueueName, MirrorNode) -> - add_mirror(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). +add_mirrors(QName, Nodes) -> + [add_mirror(QName, Node) || Node <- Nodes], + ok. -add_mirror(Queue, MirrorNode) -> - if_mirrored_queue( - Queue, +add_mirror(QName, MirrorNode) -> + rabbit_amqqueue:with( + QName, fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids } = Q) -> case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of - [] -> case rabbit_mirror_queue_slave_sup:start_child( - MirrorNode, [Q]) of - {ok, undefined} -> %% Already running - ok; - {ok, SPid} -> - rabbit_log:info( - "Adding mirror of ~s on node ~p: ~p~n", - [rabbit_misc:rs(Name), MirrorNode, SPid]), - ok; - Other -> - Other - end; - [_] -> {error, {queue_already_mirrored_on_node, MirrorNode}} + [] -> + start_child(Name, MirrorNode, Q); + [SPid] -> + case rabbit_misc:is_process_alive(SPid) of + true -> {ok, already_mirrored}; + false -> start_child(Name, MirrorNode, Q) + end end end). -if_mirrored_queue(Queue, Fun) -> - rabbit_amqqueue:with( - Queue, fun (#amqqueue { arguments = Args } = Q) -> - case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) of - undefined -> ok; - _ -> Fun(Q) - end - end). +start_child(Name, MirrorNode, Q) -> + case rabbit_misc:with_exit_handler( + rabbit_misc:const({ok, down}), + fun () -> + rabbit_mirror_queue_slave_sup:start_child(MirrorNode, [Q]) + end) of + {ok, SPid} when is_pid(SPid) -> + maybe_auto_sync(Q), + rabbit_log:info("Adding mirror of ~s on node ~p: ~p~n", + [rabbit_misc:rs(Name), MirrorNode, SPid]), + {ok, started}; + {error, {{stale_master_pid, StalePid}, _}} -> + rabbit_log:warning("Detected stale HA master while adding " + "mirror of ~s on node ~p: ~p~n", + [rabbit_misc:rs(Name), MirrorNode, StalePid]), + {ok, stale_master}; + {error, {{duplicate_live_master, _}=Err, _}} -> + Err; + Other -> + Other + end. report_deaths(_MirrorPid, _IsMaster, _QueueName, []) -> ok; report_deaths(MirrorPid, IsMaster, QueueName, DeadPids) -> - rabbit_event:notify(queue_mirror_deaths, [{name, QueueName}, - {pids, DeadPids}]), rabbit_log:info("Mirrored-queue (~s): ~s ~s saw deaths of mirrors ~s~n", [rabbit_misc:rs(QueueName), case IsMaster of @@ -172,3 +212,134 @@ report_deaths(MirrorPid, IsMaster, QueueName, DeadPids) -> end, rabbit_misc:pid_to_string(MirrorPid), [[rabbit_misc:pid_to_string(P), $ ] || P <- DeadPids]]). + +store_updated_slaves(Q = #amqqueue{slave_pids = SPids, + sync_slave_pids = SSPids}) -> + %% TODO now that we clear sync_slave_pids in rabbit_durable_queue, + %% do we still need this filtering? + SSPids1 = [SSPid || SSPid <- SSPids, lists:member(SSPid, SPids)], + Q1 = Q#amqqueue{sync_slave_pids = SSPids1}, + ok = rabbit_amqqueue:store_queue(Q1), + %% Wake it up so that we emit a stats event + rabbit_amqqueue:wake_up(Q1), + Q1. + +%%---------------------------------------------------------------------------- + +promote_slave([SPid | SPids]) -> + %% The slave pids are maintained in descending order of age, so + %% the one to promote is the oldest. + {SPid, SPids}. + +suggested_queue_nodes(Q) -> + suggested_queue_nodes(Q, rabbit_mnesia:cluster_nodes(running)). + +%% This variant exists so we can pull a call to +%% rabbit_mnesia:cluster_nodes(running) out of a loop or +%% transaction or both. +suggested_queue_nodes(Q, All) -> + {MNode0, SNodes, SSNodes} = actual_queue_nodes(Q), + MNode = case MNode0 of + none -> node(); + _ -> MNode0 + end, + Params = policy(<<"ha-params">>, Q), + case module(Q) of + {ok, M} -> M:suggested_queue_nodes(Params, MNode, SNodes, SSNodes, All); + _ -> {MNode, []} + end. + +policy(Policy, Q) -> + case rabbit_policy:get(Policy, Q) of + {ok, P} -> P; + _ -> none + end. + +module(#amqqueue{} = Q) -> + case rabbit_policy:get(<<"ha-mode">>, Q) of + {ok, Mode} -> module(Mode); + _ -> not_mirrored + end; + +module(Mode) when is_binary(Mode) -> + case rabbit_registry:binary_to_type(Mode) of + {error, not_found} -> not_mirrored; + T -> case rabbit_registry:lookup_module(ha_mode, T) of + {ok, Module} -> {ok, Module}; + _ -> not_mirrored + end + end. + +is_mirrored(Q) -> + case module(Q) of + {ok, _} -> true; + _ -> false + end. + +actual_queue_nodes(#amqqueue{pid = MPid, + slave_pids = SPids, + sync_slave_pids = SSPids}) -> + Nodes = fun (L) -> [node(Pid) || Pid <- L] end, + {case MPid of + none -> none; + _ -> node(MPid) + end, Nodes(SPids), Nodes(SSPids)}. + +maybe_auto_sync(Q = #amqqueue{pid = QPid}) -> + case policy(<<"ha-sync-mode">>, Q) of + <<"automatic">> -> + spawn(fun() -> rabbit_amqqueue:sync_mirrors(QPid) end); + _ -> + ok + end. + +update_mirrors(OldQ = #amqqueue{pid = QPid}, + NewQ = #amqqueue{pid = QPid}) -> + case {is_mirrored(OldQ), is_mirrored(NewQ)} of + {false, false} -> ok; + {true, false} -> rabbit_amqqueue:stop_mirroring(QPid); + {false, true} -> rabbit_amqqueue:start_mirroring(QPid); + {true, true} -> update_mirrors0(OldQ, NewQ) + end. + +update_mirrors0(OldQ = #amqqueue{name = QName}, + NewQ = #amqqueue{name = QName}) -> + {OldMNode, OldSNodes, _} = actual_queue_nodes(OldQ), + {NewMNode, NewSNodes} = suggested_queue_nodes(NewQ), + OldNodes = [OldMNode | OldSNodes], + NewNodes = [NewMNode | NewSNodes], + add_mirrors (QName, NewNodes -- OldNodes), + drop_mirrors(QName, OldNodes -- NewNodes), + maybe_auto_sync(NewQ), + ok. + +%%---------------------------------------------------------------------------- + +validate_policy(KeyList) -> + Mode = proplists:get_value(<<"ha-mode">>, KeyList, none), + Params = proplists:get_value(<<"ha-params">>, KeyList, none), + SyncMode = proplists:get_value(<<"ha-sync-mode">>, KeyList, none), + case {Mode, Params, SyncMode} of + {none, none, none} -> + ok; + {none, _, _} -> + {error, "ha-mode must be specified to specify ha-params or " + "ha-sync-mode", []}; + _ -> + case module(Mode) of + {ok, M} -> case M:validate_policy(Params) of + ok -> validate_sync_mode(SyncMode); + E -> E + end; + _ -> {error, "~p is not a valid ha-mode value", [Mode]} + end + end. + +validate_sync_mode(SyncMode) -> + case SyncMode of + <<"automatic">> -> ok; + <<"manual">> -> ok; + none -> ok; + Mode -> {error, "ha-sync-mode must be \"manual\" " + "or \"automatic\", got ~p", [Mode]} + end. diff --git a/src/rabbit_mirror_queue_mode.erl b/src/rabbit_mirror_queue_mode.erl new file mode 100644 index 00000000..9e2015d9 --- /dev/null +++ b/src/rabbit_mirror_queue_mode.erl @@ -0,0 +1,57 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2010-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_mode). + +-ifdef(use_specs). + +-type(master() :: node()). +-type(slave() :: node()). +-type(params() :: any()). + +-callback description() -> [proplists:property()]. + +%% Called whenever we think we might need to change nodes for a +%% mirrored queue. Note that this is called from a variety of +%% contexts, both inside and outside Mnesia transactions. Ideally it +%% will be pure-functional. +%% +%% Takes: parameters set in the policy, +%% current master, +%% current slaves, +%% current synchronised slaves, +%% all nodes to consider +%% +%% Returns: tuple of new master, new slaves +%% +-callback suggested_queue_nodes( + params(), master(), [slave()], [slave()], [node()]) -> + {master(), [slave()]}. + +%% Are the parameters valid for this mode? +-callback validate_policy(params()) -> + rabbit_policy_validator:validate_results(). + +-else. + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [{description, 0}, {suggested_queue_nodes, 5}, {validate_policy, 1}]; +behaviour_info(_Other) -> + undefined. + +-endif. diff --git a/src/rabbit_mirror_queue_mode_all.erl b/src/rabbit_mirror_queue_mode_all.erl new file mode 100644 index 00000000..3b5163a3 --- /dev/null +++ b/src/rabbit_mirror_queue_mode_all.erl @@ -0,0 +1,41 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2010-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_mode_all). + +-include("rabbit.hrl"). + +-behaviour(rabbit_mirror_queue_mode). + +-export([description/0, suggested_queue_nodes/5, validate_policy/1]). + +-rabbit_boot_step({?MODULE, + [{description, "mirror mode all"}, + {mfa, {rabbit_registry, register, + [ha_mode, <<"all">>, ?MODULE]}}, + {requires, rabbit_registry}, + {enables, kernel_ready}]}). + +description() -> + [{description, <<"Mirror queue to all nodes">>}]. + +suggested_queue_nodes(_Params, MNode, _SNodes, _SSNodes, Poss) -> + {MNode, Poss -- [MNode]}. + +validate_policy(none) -> + ok; +validate_policy(_Params) -> + {error, "ha-mode=\"all\" does not take parameters", []}. diff --git a/src/rabbit_mirror_queue_mode_exactly.erl b/src/rabbit_mirror_queue_mode_exactly.erl new file mode 100644 index 00000000..2841f87e --- /dev/null +++ b/src/rabbit_mirror_queue_mode_exactly.erl @@ -0,0 +1,56 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2010-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_mode_exactly). + +-include("rabbit.hrl"). + +-behaviour(rabbit_mirror_queue_mode). + +-export([description/0, suggested_queue_nodes/5, validate_policy/1]). + +-rabbit_boot_step({?MODULE, + [{description, "mirror mode exactly"}, + {mfa, {rabbit_registry, register, + [ha_mode, <<"exactly">>, ?MODULE]}}, + {requires, rabbit_registry}, + {enables, kernel_ready}]}). + +description() -> + [{description, <<"Mirror queue to a specified number of nodes">>}]. + +%% When we need to add nodes, we randomise our candidate list as a +%% crude form of load-balancing. TODO it would also be nice to +%% randomise the list of ones to remove when we have too many - we +%% would have to take account of synchronisation though. +suggested_queue_nodes(Count, MNode, SNodes, _SSNodes, Poss) -> + SCount = Count - 1, + {MNode, case SCount > length(SNodes) of + true -> Cand = shuffle((Poss -- [MNode]) -- SNodes), + SNodes ++ lists:sublist(Cand, SCount - length(SNodes)); + false -> lists:sublist(SNodes, SCount) + end}. + +shuffle(L) -> + {A1,A2,A3} = now(), + random:seed(A1, A2, A3), + {_, L1} = lists:unzip(lists:keysort(1, [{random:uniform(), N} || N <- L])), + L1. + +validate_policy(N) when is_integer(N) andalso N > 0 -> + ok; +validate_policy(Params) -> + {error, "ha-mode=\"exactly\" takes an integer, ~p given", [Params]}. diff --git a/src/rabbit_mirror_queue_mode_nodes.erl b/src/rabbit_mirror_queue_mode_nodes.erl new file mode 100644 index 00000000..779b439d --- /dev/null +++ b/src/rabbit_mirror_queue_mode_nodes.erl @@ -0,0 +1,70 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2010-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_mode_nodes). + +-include("rabbit.hrl"). + +-behaviour(rabbit_mirror_queue_mode). + +-export([description/0, suggested_queue_nodes/5, validate_policy/1]). + +-rabbit_boot_step({?MODULE, + [{description, "mirror mode nodes"}, + {mfa, {rabbit_registry, register, + [ha_mode, <<"nodes">>, ?MODULE]}}, + {requires, rabbit_registry}, + {enables, kernel_ready}]}). + +description() -> + [{description, <<"Mirror queue to specified nodes">>}]. + +suggested_queue_nodes(Nodes0, MNode, _SNodes, SSNodes, Poss) -> + Nodes1 = [list_to_atom(binary_to_list(Node)) || Node <- Nodes0], + %% If the current master is not in the nodes specified, then what we want + %% to do depends on whether there are any synchronised slaves. If there + %% are then we can just kill the current master - the admin has asked for + %% a migration and we should give it to them. If there are not however + %% then we must keep the master around so as not to lose messages. + Nodes = case SSNodes of + [] -> lists:usort([MNode | Nodes1]); + _ -> Nodes1 + end, + Unavailable = Nodes -- Poss, + Available = Nodes -- Unavailable, + case Available of + [] -> %% We have never heard of anything? Not much we can do but + %% keep the master alive. + {MNode, []}; + _ -> case lists:member(MNode, Available) of + true -> {MNode, Available -- [MNode]}; + false -> %% Make sure the new master is synced! In order to + %% get here SSNodes must not be empty. + [NewMNode | _] = SSNodes, + {NewMNode, Available -- [NewMNode]} + end + end. + +validate_policy([]) -> + {error, "ha-mode=\"nodes\" list must be non-empty", []}; +validate_policy(Nodes) when is_list(Nodes) -> + case [I || I <- Nodes, not is_binary(I)] of + [] -> ok; + Invalid -> {error, "ha-mode=\"nodes\" takes a list of strings, " + "~p was not a string", [Invalid]} + end; +validate_policy(Params) -> + {error, "ha-mode=\"nodes\" takes a list, ~p given", [Params]}. diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index e412fbbc..18f848c3 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2010-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2010-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_mirror_queue_slave). @@ -19,25 +19,16 @@ %% For general documentation of HA design, see %% rabbit_mirror_queue_coordinator %% -%% We join the GM group before we add ourselves to the amqqueue -%% record. As a result: -%% 1. We can receive msgs from GM that correspond to messages we will -%% never receive from publishers. -%% 2. When we receive a message from publishers, we must receive a -%% message from the GM group for it. -%% 3. However, that instruction from the GM group can arrive either -%% before or after the actual message. We need to be able to -%% distinguish between GM instructions arriving early, and case (1) -%% above. -%% +%% We receive messages from GM and from publishers, and the gm +%% messages can arrive either before or after the 'actual' message. %% All instructions from the GM group must be processed in the order %% in which they're received. -export([start_link/1, set_maximum_since_use/2, info/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3, handle_pre_hibernate/1, prioritise_call/3, - prioritise_cast/2, prioritise_info/2]). + code_change/3, handle_pre_hibernate/1, prioritise_call/4, + prioritise_cast/3, prioritise_info/3, format_message_queue/2]). -export([joined/2, members_changed/3, handle_msg/3]). @@ -46,90 +37,78 @@ -include("rabbit.hrl"). -%%---------------------------------------------------------------------------- - -include("gm_specs.hrl"). --ifdef(use_specs). -%% Shut dialyzer up --spec(promote_me/2 :: (_, _) -> no_return()). --endif. - %%---------------------------------------------------------------------------- - --define(CREATION_EVENT_KEYS, +-define(INFO_KEYS, [pid, name, master_pid, is_synchronised ]). --define(INFO_KEYS, ?CREATION_EVENT_KEYS). - -define(SYNC_INTERVAL, 25). %% milliseconds -define(RAM_DURATION_UPDATE_INTERVAL, 5000). -define(DEATH_TIMEOUT, 20000). %% 20 seconds -record(state, { q, gm, - master_pid, backing_queue, backing_queue_state, sync_timer_ref, rate_timer_ref, - sender_queues, %% :: Pid -> {Q {Msg, Bool}, Set MsgId} + sender_queues, %% :: Pid -> {Q Msg, Set MsgId, ChState} msg_id_ack, %% :: MsgId -> AckTag - ack_num, msg_id_status, known_senders, - synchronised + %% Master depth - local depth + depth_delta }). -start_link(Q) -> - gen_server2:start_link(?MODULE, Q, []). +%%---------------------------------------------------------------------------- + +start_link(Q) -> gen_server2:start_link(?MODULE, Q, []). set_maximum_since_use(QPid, Age) -> gen_server2:cast(QPid, {set_maximum_since_use, Age}). -info(QPid) -> - gen_server2:call(QPid, info, infinity). - -init(#amqqueue { name = QueueName } = Q) -> +info(QPid) -> gen_server2:call(QPid, info, infinity). + +init(Q = #amqqueue { name = QName }) -> + %% We join the GM group before we add ourselves to the amqqueue + %% record. As a result: + %% 1. We can receive msgs from GM that correspond to messages we will + %% never receive from publishers. + %% 2. When we receive a message from publishers, we must receive a + %% message from the GM group for it. + %% 3. However, that instruction from the GM group can arrive either + %% before or after the actual message. We need to be able to + %% distinguish between GM instructions arriving early, and case (1) + %% above. + %% + process_flag(trap_exit, true), %% amqqueue_process traps exits too. + {ok, GM} = gm:start_link(QName, ?MODULE, [self()], + fun rabbit_misc:execute_mnesia_transaction/1), + receive {joined, GM} -> ok end, Self = self(), Node = node(), case rabbit_misc:execute_mnesia_transaction( - fun () -> - [Q1 = #amqqueue { pid = QPid, slave_pids = MPids }] = - mnesia:read({rabbit_queue, QueueName}), - case [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node] of - [] -> MPids1 = MPids ++ [Self], - ok = rabbit_amqqueue:store_queue( - Q1 #amqqueue { slave_pids = MPids1 }), - {new, QPid}; - [SPid] -> true = rabbit_misc:is_process_alive(SPid), - existing - end - end) of - {new, MPid} -> - process_flag(trap_exit, true), %% amqqueue_process traps exits too. - {ok, GM} = gm:start_link(QueueName, ?MODULE, [self()]), - receive {joined, GM} -> - ok - end, - erlang:monitor(process, MPid), + fun() -> init_it(Self, GM, Node, QName) end) of + {new, QPid, GMPids} -> + erlang:monitor(process, QPid), ok = file_handle_cache:register_callback( rabbit_amqqueue, set_maximum_since_use, [Self]), ok = rabbit_memory_monitor:register( Self, {rabbit_amqqueue, set_ram_duration_target, [Self]}), {ok, BQ} = application:get_env(backing_queue_module), - BQS = bq_init(BQ, Q, false), - State = #state { q = Q, + Q1 = Q #amqqueue { pid = QPid }, + BQS = bq_init(BQ, Q1, false), + State = #state { q = Q1, gm = GM, - master_pid = MPid, backing_queue = BQ, backing_queue_state = BQS, rate_timer_ref = undefined, @@ -137,70 +116,82 @@ init(#amqqueue { name = QueueName } = Q) -> sender_queues = dict:new(), msg_id_ack = dict:new(), - ack_num = 0, msg_id_status = dict:new(), - known_senders = pmon:new(), + known_senders = pmon:new(delegate), - synchronised = false + depth_delta = undefined }, - rabbit_event:notify(queue_slave_created, - infos(?CREATION_EVENT_KEYS, State)), - ok = gm:broadcast(GM, request_length), + ok = gm:broadcast(GM, request_depth), + ok = gm:validate_members(GM, [GM | [G || {G, _} <- GMPids]]), {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}; + {stale, StalePid} -> + {stop, {stale_master_pid, StalePid}}; + duplicate_live_master -> + {stop, {duplicate_live_master, Node}}; existing -> + gm:leave(GM), ignore end. -handle_call({deliver, Delivery = #delivery { immediate = true }}, - From, State) -> - %% It is safe to reply 'false' here even if a) we've not seen the - %% msg via gm, or b) the master dies before we receive the msg via - %% gm. In the case of (a), we will eventually receive the msg via - %% gm, and it's only the master's result to the channel that is - %% important. In the case of (b), if the master does die and we do - %% get promoted then at that point we have no consumers, thus - %% 'false' is precisely the correct answer. However, we must be - %% careful to _not_ enqueue the message in this case. - - %% Note this is distinct from the case where we receive the msg - %% via gm first, then we're promoted to master, and only then do - %% we receive the msg from the channel. - gen_server2:reply(From, false), %% master may deliver it, not us - noreply(maybe_enqueue_message(Delivery, false, State)); - -handle_call({deliver, Delivery = #delivery { mandatory = true }}, - From, State) -> - gen_server2:reply(From, true), %% amqqueue throws away the result anyway - noreply(maybe_enqueue_message(Delivery, true, State)); +init_it(Self, GM, Node, QName) -> + [Q = #amqqueue { pid = QPid, slave_pids = SPids, gm_pids = GMPids }] = + mnesia:read({rabbit_queue, QName}), + case [Pid || Pid <- [QPid | SPids], node(Pid) =:= Node] of + [] -> add_slave(Q, Self, GM), + {new, QPid, GMPids}; + [QPid] -> case rabbit_misc:is_process_alive(QPid) of + true -> duplicate_live_master; + false -> {stale, QPid} + end; + [SPid] -> case rabbit_misc:is_process_alive(SPid) of + true -> existing; + false -> Q1 = Q#amqqueue { + slave_pids = SPids -- [SPid], + gm_pids = [T || T = {_, S} <- GMPids, + S =/= SPid] }, + add_slave(Q1, Self, GM), + {new, QPid, GMPids} + end + end. + +%% Add to the end, so they are in descending order of age, see +%% rabbit_mirror_queue_misc:promote_slave/1 +add_slave(Q = #amqqueue { slave_pids = SPids, gm_pids = GMPids }, New, GM) -> + rabbit_mirror_queue_misc:store_updated_slaves( + Q#amqqueue{slave_pids = SPids ++ [New], gm_pids = [{GM, New} | GMPids]}). + +handle_call({deliver, Delivery, true}, From, State) -> + %% Synchronous, "mandatory" deliver mode. + gen_server2:reply(From, ok), + noreply(maybe_enqueue_message(Delivery, State)); handle_call({gm_deaths, Deaths}, From, - State = #state { q = #amqqueue { name = QueueName }, - gm = GM, - master_pid = MPid }) -> - %% The GM has told us about deaths, which means we're not going to - %% receive any more messages from GM - case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of + State = #state { q = Q = #amqqueue { name = QName, pid = MPid }}) -> + Self = self(), + case rabbit_mirror_queue_misc:remove_from_queue(QName, Self, Deaths) of {error, not_found} -> gen_server2:reply(From, ok), {stop, normal, State}; {ok, Pid, DeadPids} -> - rabbit_mirror_queue_misc:report_deaths(self(), false, QueueName, + rabbit_mirror_queue_misc:report_deaths(Self, false, QName, DeadPids), - if node(Pid) =:= node(MPid) -> + case Pid of + MPid -> %% master hasn't changed - reply(ok, State); - node(Pid) =:= node() -> + gen_server2:reply(From, ok), + noreply(State); + Self -> %% we've become master - promote_me(From, State); - true -> - %% master has changed to not us. + QueueState = promote_me(From, State), + {become, rabbit_amqqueue_process, QueueState, hibernate}; + _ -> + %% master has changed to not us gen_server2:reply(From, ok), erlang:monitor(process, Pid), - ok = gm:broadcast(GM, heartbeat), - noreply(State #state { master_pid = Pid }) + noreply(State #state { q = Q #amqqueue { pid = Pid } }) end end; @@ -213,13 +204,39 @@ handle_cast({run_backing_queue, Mod, Fun}, State) -> handle_cast({gm, Instruction}, State) -> handle_process_result(process_instruction(Instruction, State)); -handle_cast({deliver, Delivery = #delivery{sender = Sender}, Flow}, State) -> - %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. +handle_cast({deliver, Delivery = #delivery{sender = Sender}, true, Flow}, + State) -> + %% Asynchronous, non-"mandatory", deliver mode. case Flow of flow -> credit_flow:ack(Sender); noflow -> ok end, - noreply(maybe_enqueue_message(Delivery, true, State)); + noreply(maybe_enqueue_message(Delivery, State)); + +handle_cast({sync_start, Ref, Syncer}, + State = #state { depth_delta = DD, + backing_queue = BQ, + backing_queue_state = BQS }) -> + State1 = #state{rate_timer_ref = TRef} = ensure_rate_timer(State), + S = fun({MA, TRefN, BQSN}) -> + State1#state{depth_delta = undefined, + msg_id_ack = dict:from_list(MA), + rate_timer_ref = TRefN, + backing_queue_state = BQSN} + end, + case rabbit_mirror_queue_sync:slave( + DD, Ref, TRef, Syncer, BQ, BQS, + fun (BQN, BQSN) -> + BQSN1 = update_ram_duration(BQN, BQSN), + TRefN = erlang:send_after(?RAM_DURATION_UPDATE_INTERVAL, + self(), update_ram_duration), + {TRefN, BQSN1} + end) of + denied -> noreply(State1); + {ok, Res} -> noreply(set_delta(0, S(Res))); + {failed, Res} -> noreply(S(Res)); + {stop, Reason, Res} -> {stop, Reason, S(Res)} + end; handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), @@ -231,15 +248,14 @@ handle_cast({set_ram_duration_target, Duration}, BQS1 = BQ:set_ram_duration_target(Duration, BQS), noreply(State #state { backing_queue_state = BQS1 }). -handle_info(update_ram_duration, - State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - {RamDuration, BQS1} = BQ:ram_duration(BQS), - DesiredDuration = - rabbit_memory_monitor:report_ram_duration(self(), RamDuration), - BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), - noreply(State #state { rate_timer_ref = just_measured, - backing_queue_state = BQS2 }); +handle_info(update_ram_duration, State = #state{backing_queue = BQ, + backing_queue_state = BQS}) -> + BQS1 = update_ram_duration(BQ, BQS), + %% Don't call noreply/1, we don't want to set timers + {State1, Timeout} = next_state(State #state { + rate_timer_ref = undefined, + backing_queue_state = BQS1 }), + {noreply, State1, Timeout}; handle_info(sync_timeout, State) -> noreply(backing_queue_timeout( @@ -249,12 +265,13 @@ handle_info(timeout, State) -> noreply(backing_queue_timeout(State)); handle_info({'DOWN', _MonitorRef, process, MPid, _Reason}, - State = #state { gm = GM, master_pid = MPid }) -> - ok = gm:broadcast(GM, {process_death, MPid}), + State = #state { gm = GM, q = #amqqueue { pid = MPid } }) -> + ok = gm:broadcast(GM, process_death), noreply(State); handle_info({'DOWN', _MonitorRef, process, ChPid, _Reason}, State) -> - noreply(local_sender_death(ChPid, State)); + local_sender_death(ChPid, State), + noreply(maybe_forget_sender(ChPid, down_from_ch, State)); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; @@ -286,7 +303,7 @@ terminate(Reason, #state { q = Q, rate_timer_ref = RateTRef }) -> ok = gm:leave(GM), QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( - Q, BQ, BQS, RateTRef, [], [], pmon:new(), dict:new()), + Q, BQ, BQS, RateTRef, [], pmon:new(), dict:new()), rabbit_amqqueue_process:terminate(Reason, QueueState); terminate([_SPid], _Reason) -> %% gm case @@ -304,67 +321,71 @@ handle_pre_hibernate(State = #state { backing_queue = BQ, BQS3 = BQ:handle_pre_hibernate(BQS2), {hibernate, stop_rate_timer(State #state { backing_queue_state = BQS3 })}. -prioritise_call(Msg, _From, _State) -> +prioritise_call(Msg, _From, _Len, _State) -> case Msg of info -> 9; {gm_deaths, _Deaths} -> 5; _ -> 0 end. -prioritise_cast(Msg, _State) -> +prioritise_cast(Msg, _Len, _State) -> case Msg of {set_ram_duration_target, _Duration} -> 8; {set_maximum_since_use, _Age} -> 8; {run_backing_queue, _Mod, _Fun} -> 6; {gm, _Msg} -> 5; - {post_commit, _Txn, _AckTags} -> 4; _ -> 0 end. -prioritise_info(Msg, _State) -> +prioritise_info(Msg, _Len, _State) -> case Msg of update_ram_duration -> 8; sync_timeout -> 6; _ -> 0 end. +format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). + %% --------------------------------------------------------------------------- %% GM %% --------------------------------------------------------------------------- -joined([SPid], _Members) -> - SPid ! {joined, self()}, - ok. +joined([SPid], _Members) -> SPid ! {joined, self()}, ok. -members_changed([_SPid], _Births, []) -> - ok; -members_changed([SPid], _Births, Deaths) -> - inform_deaths(SPid, Deaths). +members_changed([_SPid], _Births, []) -> ok; +members_changed([ SPid], _Births, Deaths) -> inform_deaths(SPid, Deaths). -handle_msg([_SPid], _From, heartbeat) -> - ok; -handle_msg([_SPid], _From, request_length) -> +handle_msg([_SPid], _From, request_depth) -> %% This is only of value to the master ok; handle_msg([_SPid], _From, {ensure_monitoring, _Pid}) -> %% This is only of value to the master ok; -handle_msg([SPid], _From, {process_death, Pid}) -> - inform_deaths(SPid, [Pid]); +handle_msg([_SPid], _From, process_death) -> + %% Since GM is by nature lazy we need to make sure there is some + %% traffic when a master dies, to make sure we get informed of the + %% death. That's all process_death does, create some traffic. We + %% must not take any notice of the master death here since it + %% comes without ordering guarantees - there could still be + %% messages from the master we have yet to receive. When we get + %% members_changed, then there will be no more messages. + ok; +handle_msg([CPid], _From, {delete_and_terminate, _Reason} = Msg) -> + ok = gen_server2:cast(CPid, {gm, Msg}), + {stop, {shutdown, ring_shutdown}}; +handle_msg([SPid], _From, {sync_start, Ref, Syncer, SPids}) -> + case lists:member(SPid, SPids) of + true -> gen_server2:cast(SPid, {sync_start, Ref, Syncer}); + false -> ok + end; handle_msg([SPid], _From, Msg) -> ok = gen_server2:cast(SPid, {gm, Msg}). inform_deaths(SPid, Deaths) -> - rabbit_misc:with_exit_handler( - fun () -> {stop, normal} end, - fun () -> - case gen_server2:call(SPid, {gm_deaths, Deaths}, infinity) of - ok -> - ok; - {promote, CPid} -> - {become, rabbit_mirror_queue_coordinator, [CPid]} - end - end). + case gen_server2:call(SPid, {gm_deaths, Deaths}, infinity) of + ok -> ok; + {promote, CPid} -> {become, rabbit_mirror_queue_coordinator, [CPid]} + end. %% --------------------------------------------------------------------------- %% Others @@ -374,8 +395,8 @@ infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. i(pid, _State) -> self(); i(name, #state { q = #amqqueue { name = Name } }) -> Name; -i(master_pid, #state { master_pid = MPid }) -> MPid; -i(is_synchronised, #state { synchronised = Synchronised }) -> Synchronised; +i(master_pid, #state { q = #amqqueue { pid = MPid } }) -> MPid; +i(is_synchronised, #state { depth_delta = DD }) -> DD =:= 0; i(Item, _State) -> throw({bad_argument, Item}). bq_init(BQ, Q, Recover) -> @@ -393,14 +414,20 @@ run_backing_queue(Mod, Fun, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }. -needs_confirming(#delivery{ msg_seq_no = undefined }, _State) -> - never; -needs_confirming(#delivery { message = #basic_message { - is_persistent = true } }, - #state { q = #amqqueue { durable = true } }) -> - eventually; -needs_confirming(_Delivery, _State) -> - immediately. +send_or_record_confirm(_, #delivery{ msg_seq_no = undefined }, MS, _State) -> + MS; +send_or_record_confirm(published, #delivery { sender = ChPid, + msg_seq_no = MsgSeqNo, + message = #basic_message { + id = MsgId, + is_persistent = true } }, + MS, #state { q = #amqqueue { durable = true } }) -> + dict:store(MsgId, {published, ChPid, MsgSeqNo} , MS); +send_or_record_confirm(_Status, #delivery { sender = ChPid, + msg_seq_no = MsgSeqNo }, + MS, _State) -> + ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]), + MS. confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> {CMs, MS1} = @@ -412,16 +439,16 @@ confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> %% If it needed confirming, it'll have %% already been done. Acc; - {ok, {published, ChPid}} -> + {ok, published} -> %% Still not seen it from the channel, just %% record that it's been confirmed. - {CMsN, dict:store(MsgId, {confirmed, ChPid}, MSN)}; + {CMsN, dict:store(MsgId, confirmed, MSN)}; {ok, {published, ChPid, MsgSeqNo}} -> %% Seen from both GM and Channel. Can now %% confirm. {rabbit_misc:gb_trees_cons(ChPid, MsgSeqNo, CMsN), dict:erase(MsgId, MSN)}; - {ok, {confirmed, _ChPid}} -> + {ok, confirmed} -> %% It's already been confirmed. This is %% probably it's been both sync'd to disk %% and then delivered and ack'd before we've @@ -436,6 +463,9 @@ confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> handle_process_result({ok, State}) -> noreply(State); handle_process_result({stop, State}) -> {stop, normal, State}. +-ifdef(use_specs). +-spec(promote_me/2 :: ({pid(), term()}, #state{}) -> no_return()). +-endif. promote_me(From, #state { q = Q = #amqqueue { name = QName }, gm = GM, backing_queue = BQ, @@ -445,17 +475,14 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, msg_id_ack = MA, msg_id_status = MS, known_senders = KS }) -> - rabbit_event:notify(queue_slave_promoted, [{pid, self()}, - {name, QName}]), rabbit_log:info("Mirrored-queue (~s): Promoting slave ~s to master~n", [rabbit_misc:rs(QName), rabbit_misc:pid_to_string(self())]), Q1 = Q #amqqueue { pid = self() }, {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( Q1, GM, rabbit_mirror_queue_master:sender_death_fun(), - rabbit_mirror_queue_master:length_fun()), + rabbit_mirror_queue_master:depth_fun()), true = unlink(GM), gen_server2:reply(From, {promote, CPid}), - ok = gm:confirmed_broadcast(GM, heartbeat), %% Everything that we're monitoring, we need to ensure our new %% coordinator is monitoring. @@ -463,8 +490,7 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, ok = rabbit_mirror_queue_coordinator:ensure_monitoring(CPid, MPids), %% We find all the messages that we've received from channels but - %% not from gm, and if they're due to be enqueued on promotion - %% then we pass them to the + %% not from gm, and pass them to the %% queue_process:init_with_backing_queue_state to be enqueued. %% %% We also have to requeue messages which are pending acks: the @@ -492,18 +518,18 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, %% %% MS contains the following three entry types: %% - %% a) {published, ChPid}: + %% a) published: %% published via gm only; pending arrival of publication from %% channel, maybe pending confirm. %% %% b) {published, ChPid, MsgSeqNo}: %% published via gm and channel; pending confirm. %% - %% c) {confirmed, ChPid}: + %% c) confirmed: %% published via gm only, and confirmed; pending publication %% from channel. %% - %% d) discarded + %% d) discarded: %% seen via gm only as discarded. Pending publication from %% channel %% @@ -520,96 +546,76 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, %% those messages are then requeued. However, as discussed above, %% this does not affect MS, nor which bits go through to SS in %% Master, or MTC in queue_process. - %% - %% Everything that's in MA gets requeued. Consequently the new - %% master should start with a fresh AM as there are no messages - %% pending acks. - MSList = dict:to_list(MS), - SS = dict:from_list( - [E || E = {_MsgId, discarded} <- MSList] ++ - [{MsgId, Status} - || {MsgId, {Status, _ChPid}} <- MSList, - Status =:= published orelse Status =:= confirmed]), + St = [published, confirmed, discarded], + SS = dict:filter(fun (_MsgId, Status) -> lists:member(Status, St) end, MS), + AckTags = [AckTag || {_MsgId, AckTag} <- dict:to_list(MA)], MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM, SS, MPids), - - MTC = lists:foldl(fun ({MsgId, {published, ChPid, MsgSeqNo}}, MTC0) -> - gb_trees:insert(MsgId, {ChPid, MsgSeqNo}, MTC0); - (_, MTC0) -> - MTC0 - end, gb_trees:empty(), MSList), - NumAckTags = [NumAckTag || {_MsgId, NumAckTag} <- dict:to_list(MA)], - AckTags = [AckTag || {_Num, AckTag} <- lists:sort(NumAckTags)], - Deliveries = [Delivery || {_ChPid, {PubQ, _PendCh}} <- dict:to_list(SQ), - {Delivery, true} <- queue:to_list(PubQ)], - QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( - Q1, rabbit_mirror_queue_master, MasterState, RateTRef, - AckTags, Deliveries, KS, MTC), - {become, rabbit_amqqueue_process, QueueState, hibernate}. + QName, CPid, BQ, BQS, GM, AckTags, SS, MPids), + + MTC = dict:fold(fun (MsgId, {published, ChPid, MsgSeqNo}, MTC0) -> + gb_trees:insert(MsgId, {ChPid, MsgSeqNo}, MTC0); + (_Msgid, _Status, MTC0) -> + MTC0 + end, gb_trees:empty(), MS), + Deliveries = [Delivery || + {_ChPid, {PubQ, _PendCh, _ChState}} <- dict:to_list(SQ), + Delivery <- queue:to_list(PubQ)], + AwaitGmDown = [ChPid || {ChPid, {_, _, down_from_ch}} <- dict:to_list(SQ)], + KS1 = lists:foldl(fun (ChPid0, KS0) -> + pmon:demonitor(ChPid0, KS0) + end, KS, AwaitGmDown), + rabbit_amqqueue_process:init_with_backing_queue_state( + Q1, rabbit_mirror_queue_master, MasterState, RateTRef, Deliveries, KS1, + MTC). noreply(State) -> {NewState, Timeout} = next_state(State), - {noreply, NewState, Timeout}. + {noreply, ensure_rate_timer(NewState), Timeout}. reply(Reply, State) -> {NewState, Timeout} = next_state(State), - {reply, Reply, NewState, Timeout}. + {reply, Reply, ensure_rate_timer(NewState), Timeout}. next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) -> {MsgIds, BQS1} = BQ:drain_confirmed(BQS), - State1 = ensure_rate_timer( - confirm_messages(MsgIds, State #state { - backing_queue_state = BQS1 })), + State1 = confirm_messages(MsgIds, + State #state { backing_queue_state = BQS1 }), case BQ:needs_timeout(BQS1) of - false -> {stop_sync_timer(State1), hibernate}; - idle -> {stop_sync_timer(State1), 0 }; - timed -> {ensure_sync_timer(State1), 0 } + false -> {stop_sync_timer(State1), hibernate }; + idle -> {stop_sync_timer(State1), ?SYNC_INTERVAL}; + timed -> {ensure_sync_timer(State1), 0 } end. backing_queue_timeout(State = #state { backing_queue = BQ }) -> run_backing_queue(BQ, fun (M, BQS) -> M:timeout(BQS) end, State). -ensure_sync_timer(State = #state { sync_timer_ref = undefined }) -> - TRef = erlang:send_after(?SYNC_INTERVAL, self(), sync_timeout), - State #state { sync_timer_ref = TRef }; ensure_sync_timer(State) -> - State. + rabbit_misc:ensure_timer(State, #state.sync_timer_ref, + ?SYNC_INTERVAL, sync_timeout). + +stop_sync_timer(State) -> rabbit_misc:stop_timer(State, #state.sync_timer_ref). -stop_sync_timer(State = #state { sync_timer_ref = undefined }) -> - State; -stop_sync_timer(State = #state { sync_timer_ref = TRef }) -> - erlang:cancel_timer(TRef), - State #state { sync_timer_ref = undefined }. - -ensure_rate_timer(State = #state { rate_timer_ref = undefined }) -> - TRef = erlang:send_after(?RAM_DURATION_UPDATE_INTERVAL, - self(), update_ram_duration), - State #state { rate_timer_ref = TRef }; -ensure_rate_timer(State = #state { rate_timer_ref = just_measured }) -> - State #state { rate_timer_ref = undefined }; ensure_rate_timer(State) -> - State. + rabbit_misc:ensure_timer(State, #state.rate_timer_ref, + ?RAM_DURATION_UPDATE_INTERVAL, + update_ram_duration). -stop_rate_timer(State = #state { rate_timer_ref = undefined }) -> - State; -stop_rate_timer(State = #state { rate_timer_ref = just_measured }) -> - State #state { rate_timer_ref = undefined }; -stop_rate_timer(State = #state { rate_timer_ref = TRef }) -> - erlang:cancel_timer(TRef), - State #state { rate_timer_ref = undefined }. +stop_rate_timer(State) -> rabbit_misc:stop_timer(State, #state.rate_timer_ref). ensure_monitoring(ChPid, State = #state { known_senders = KS }) -> State #state { known_senders = pmon:monitor(ChPid, KS) }. -local_sender_death(ChPid, State = #state { known_senders = KS }) -> +local_sender_death(ChPid, #state { known_senders = KS }) -> + %% The channel will be monitored iff we have received a delivery + %% from it but not heard about its death from the master. So if it + %% is monitored we need to point the death out to the master (see + %% essay). ok = case pmon:is_monitored(ChPid, KS) of false -> ok; - true -> credit_flow:peer_down(ChPid), - confirm_sender_death(ChPid) - end, - State. + true -> confirm_sender_death(ChPid) + end. confirm_sender_death(Pid) -> %% We have to deal with the possibility that we'll be promoted to @@ -620,6 +626,10 @@ confirm_sender_death(Pid) -> fun (?MODULE, State = #state { known_senders = KS, gm = GM }) -> %% We're running still as a slave + %% + %% See comment in local_sender_death/2; we might have + %% received a sender_death in the meanwhile so check + %% again. ok = case pmon:is_monitored(Pid, KS) of false -> ok; true -> gm:broadcast(GM, {ensure_monitoring, [Pid]}), @@ -634,62 +644,62 @@ confirm_sender_death(Pid) -> State end, %% Note that we do not remove our knowledge of this ChPid until we - %% get the sender_death from GM. + %% get the sender_death from GM as well as a DOWN notification. {ok, _TRef} = timer:apply_after( ?DEATH_TIMEOUT, rabbit_amqqueue, run_backing_queue, [self(), rabbit_mirror_queue_master, Fun]), ok. +forget_sender(running, _) -> false; +forget_sender(_, running) -> false; +forget_sender(Down1, Down2) when Down1 =/= Down2 -> true. + +%% Record and process lifetime events from channels. Forget all about a channel +%% only when down notifications are received from both the channel and from gm. +maybe_forget_sender(ChPid, ChState, State = #state { sender_queues = SQ, + msg_id_status = MS, + known_senders = KS }) -> + case dict:find(ChPid, SQ) of + error -> + State; + {ok, {MQ, PendCh, ChStateRecord}} -> + case forget_sender(ChState, ChStateRecord) of + true -> + credit_flow:peer_down(ChPid), + State #state { sender_queues = dict:erase(ChPid, SQ), + msg_id_status = lists:foldl( + fun dict:erase/2, + MS, sets:to_list(PendCh)), + known_senders = pmon:demonitor(ChPid, KS) }; + false -> + SQ1 = dict:store(ChPid, {MQ, PendCh, ChState}, SQ), + State #state { sender_queues = SQ1 } + end + end. + maybe_enqueue_message( - Delivery = #delivery { message = #basic_message { id = MsgId }, - msg_seq_no = MsgSeqNo, - sender = ChPid }, - EnqueueOnPromotion, + Delivery = #delivery { message = #basic_message { id = MsgId }, + sender = ChPid }, State = #state { sender_queues = SQ, msg_id_status = MS }) -> State1 = ensure_monitoring(ChPid, State), %% We will never see {published, ChPid, MsgSeqNo} here. case dict:find(MsgId, MS) of error -> - {MQ, PendingCh} = get_sender_queue(ChPid, SQ), - MQ1 = queue:in({Delivery, EnqueueOnPromotion}, MQ), - SQ1 = dict:store(ChPid, {MQ1, PendingCh}, SQ), + {MQ, PendingCh, ChState} = get_sender_queue(ChPid, SQ), + MQ1 = queue:in(Delivery, MQ), + SQ1 = dict:store(ChPid, {MQ1, PendingCh, ChState}, SQ), State1 #state { sender_queues = SQ1 }; - {ok, {confirmed, ChPid}} -> - %% BQ has confirmed it but we didn't know what the - %% msg_seq_no was at the time. We do now! - ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]), - SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), - State1 #state { sender_queues = SQ1, - msg_id_status = dict:erase(MsgId, MS) }; - {ok, {published, ChPid}} -> - %% It was published to the BQ and we didn't know the - %% msg_seq_no so couldn't confirm it at the time. - case needs_confirming(Delivery, State1) of - never -> - SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), - State1 #state { msg_id_status = dict:erase(MsgId, MS), - sender_queues = SQ1 }; - eventually -> - State1 #state { - msg_id_status = - dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS) }; - immediately -> - ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]), - SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), - State1 #state { msg_id_status = dict:erase(MsgId, MS), - sender_queues = SQ1 } - end; - {ok, discarded} -> - %% We've already heard from GM that the msg is to be - %% discarded. We won't see this again. + {ok, Status} -> + MS1 = send_or_record_confirm( + Status, Delivery, dict:erase(MsgId, MS), State1), SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), - State1 #state { msg_id_status = dict:erase(MsgId, MS), + State1 #state { msg_id_status = MS1, sender_queues = SQ1 } end. get_sender_queue(ChPid, SQ) -> case dict:find(ChPid, SQ) of - error -> {queue:new(), sets:new()}; + error -> {queue:new(), sets:new(), running}; {ok, Val} -> Val end. @@ -697,49 +707,32 @@ remove_from_pending_ch(MsgId, ChPid, SQ) -> case dict:find(ChPid, SQ) of error -> SQ; - {ok, {MQ, PendingCh}} -> - dict:store(ChPid, {MQ, sets:del_element(MsgId, PendingCh)}, SQ) + {ok, {MQ, PendingCh, ChState}} -> + dict:store(ChPid, {MQ, sets:del_element(MsgId, PendingCh), ChState}, + SQ) end. -process_instruction( - {publish, Deliver, ChPid, MsgProps, Msg = #basic_message { id = MsgId }}, - State = #state { sender_queues = SQ, - backing_queue = BQ, - backing_queue_state = BQS, - msg_id_status = MS }) -> - - %% We really are going to do the publish right now, even though we - %% may not have seen it directly from the channel. As a result, we - %% may know that it needs confirming without knowing its - %% msg_seq_no, which means that we can see the confirmation come - %% back from the backing queue without knowing the msg_seq_no, - %% which means that we're going to have to hang on to the fact - %% that we've seen the msg_id confirmed until we can associate it - %% with a msg_seq_no. +publish_or_discard(Status, ChPid, MsgId, + State = #state { sender_queues = SQ, msg_id_status = MS }) -> + %% We really are going to do the publish/discard right now, even + %% though we may not have seen it directly from the channel. But + %% we cannot issue confirms until the latter has happened. So we + %% need to keep track of the MsgId and its confirmation status in + %% the meantime. State1 = ensure_monitoring(ChPid, State), - {MQ, PendingCh} = get_sender_queue(ChPid, SQ), + {MQ, PendingCh, ChState} = get_sender_queue(ChPid, SQ), {MQ1, PendingCh1, MS1} = case queue:out(MQ) of {empty, _MQ2} -> {MQ, sets:add_element(MsgId, PendingCh), - dict:store(MsgId, {published, ChPid}, MS)}; - {{value, {Delivery = #delivery { - msg_seq_no = MsgSeqNo, - message = #basic_message { id = MsgId } }, - _EnqueueOnPromotion}}, MQ2} -> - %% We received the msg from the channel first. Thus we - %% need to deal with confirms here. - case needs_confirming(Delivery, State1) of - never -> - {MQ2, PendingCh, MS}; - eventually -> - {MQ2, PendingCh, - dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS)}; - immediately -> - ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]), - {MQ2, PendingCh, MS} - end; - {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} -> + dict:store(MsgId, Status, MS)}; + {{value, Delivery = #delivery { + message = #basic_message { id = MsgId } }}, MQ2} -> + {MQ2, PendingCh, + %% We received the msg from the channel first. Thus + %% we need to deal with confirms here. + send_or_record_confirm(Status, Delivery, MS, State1)}; + {{value, #delivery {}}, _MQ2} -> %% The instruction was sent to us before we were %% within the slave_pids within the #amqqueue{} %% record. We'll never receive the message directly @@ -747,88 +740,47 @@ process_instruction( %% expecting any confirms from us. {MQ, PendingCh, MS} end, - - SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ), - State2 = State1 #state { sender_queues = SQ1, msg_id_status = MS1 }, - - {ok, - case Deliver of - false -> - BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), - State2 #state { backing_queue_state = BQS1 }; - {true, AckRequired} -> - {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, - ChPid, BQS), - maybe_store_ack(AckRequired, MsgId, AckTag, - State2 #state { backing_queue_state = BQS1 }) - end}; -process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }}, - State = #state { sender_queues = SQ, - backing_queue = BQ, - backing_queue_state = BQS, - msg_id_status = MS }) -> - %% Many of the comments around the publish head above apply here - %% too. - State1 = ensure_monitoring(ChPid, State), - {MQ, PendingCh} = get_sender_queue(ChPid, SQ), - {MQ1, PendingCh1, MS1} = - case queue:out(MQ) of - {empty, _MQ} -> - {MQ, sets:add_element(MsgId, PendingCh), - dict:store(MsgId, discarded, MS)}; - {{value, {#delivery { message = #basic_message { id = MsgId } }, - _EnqueueOnPromotion}}, MQ2} -> - %% We've already seen it from the channel, we're not - %% going to see this again, so don't add it to MS - {MQ2, PendingCh, MS}; - {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} -> - %% The instruction was sent to us before we were - %% within the slave_pids within the #amqqueue{} - %% record. We'll never receive the message directly - %% from the channel. - {MQ, PendingCh, MS} - end, - SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ), - BQS1 = BQ:discard(Msg, ChPid, BQS), - {ok, State1 #state { sender_queues = SQ1, - msg_id_status = MS1, - backing_queue_state = BQS1 }}; -process_instruction({set_length, Length, AckRequired}, + SQ1 = dict:store(ChPid, {MQ1, PendingCh1, ChState}, SQ), + State1 #state { sender_queues = SQ1, msg_id_status = MS1 }. + + +process_instruction({publish, ChPid, MsgProps, + Msg = #basic_message { id = MsgId }}, State) -> + State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = + publish_or_discard(published, ChPid, MsgId, State), + BQS1 = BQ:publish(Msg, MsgProps, true, ChPid, BQS), + {ok, State1 #state { backing_queue_state = BQS1 }}; +process_instruction({publish_delivered, ChPid, MsgProps, + Msg = #basic_message { id = MsgId }}, State) -> + State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = + publish_or_discard(published, ChPid, MsgId, State), + true = BQ:is_empty(BQS), + {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, BQS), + {ok, maybe_store_ack(true, MsgId, AckTag, + State1 #state { backing_queue_state = BQS1 })}; +process_instruction({discard, ChPid, MsgId}, State) -> + State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = + publish_or_discard(discarded, ChPid, MsgId, State), + BQS1 = BQ:discard(MsgId, ChPid, BQS), + {ok, State1 #state { backing_queue_state = BQS1 }}; +process_instruction({drop, Length, Dropped, AckRequired}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> QLen = BQ:len(BQS), - ToDrop = QLen - Length, - {ok, - case ToDrop >= 0 of - true -> - State1 = - lists:foldl( - fun (const, StateN = #state {backing_queue_state = BQSN}) -> - {{#basic_message{id = MsgId}, _IsDelivered, AckTag, - _Remaining}, BQSN1} = BQ:fetch(AckRequired, BQSN), - maybe_store_ack( - AckRequired, MsgId, AckTag, - StateN #state { backing_queue_state = BQSN1 }) - end, State, lists:duplicate(ToDrop, const)), - set_synchronised(true, State1); - false -> - State - end}; -process_instruction({fetch, AckRequired, MsgId, Remaining}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - QLen = BQ:len(BQS), - {ok, case QLen - 1 of - Remaining -> - {{#basic_message{id = MsgId}, _IsDelivered, - AckTag, Remaining}, BQS1} = BQ:fetch(AckRequired, BQS), - maybe_store_ack(AckRequired, MsgId, AckTag, - State #state { backing_queue_state = BQS1 }); - Other when Other + 1 =:= Remaining -> - set_synchronised(true, State); - Other when Other < Remaining -> - %% we must be shorter than the master - State + ToDrop = case QLen - Length of + N when N > 0 -> N; + _ -> 0 + end, + State1 = lists:foldl( + fun (const, StateN = #state{backing_queue_state = BQSN}) -> + {{MsgId, AckTag}, BQSN1} = BQ:drop(AckRequired, BQSN), + maybe_store_ack( + AckRequired, MsgId, AckTag, + StateN #state { backing_queue_state = BQSN1 }) + end, State, lists:duplicate(ToDrop, const)), + {ok, case AckRequired of + true -> State1; + false -> update_delta(ToDrop - Dropped, State1) end}; process_instruction({ack, MsgIds}, State = #state { backing_queue = BQ, @@ -837,48 +789,31 @@ process_instruction({ack, MsgIds}, {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), {MsgIds1, BQS1} = BQ:ack(AckTags, BQS), [] = MsgIds1 -- MsgIds, %% ASSERTION - {ok, State #state { msg_id_ack = MA1, - backing_queue_state = BQS1 }}; + {ok, update_delta(length(MsgIds1) - length(MsgIds), + State #state { msg_id_ack = MA1, + backing_queue_state = BQS1 })}; process_instruction({requeue, MsgIds}, State = #state { backing_queue = BQ, backing_queue_state = BQS, msg_id_ack = MA }) -> {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), - {ok, case length(AckTags) =:= length(MsgIds) of - true -> - {MsgIds, BQS1} = BQ:requeue(AckTags, BQS), - State #state { msg_id_ack = MA1, - backing_queue_state = BQS1 }; - false -> - %% The only thing we can safely do is nuke out our BQ - %% and MA. The interaction between this and confirms - %% doesn't really bear thinking about... - {_Count, BQS1} = BQ:purge(BQS), - {_MsgIds, BQS2} = ack_all(BQ, MA, BQS1), - State #state { msg_id_ack = dict:new(), - backing_queue_state = BQS2 } - end}; + {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), + {ok, State #state { msg_id_ack = MA1, + backing_queue_state = BQS1 }}; process_instruction({sender_death, ChPid}, - State = #state { sender_queues = SQ, - msg_id_status = MS, - known_senders = KS }) -> + State = #state { known_senders = KS }) -> + %% The channel will be monitored iff we have received a message + %% from it. In this case we just want to avoid doing work if we + %% never got any messages. {ok, case pmon:is_monitored(ChPid, KS) of false -> State; - true -> MS1 = case dict:find(ChPid, SQ) of - error -> - MS; - {ok, {_MQ, PendingCh}} -> - lists:foldl(fun dict:erase/2, MS, - sets:to_list(PendingCh)) - end, - State #state { sender_queues = dict:erase(ChPid, SQ), - msg_id_status = MS1, - known_senders = pmon:demonitor(ChPid, KS) } + true -> maybe_forget_sender(ChPid, down_from_gm, State) end}; -process_instruction({length, Length}, - State = #state { backing_queue = BQ, +process_instruction({depth, Depth}, + State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - {ok, set_synchronised(Length =:= BQ:len(BQS), State)}; + {ok, set_delta(Depth - BQ:depth(BQS), State)}; + process_instruction({delete_and_terminate, Reason}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> @@ -890,31 +825,56 @@ msg_ids_to_acktags(MsgIds, MA) -> lists:foldl( fun (MsgId, {Acc, MAN}) -> case dict:find(MsgId, MA) of - error -> {Acc, MAN}; - {ok, {_Num, AckTag}} -> {[AckTag | Acc], - dict:erase(MsgId, MAN)} + error -> {Acc, MAN}; + {ok, AckTag} -> {[AckTag | Acc], dict:erase(MsgId, MAN)} end end, {[], MA}, MsgIds), {lists:reverse(AckTags), MA1}. -ack_all(BQ, MA, BQS) -> - BQ:ack([AckTag || {_MsgId, {_Num, AckTag}} <- dict:to_list(MA)], BQS). - maybe_store_ack(false, _MsgId, _AckTag, State) -> State; -maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA, - ack_num = Num }) -> - State #state { msg_id_ack = dict:store(MsgId, {Num, AckTag}, MA), - ack_num = Num + 1 }. - -%% We intentionally leave out the head where a slave becomes -%% unsynchronised: we assert that can never happen. -set_synchronised(true, State = #state { q = #amqqueue { name = QName }, - synchronised = false }) -> - rabbit_event:notify(queue_slave_synchronised, [{pid, self()}, - {name, QName}]), - State #state { synchronised = true }; -set_synchronised(true, State) -> +maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA }) -> + State #state { msg_id_ack = dict:store(MsgId, AckTag, MA) }. + +set_delta(0, State = #state { depth_delta = undefined }) -> + ok = record_synchronised(State#state.q), + State #state { depth_delta = 0 }; +set_delta(NewDelta, State = #state { depth_delta = undefined }) -> + true = NewDelta > 0, %% assertion + State #state { depth_delta = NewDelta }; +set_delta(NewDelta, State = #state { depth_delta = Delta }) -> + update_delta(NewDelta - Delta, State). + +update_delta(_DeltaChange, State = #state { depth_delta = undefined }) -> State; -set_synchronised(false, State = #state { synchronised = false }) -> - State. +update_delta( DeltaChange, State = #state { depth_delta = 0 }) -> + 0 = DeltaChange, %% assertion: we cannot become unsync'ed + State; +update_delta( DeltaChange, State = #state { depth_delta = Delta }) -> + true = DeltaChange =< 0, %% assertion: we cannot become 'less' sync'ed + set_delta(Delta + DeltaChange, State #state { depth_delta = undefined }). + +update_ram_duration(BQ, BQS) -> + {RamDuration, BQS1} = BQ:ram_duration(BQS), + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQ:set_ram_duration_target(DesiredDuration, BQS1). + +%% [1] - the arrival of this newly synced slave may cause the master to die if +%% the admin has requested a migration-type change to policy. +record_synchronised(#amqqueue { name = QName }) -> + Self = self(), + case rabbit_misc:execute_mnesia_transaction( + fun () -> + case mnesia:read({rabbit_queue, QName}) of + [] -> + ok; + [Q1 = #amqqueue { sync_slave_pids = SSPids }] -> + Q2 = Q1#amqqueue{sync_slave_pids = [Self | SSPids]}, + rabbit_mirror_queue_misc:store_updated_slaves(Q2), + {ok, Q1, Q2} + end + end) of + ok -> ok; + {ok, Q1, Q2} -> rabbit_mirror_queue_misc:update_mirrors(Q1, Q2) %% [1] + end. diff --git a/src/rabbit_mirror_queue_slave_sup.erl b/src/rabbit_mirror_queue_slave_sup.erl index a2034876..6fba99db 100644 --- a/src/rabbit_mirror_queue_slave_sup.erl +++ b/src/rabbit_mirror_queue_slave_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2010-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2010-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_mirror_queue_slave_sup). @@ -31,7 +31,7 @@ start_link() -> supervisor2:start_link({local, ?SERVER}, ?MODULE, []). start_child(Node, Args) -> supervisor2:start_child({?SERVER, Node}, Args). init([]) -> - {ok, {{simple_one_for_one_terminate, 10, 10}, + {ok, {{simple_one_for_one, 10, 10}, [{rabbit_mirror_queue_slave, {rabbit_mirror_queue_slave, start_link, []}, temporary, ?MAX_WAIT, worker, [rabbit_mirror_queue_slave]}]}}. diff --git a/src/rabbit_mirror_queue_sync.erl b/src/rabbit_mirror_queue_sync.erl new file mode 100644 index 00000000..61e90105 --- /dev/null +++ b/src/rabbit_mirror_queue_sync.erl @@ -0,0 +1,260 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2010-2012 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_sync). + +-include("rabbit.hrl"). + +-export([master_prepare/3, master_go/7, slave/7]). + +-define(SYNC_PROGRESS_INTERVAL, 1000000). + +%% There are three processes around, the master, the syncer and the +%% slave(s). The syncer is an intermediary, linked to the master in +%% order to make sure we do not mess with the master's credit flow or +%% set of monitors. +%% +%% Interactions +%% ------------ +%% +%% '*' indicates repeating messages. All are standard Erlang messages +%% except sync_start which is sent over GM to flush out any other +%% messages that we might have sent that way already. (credit) is the +%% usual credit_flow bump message every so often. +%% +%% Master Syncer Slave(s) +%% sync_mirrors -> || || +%% (from channel) || -- (spawns) --> || || +%% || --------- sync_start (over GM) -------> || +%% || || <--- sync_ready ---- || +%% || || (or) || +%% || || <--- sync_deny ----- || +%% || <--- ready ---- || || +%% || <--- next* ---- || || } +%% || ---- msg* ----> || || } loop +%% || || ---- sync_msg* ----> || } +%% || || <--- (credit)* ----- || } +%% || <--- next ---- || || +%% || ---- done ----> || || +%% || || -- sync_complete --> || +%% || (Dies) || + +-ifdef(use_specs). + +-type(log_fun() :: fun ((string(), [any()]) -> 'ok')). +-type(bq() :: atom()). +-type(bqs() :: any()). +-type(ack() :: any()). +-type(slave_sync_state() :: {[{rabbit_types:msg_id(), ack()}], timer:tref(), + bqs()}). + +-spec(master_prepare/3 :: (reference(), log_fun(), [pid()]) -> pid()). +-spec(master_go/7 :: (pid(), reference(), log_fun(), + rabbit_mirror_queue_master:stats_fun(), + rabbit_mirror_queue_master:stats_fun(), + bq(), bqs()) -> + {'already_synced', bqs()} | {'ok', bqs()} | + {'shutdown', any(), bqs()} | + {'sync_died', any(), bqs()}). +-spec(slave/7 :: (non_neg_integer(), reference(), timer:tref(), pid(), + bq(), bqs(), fun((bq(), bqs()) -> {timer:tref(), bqs()})) -> + 'denied' | + {'ok' | 'failed', slave_sync_state()} | + {'stop', any(), slave_sync_state()}). + +-endif. + +%% --------------------------------------------------------------------------- +%% Master + +master_prepare(Ref, Log, SPids) -> + MPid = self(), + spawn_link(fun () -> syncer(Ref, Log, MPid, SPids) end). + +master_go(Syncer, Ref, Log, HandleInfo, EmitStats, BQ, BQS) -> + Args = {Syncer, Ref, Log, HandleInfo, EmitStats, rabbit_misc:get_parent()}, + receive + {'EXIT', Syncer, normal} -> {already_synced, BQS}; + {'EXIT', Syncer, Reason} -> {sync_died, Reason, BQS}; + {ready, Syncer} -> EmitStats({syncing, 0}), + master_go0(Args, BQ, BQS) + end. + +master_go0(Args, BQ, BQS) -> + case BQ:fold(fun (Msg, MsgProps, Unacked, Acc) -> + master_send(Msg, MsgProps, Unacked, Args, Acc) + end, {0, erlang:now()}, BQS) of + {{shutdown, Reason}, BQS1} -> {shutdown, Reason, BQS1}; + {{sync_died, Reason}, BQS1} -> {sync_died, Reason, BQS1}; + {_, BQS1} -> master_done(Args, BQS1) + end. + +master_send(Msg, MsgProps, Unacked, + {Syncer, Ref, Log, HandleInfo, EmitStats, Parent}, {I, Last}) -> + T = case timer:now_diff(erlang:now(), Last) > ?SYNC_PROGRESS_INTERVAL of + true -> EmitStats({syncing, I}), + Log("~p messages", [I]), + erlang:now(); + false -> Last + end, + HandleInfo({syncing, I}), + receive + {'$gen_cast', {set_maximum_since_use, Age}} -> + ok = file_handle_cache:set_maximum_since_use(Age) + after 0 -> + ok + end, + receive + {'$gen_call', From, + cancel_sync_mirrors} -> stop_syncer(Syncer, {cancel, Ref}), + gen_server2:reply(From, ok), + {stop, cancelled}; + {next, Ref} -> Syncer ! {msg, Ref, Msg, MsgProps, Unacked}, + {cont, {I + 1, T}}; + {'EXIT', Parent, Reason} -> {stop, {shutdown, Reason}}; + {'EXIT', Syncer, Reason} -> {stop, {sync_died, Reason}} + end. + +master_done({Syncer, Ref, _Log, _HandleInfo, _EmitStats, Parent}, BQS) -> + receive + {next, Ref} -> stop_syncer(Syncer, {done, Ref}), + {ok, BQS}; + {'EXIT', Parent, Reason} -> {shutdown, Reason, BQS}; + {'EXIT', Syncer, Reason} -> {sync_died, Reason, BQS} + end. + +stop_syncer(Syncer, Msg) -> + unlink(Syncer), + Syncer ! Msg, + receive {'EXIT', Syncer, _} -> ok + after 0 -> ok + end. + +%% Master +%% --------------------------------------------------------------------------- +%% Syncer + +syncer(Ref, Log, MPid, SPids) -> + [erlang:monitor(process, SPid) || SPid <- SPids], + %% We wait for a reply from the slaves so that we know they are in + %% a receive block and will thus receive messages we send to them + %% *without* those messages ending up in their gen_server2 pqueue. + case [SPid || SPid <- SPids, + receive + {sync_ready, Ref, SPid} -> true; + {sync_deny, Ref, SPid} -> false; + {'DOWN', _, process, SPid, _} -> false + end] of + [] -> Log("all slaves already synced", []); + SPids1 -> MPid ! {ready, self()}, + Log("mirrors ~p to sync", [[node(SPid) || SPid <- SPids1]]), + syncer_loop(Ref, MPid, SPids1) + end. + +syncer_loop(Ref, MPid, SPids) -> + MPid ! {next, Ref}, + receive + {msg, Ref, Msg, MsgProps, Unacked} -> + SPids1 = wait_for_credit(SPids), + [begin + credit_flow:send(SPid), + SPid ! {sync_msg, Ref, Msg, MsgProps, Unacked} + end || SPid <- SPids1], + syncer_loop(Ref, MPid, SPids1); + {cancel, Ref} -> + %% We don't tell the slaves we will die - so when we do + %% they interpret that as a failure, which is what we + %% want. + ok; + {done, Ref} -> + [SPid ! {sync_complete, Ref} || SPid <- SPids] + end. + +wait_for_credit(SPids) -> + case credit_flow:blocked() of + true -> receive + {bump_credit, Msg} -> + credit_flow:handle_bump_msg(Msg), + wait_for_credit(SPids); + {'DOWN', _, process, SPid, _} -> + credit_flow:peer_down(SPid), + wait_for_credit(lists:delete(SPid, SPids)) + end; + false -> SPids + end. + +%% Syncer +%% --------------------------------------------------------------------------- +%% Slave + +slave(0, Ref, _TRef, Syncer, _BQ, _BQS, _UpdateRamDuration) -> + Syncer ! {sync_deny, Ref, self()}, + denied; + +slave(_DD, Ref, TRef, Syncer, BQ, BQS, UpdateRamDuration) -> + MRef = erlang:monitor(process, Syncer), + Syncer ! {sync_ready, Ref, self()}, + {_MsgCount, BQS1} = BQ:purge(BQ:purge_acks(BQS)), + slave_sync_loop({Ref, MRef, Syncer, BQ, UpdateRamDuration, + rabbit_misc:get_parent()}, {[], TRef, BQS1}). + +slave_sync_loop(Args = {Ref, MRef, Syncer, BQ, UpdateRamDuration, Parent}, + State = {MA, TRef, BQS}) -> + receive + {'DOWN', MRef, process, Syncer, _Reason} -> + %% If the master dies half way we are not in the usual + %% half-synced state (with messages nearer the tail of the + %% queue); instead we have ones nearer the head. If we then + %% sync with a newly promoted master, or even just receive + %% messages from it, we have a hole in the middle. So the + %% only thing to do here is purge. + {_MsgCount, BQS1} = BQ:purge(BQ:purge_acks(BQS)), + credit_flow:peer_down(Syncer), + {failed, {[], TRef, BQS1}}; + {bump_credit, Msg} -> + credit_flow:handle_bump_msg(Msg), + slave_sync_loop(Args, State); + {sync_complete, Ref} -> + erlang:demonitor(MRef, [flush]), + credit_flow:peer_down(Syncer), + {ok, State}; + {'$gen_cast', {set_maximum_since_use, Age}} -> + ok = file_handle_cache:set_maximum_since_use(Age), + slave_sync_loop(Args, State); + {'$gen_cast', {set_ram_duration_target, Duration}} -> + BQS1 = BQ:set_ram_duration_target(Duration, BQS), + slave_sync_loop(Args, {MA, TRef, BQS1}); + update_ram_duration -> + {TRef1, BQS1} = UpdateRamDuration(BQ, BQS), + slave_sync_loop(Args, {MA, TRef1, BQS1}); + {sync_msg, Ref, Msg, Props, Unacked} -> + credit_flow:ack(Syncer), + Props1 = Props#message_properties{needs_confirming = false}, + {MA1, BQS1} = + case Unacked of + false -> {MA, BQ:publish(Msg, Props1, true, none, BQS)}; + true -> {AckTag, BQS2} = BQ:publish_delivered( + Msg, Props1, none, BQS), + {[{Msg#basic_message.id, AckTag} | MA], BQS2} + end, + slave_sync_loop(Args, {MA1, TRef, BQS1}); + {'EXIT', Parent, Reason} -> + {stop, Reason, State}; + %% If the master throws an exception + {'$gen_cast', {gm, {delete_and_terminate, Reason}}} -> + BQ:delete_and_terminate(Reason, BQS), + {stop, Reason, {[], TRef, undefined}} + end. diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 93c784ec..bca9d5ce 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_misc). @@ -19,9 +19,9 @@ -include("rabbit_framing.hrl"). -export([method_record_type/1, polite_pause/0, polite_pause/1]). --export([die/1, frame_error/2, amqp_error/4, +-export([die/1, frame_error/2, amqp_error/4, quit/1, protocol_error/3, protocol_error/4, protocol_error/1]). --export([not_found/1, assert_args_equivalence/4]). +-export([not_found/1, absent/1, assert_args_equivalence/4]). -export([dirty_read/1]). -export([table_lookup/2, set_table_value/4]). -export([r/3, r/2, r_arg/4, rs/1]). @@ -29,24 +29,24 @@ -export([enable_cover/1, report_cover/1]). -export([start_cover/1]). -export([confirm_to_sender/2]). --export([throw_on_error/2, with_exit_handler/2, filter_exit_map/2]). --export([is_abnormal_termination/1]). +-export([throw_on_error/2, with_exit_handler/2, is_abnormal_exit/1, + filter_exit_map/2]). -export([with_user/2, with_user_and_vhost/3]). -export([execute_mnesia_transaction/1]). -export([execute_mnesia_transaction/2]). -export([execute_mnesia_tx_with_tail/1]). -export([ensure_ok/2]). --export([tcp_name/3]). +-export([tcp_name/3, format_inet_error/1]). -export([upmap/2, map_in_order/2]). -export([table_filter/3]). -export([dirty_read_all/1, dirty_foreach_key/2, dirty_dump_log/1]). -export([format/2, format_many/1, format_stderr/2]). -export([with_local_io/1, local_info_msg/2]). --export([start_applications/1, stop_applications/1]). -export([unfold/2, ceil/1, queue_fold/3]). -export([sort_field_table/1]). -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). +-export([version_minor_equivalent/2]). -export([dict_cons/3, orddict_cons/3, gb_trees_cons/3]). -export([gb_trees_fold/3, gb_trees_foreach/2]). -export([parse_arguments/3]). @@ -59,9 +59,24 @@ -export([format_message_queue/2]). -export([append_rpc_all_nodes/4]). -export([multi_call/2]). --export([quit/1]). -export([os_cmd/1]). -export([gb_sets_difference/2]). +-export([version/0, which_applications/0]). +-export([sequence_error/1]). +-export([json_encode/1, json_decode/1, json_to_term/1, term_to_json/1]). +-export([check_expiry/1]). +-export([base64url/1]). +-export([interval_operation/4]). +-export([ensure_timer/4, stop_timer/2]). +-export([get_parent/0]). + +%% Horrible macro to use in guards +-define(IS_BENIGN_EXIT(R), + R =:= noproc; R =:= noconnection; R =:= nodedown; R =:= normal; + R =:= shutdown). + +%% This is dictated by `erlang:send_after' on which we depend to implement TTL. +-define(MAX_EXPIRY_TIMER, 4294967295). %%---------------------------------------------------------------------------- @@ -87,6 +102,9 @@ -spec(polite_pause/1 :: (non_neg_integer()) -> 'done'). -spec(die/1 :: (rabbit_framing:amqp_exception()) -> channel_or_connection_exit()). + +-spec(quit/1 :: (integer()) -> no_return()). + -spec(frame_error/2 :: (rabbit_framing:amqp_method_name(), binary()) -> rabbit_types:connection_exit()). -spec(amqp_error/4 :: @@ -101,6 +119,7 @@ -spec(protocol_error/1 :: (rabbit_types:amqp_error()) -> channel_or_connection_exit()). -spec(not_found/1 :: (rabbit_types:r(atom())) -> rabbit_types:channel_exit()). +-spec(absent/1 :: (rabbit_types:amqqueue()) -> rabbit_types:channel_exit()). -spec(assert_args_equivalence/4 :: (rabbit_framing:amqp_table(), rabbit_framing:amqp_table(), rabbit_types:r(any()), [binary()]) -> @@ -123,9 +142,11 @@ when is_subtype(K, atom())). -spec(r_arg/4 :: (rabbit_types:vhost() | rabbit_types:r(atom()), K, - rabbit_framing:amqp_table(), binary()) - -> undefined | rabbit_types:r(K) - when is_subtype(K, atom())). + rabbit_framing:amqp_table(), binary()) -> + undefined | + rabbit_types:error( + {invalid_type, rabbit_framing:amqp_field_type()}) | + rabbit_types:r(K) when is_subtype(K, atom())). -spec(rs/1 :: (rabbit_types:r(atom())) -> string()). -spec(enable_cover/0 :: () -> ok_or_error()). -spec(start_cover/1 :: ([{string(), string()} | string()]) -> 'ok'). @@ -135,8 +156,8 @@ -spec(throw_on_error/2 :: (atom(), thunk(rabbit_types:error(any()) | {ok, A} | A)) -> A). -spec(with_exit_handler/2 :: (thunk(A), thunk(A)) -> A). +-spec(is_abnormal_exit/1 :: (any()) -> boolean()). -spec(filter_exit_map/2 :: (fun ((A) -> B), [A]) -> [B]). --spec(is_abnormal_termination/1 :: (any()) -> boolean()). -spec(with_user/2 :: (rabbit_types:username(), thunk(A)) -> A). -spec(with_user_and_vhost/3 :: (rabbit_types:username(), rabbit_types:vhost(), thunk(A)) @@ -150,6 +171,7 @@ -spec(tcp_name/3 :: (atom(), inet:ip_address(), rabbit_networking:ip_port()) -> atom()). +-spec(format_inet_error/1 :: (atom()) -> string()). -spec(upmap/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(map_in_order/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(table_filter/3:: (fun ((A) -> boolean()), fun ((A, boolean()) -> 'ok'), @@ -163,8 +185,6 @@ -spec(format_stderr/2 :: (string(), [any()]) -> 'ok'). -spec(with_local_io/1 :: (fun (() -> A)) -> A). -spec(local_info_msg/2 :: (string(), [any()]) -> 'ok'). --spec(start_applications/1 :: ([atom()]) -> 'ok'). --spec(stop_applications/1 :: ([atom()]) -> 'ok'). -spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). -spec(ceil/1 :: (number()) -> integer()). -spec(queue_fold/3 :: (fun ((any(), B) -> B), B, queue()) -> B). @@ -176,6 +196,7 @@ -spec(version_compare/3 :: (string(), string(), ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) -> boolean()). +-spec(version_minor_equivalent/2 :: (string(), string()) -> boolean()). -spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). -spec(orddict_cons/3 :: (any(), any(), orddict:orddict()) -> orddict:orddict()). -spec(gb_trees_cons/3 :: (any(), any(), gb_tree()) -> gb_tree()). @@ -210,10 +231,24 @@ -spec(append_rpc_all_nodes/4 :: ([node()], atom(), atom(), [any()]) -> [any()]). -spec(multi_call/2 :: ([pid()], any()) -> {[{pid(), any()}], [{pid(), any()}]}). --spec(quit/1 :: (integer() | string()) -> no_return()). -spec(os_cmd/1 :: (string()) -> string()). -spec(gb_sets_difference/2 :: (gb_set(), gb_set()) -> gb_set()). - +-spec(version/0 :: () -> string()). +-spec(which_applications/0 :: () -> [{atom(), string(), string()}]). +-spec(sequence_error/1 :: ([({'error', any()} | any())]) + -> {'error', any()} | any()). +-spec(json_encode/1 :: (any()) -> {'ok', string()} | {'error', any()}). +-spec(json_decode/1 :: (string()) -> {'ok', any()} | 'error'). +-spec(json_to_term/1 :: (any()) -> any()). +-spec(term_to_json/1 :: (any()) -> any()). +-spec(check_expiry/1 :: (integer()) -> rabbit_types:ok_or_error(any())). +-spec(base64url/1 :: (binary()) -> string()). +-spec(interval_operation/4 :: + ({atom(), atom(), any()}, float(), non_neg_integer(), non_neg_integer()) + -> {any(), non_neg_integer()}). +-spec(ensure_timer/4 :: (A, non_neg_integer(), non_neg_integer(), any()) -> A). +-spec(stop_timer/2 :: (A, non_neg_integer()) -> A). +-spec(get_parent/0 :: () -> pid()). -endif. %%---------------------------------------------------------------------------- @@ -250,6 +285,15 @@ protocol_error(#amqp_error{} = Error) -> not_found(R) -> protocol_error(not_found, "no ~s", [rs(R)]). +absent(#amqqueue{name = QueueName, pid = QPid, durable = true}) -> + %% The assertion of durability is mainly there because we mention + %% durability in the error message. That way we will hopefully + %% notice if at some future point our logic changes s.t. we get + %% here with non-durable queues. + protocol_error(not_found, + "home node '~s' of durable ~s is down or inaccessible", + [node(QPid), rs(QueueName)]). + type_class(byte) -> int; type_class(short) -> int; type_class(signedint) -> int; @@ -315,13 +359,12 @@ set_table_value(Table, Key, Type, Value) -> sort_field_table( lists:keystore(Key, 1, Table, {Key, Type, Value})). -r(#resource{virtual_host = VHostPath}, Kind, Name) - when is_binary(Name) -> +r(#resource{virtual_host = VHostPath}, Kind, Name) -> #resource{virtual_host = VHostPath, kind = Kind, name = Name}; -r(VHostPath, Kind, Name) when is_binary(Name) andalso is_binary(VHostPath) -> +r(VHostPath, Kind, Name) -> #resource{virtual_host = VHostPath, kind = Kind, name = Name}. -r(VHostPath, Kind) when is_binary(VHostPath) -> +r(VHostPath, Kind) -> #resource{virtual_host = VHostPath, kind = Kind, name = '_'}. r_arg(#resource{virtual_host = VHostPath}, Kind, Table, Key) -> @@ -329,7 +372,8 @@ r_arg(#resource{virtual_host = VHostPath}, Kind, Table, Key) -> r_arg(VHostPath, Kind, Table, Key) -> case table_lookup(Table, Key) of {longstr, NameBin} -> r(VHostPath, Kind, NameBin); - undefined -> undefined + undefined -> undefined; + {Type, _} -> {error, {invalid_type, Type}} end. rs(#resource{virtual_host = VHostPath, kind = Kind, name = Name}) -> @@ -391,6 +435,18 @@ report_coverage_percentage(File, Cov, NotCov, Mod) -> confirm_to_sender(Pid, MsgSeqNos) -> gen_server2:cast(Pid, {confirm, MsgSeqNos, self()}). +%% @doc Halts the emulator returning the given status code to the os. +%% On Windows this function will block indefinitely so as to give the io +%% subsystem time to flush stdout completely. +quit(Status) -> + case os:type() of + {unix, _} -> halt(Status); + {win32, _} -> init:stop(Status), + receive + after infinity -> ok + end + end. + throw_on_error(E, Thunk) -> case Thunk() of {error, Reason} -> throw({E, Reason}); @@ -402,13 +458,14 @@ with_exit_handler(Handler, Thunk) -> try Thunk() catch - exit:{R, _} when R =:= noproc; R =:= nodedown; - R =:= normal; R =:= shutdown -> - Handler(); - exit:{{R, _}, _} when R =:= nodedown; R =:= shutdown -> - Handler() + exit:{R, _} when ?IS_BENIGN_EXIT(R) -> Handler(); + exit:{{R, _}, _} when ?IS_BENIGN_EXIT(R) -> Handler() end. +is_abnormal_exit(R) when ?IS_BENIGN_EXIT(R) -> false; +is_abnormal_exit({R, _}) when ?IS_BENIGN_EXIT(R) -> false; +is_abnormal_exit(_) -> true. + filter_exit_map(F, L) -> Ref = make_ref(), lists:filter(fun (R) -> R =/= Ref end, @@ -416,11 +473,6 @@ filter_exit_map(F, L) -> fun () -> Ref end, fun () -> F(I) end) || I <- L]). -is_abnormal_termination(Reason) - when Reason =:= noproc; Reason =:= noconnection; - Reason =:= normal; Reason =:= shutdown -> false; -is_abnormal_termination({shutdown, _}) -> false; -is_abnormal_termination(_) -> true. with_user(Username, Thunk) -> fun () -> @@ -489,6 +541,10 @@ tcp_name(Prefix, IPAddress, Port) list_to_atom( format("~w_~s:~w", [Prefix, inet_parse:ntoa(IPAddress), Port])). +format_inet_error(address) -> "cannot connect to host/port"; +format_inet_error(timeout) -> "timed out"; +format_inet_error(Error) -> inet:format_error(Error). + %% This is a modified version of Luke Gorrie's pmap - %% http://lukego.livejournal.com/6753.html - that doesn't care about %% the order in which results are received. @@ -593,34 +649,6 @@ with_local_io(Fun) -> local_info_msg(Format, Args) -> with_local_io(fun () -> error_logger:info_msg(Format, Args) end). -manage_applications(Iterate, Do, Undo, SkipError, ErrorTag, Apps) -> - Iterate(fun (App, Acc) -> - case Do(App) of - ok -> [App | Acc]; - {error, {SkipError, _}} -> Acc; - {error, Reason} -> - lists:foreach(Undo, Acc), - throw({error, {ErrorTag, App, Reason}}) - end - end, [], Apps), - ok. - -start_applications(Apps) -> - manage_applications(fun lists:foldl/3, - fun application:start/1, - fun application:stop/1, - already_started, - cannot_start_application, - Apps). - -stop_applications(Apps) -> - manage_applications(fun lists:foldr/3, - fun application:stop/1, - fun application:start/1, - not_started, - cannot_stop_application, - Apps). - unfold(Fun, Init) -> unfold(Fun, [], Init). @@ -715,6 +743,16 @@ version_compare(A, B) -> ANum > BNum -> gt end. +%% a.b.c and a.b.d match, but a.b.c and a.d.e don't. If +%% versions do not match that pattern, just compare them. +version_minor_equivalent(A, B) -> + {ok, RE} = re:compile("^(\\d+\\.\\d+)(\\.\\d+)\$"), + Opts = [{capture, all_but_first, list}], + case {re:run(A, RE, Opts), re:run(B, RE, Opts)} of + {{match, [A1|_]}, {match, [B1|_]}} -> A1 =:= B1; + _ -> A =:= B + end. + dropdot(A) -> lists:dropwhile(fun (X) -> X =:= $. end, A). dict_cons(Key, Value, Dict) -> @@ -865,13 +903,8 @@ ntoab(IP) -> _ -> "[" ++ Str ++ "]" end. -is_process_alive(Pid) when node(Pid) =:= node() -> - erlang:is_process_alive(Pid); is_process_alive(Pid) -> - case rpc:call(node(Pid), erlang, is_process_alive, [Pid]) of - true -> true; - _ -> false - end. + rpc:call(node(Pid), erlang, is_process_alive, [Pid]) =:= true. pget(K, P) -> proplists:get_value(K, P). pget(K, P, D) -> proplists:get_value(K, P, D). @@ -937,19 +970,149 @@ receive_multi_call([{Mref, Pid} | MonitorPids], Good, Bad) -> receive_multi_call(MonitorPids, Good, [{Pid, Reason} | Bad]) end. -%% the slower shutdown on windows required to flush stdout -quit(Status) -> - case os:type() of - {unix, _} -> halt(Status); - {win32, _} -> init:stop(Status) - end. - os_cmd(Command) -> - Exec = hd(string:tokens(Command, " ")), - case os:find_executable(Exec) of - false -> throw({command_not_found, Exec}); - _ -> os:cmd(Command) + case os:type() of + {win32, _} -> + %% Clink workaround; see + %% http://code.google.com/p/clink/issues/detail?id=141 + os:cmd(" " ++ Command); + _ -> + %% Don't just return "/bin/sh: <cmd>: not found" if not found + Exec = hd(string:tokens(Command, " ")), + case os:find_executable(Exec) of + false -> throw({command_not_found, Exec}); + _ -> os:cmd(Command) + end end. gb_sets_difference(S1, S2) -> gb_sets:fold(fun gb_sets:delete_any/2, S1, S2). + +version() -> + {ok, VSN} = application:get_key(rabbit, vsn), + VSN. + +%% application:which_applications(infinity) is dangerous, since it can +%% cause deadlocks on shutdown. So we have to use a timeout variant, +%% but w/o creating spurious timeout errors. +which_applications() -> + try + application:which_applications() + catch + exit:{timeout, _} -> [] + end. + +sequence_error([T]) -> T; +sequence_error([{error, _} = Error | _]) -> Error; +sequence_error([_ | Rest]) -> sequence_error(Rest). + +json_encode(Term) -> + try + {ok, mochijson2:encode(Term)} + catch + exit:{json_encode, E} -> + {error, E} + end. + +json_decode(Term) -> + try + {ok, mochijson2:decode(Term)} + catch + %% Sadly `mochijson2:decode/1' does not offer a nice way to catch + %% decoding errors... + error:_ -> error + end. + +json_to_term({struct, L}) -> + [{K, json_to_term(V)} || {K, V} <- L]; +json_to_term(L) when is_list(L) -> + [json_to_term(I) || I <- L]; +json_to_term(V) when is_binary(V) orelse is_number(V) orelse V =:= null orelse + V =:= true orelse V =:= false -> + V. + +%% This has the flaw that empty lists will never be JSON objects, so use with +%% care. +term_to_json([{_, _}|_] = L) -> + {struct, [{K, term_to_json(V)} || {K, V} <- L]}; +term_to_json(L) when is_list(L) -> + [term_to_json(I) || I <- L]; +term_to_json(V) when is_binary(V) orelse is_number(V) orelse V =:= null orelse + V =:= true orelse V =:= false -> + V. + +check_expiry(N) when N > ?MAX_EXPIRY_TIMER -> {error, {value_too_big, N}}; +check_expiry(N) when N < 0 -> {error, {value_negative, N}}; +check_expiry(_N) -> ok. + +base64url(In) -> + lists:reverse(lists:foldl(fun ($\+, Acc) -> [$\- | Acc]; + ($\/, Acc) -> [$\_ | Acc]; + ($\=, Acc) -> Acc; + (Chr, Acc) -> [Chr | Acc] + end, [], base64:encode_to_string(In))). + +%% Ideally, you'd want Fun to run every IdealInterval. but you don't +%% want it to take more than MaxRatio of IdealInterval. So if it takes +%% more then you want to run it less often. So we time how long it +%% takes to run, and then suggest how long you should wait before +%% running it again. Times are in millis. +interval_operation({M, F, A}, MaxRatio, IdealInterval, LastInterval) -> + {Micros, Res} = timer:tc(M, F, A), + {Res, case {Micros > 1000 * (MaxRatio * IdealInterval), + Micros > 1000 * (MaxRatio * LastInterval)} of + {true, true} -> round(LastInterval * 1.5); + {true, false} -> LastInterval; + {false, false} -> lists:max([IdealInterval, + round(LastInterval / 1.5)]) + end}. + +ensure_timer(State, Idx, After, Msg) -> + case element(Idx, State) of + undefined -> TRef = erlang:send_after(After, self(), Msg), + setelement(Idx, State, TRef); + _ -> State + end. + +stop_timer(State, Idx) -> + case element(Idx, State) of + undefined -> State; + TRef -> case erlang:cancel_timer(TRef) of + false -> State; + _ -> setelement(Idx, State, undefined) + end + end. + +%% ------------------------------------------------------------------------- +%% Begin copypasta from gen_server2.erl + +get_parent() -> + case get('$ancestors') of + [Parent | _] when is_pid (Parent) -> Parent; + [Parent | _] when is_atom(Parent) -> name_to_pid(Parent); + _ -> exit(process_was_not_started_by_proc_lib) + end. + +name_to_pid(Name) -> + case whereis(Name) of + undefined -> case whereis_name(Name) of + undefined -> exit(could_not_find_registerd_name); + Pid -> Pid + end; + Pid -> Pid + end. + +whereis_name(Name) -> + case ets:lookup(global_names, Name) of + [{_Name, Pid, _Method, _RPid, _Ref}] -> + if node(Pid) == node() -> case erlang:is_process_alive(Pid) of + true -> Pid; + false -> undefined + end; + true -> Pid + end; + [] -> undefined + end. + +%% End copypasta from gen_server2.erl +%% ------------------------------------------------------------------------- diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 7e9346f9..85958400 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -10,27 +10,38 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% - -module(rabbit_mnesia). --export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0, - cluster/1, force_cluster/1, reset/0, force_reset/0, init_db/3, - is_clustered/0, running_clustered_nodes/0, all_clustered_nodes/0, - empty_ram_only_tables/0, copy_db/1, wait_for_tables/1, - create_cluster_nodes_config/1, read_cluster_nodes_config/0, - record_running_nodes/0, read_previously_running_nodes/0, - running_nodes_filename/0, is_disc_node/0, on_node_down/1, - on_node_up/1]). - --export([table_names/0]). - -%% create_tables/0 exported for helping embed RabbitMQ in or alongside -%% other mnesia-using Erlang applications, such as ejabberd --export([create_tables/0]). +-export([init/0, + join_cluster/2, + reset/0, + force_reset/0, + update_cluster_nodes/1, + change_cluster_node_type/1, + forget_cluster_node/2, + + status/0, + is_clustered/0, + cluster_nodes/1, + node_type/0, + dir/0, + cluster_status_from_mnesia/0, + + init_db_unchecked/2, + copy_db/1, + check_cluster_consistency/0, + ensure_mnesia_dir/0, + + on_node_up/1, + on_node_down/1 + ]). + +%% Used internally in rpc calls +-export([node_info/0, remove_node_if_mnesia_running/1]). -include("rabbit.hrl"). @@ -38,314 +49,432 @@ -ifdef(use_specs). --export_type([node_type/0]). +-export_type([node_type/0, cluster_status/0]). --type(node_type() :: disc_only | disc | ram | unknown). --spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | - {'running_nodes', [node()]}]). --spec(dir/0 :: () -> file:filename()). --spec(ensure_mnesia_dir/0 :: () -> 'ok'). +-type(node_type() :: disc | ram). +-type(cluster_status() :: {[node()], [node()], [node()]}). + +%% Main interface -spec(init/0 :: () -> 'ok'). --spec(init_db/3 :: ([node()], boolean(), rabbit_misc:thunk('ok')) -> 'ok'). --spec(is_db_empty/0 :: () -> boolean()). --spec(cluster/1 :: ([node()]) -> 'ok'). --spec(force_cluster/1 :: ([node()]) -> 'ok'). --spec(cluster/2 :: ([node()], boolean()) -> 'ok'). +-spec(join_cluster/2 :: (node(), node_type()) + -> 'ok' | {'ok', 'already_member'}). -spec(reset/0 :: () -> 'ok'). -spec(force_reset/0 :: () -> 'ok'). +-spec(update_cluster_nodes/1 :: (node()) -> 'ok'). +-spec(change_cluster_node_type/1 :: (node_type()) -> 'ok'). +-spec(forget_cluster_node/2 :: (node(), boolean()) -> 'ok'). + +%% Various queries to get the status of the db +-spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | + {'running_nodes', [node()]} | + {'partitions', [{node(), [node()]}]}]). -spec(is_clustered/0 :: () -> boolean()). --spec(running_clustered_nodes/0 :: () -> [node()]). --spec(all_clustered_nodes/0 :: () -> [node()]). --spec(empty_ram_only_tables/0 :: () -> 'ok'). --spec(create_tables/0 :: () -> 'ok'). +-spec(cluster_nodes/1 :: ('all' | 'disc' | 'ram' | 'running') -> [node()]). +-spec(node_type/0 :: () -> node_type()). +-spec(dir/0 :: () -> file:filename()). +-spec(cluster_status_from_mnesia/0 :: () -> rabbit_types:ok_or_error2( + cluster_status(), any())). + +%% Operations on the db and utils, mainly used in `rabbit_upgrade' and `rabbit' +-spec(init_db_unchecked/2 :: ([node()], node_type()) -> 'ok'). -spec(copy_db/1 :: (file:filename()) -> rabbit_types:ok_or_error(any())). --spec(wait_for_tables/1 :: ([atom()]) -> 'ok'). --spec(create_cluster_nodes_config/1 :: ([node()]) -> 'ok'). --spec(read_cluster_nodes_config/0 :: () -> [node()]). --spec(record_running_nodes/0 :: () -> 'ok'). --spec(read_previously_running_nodes/0 :: () -> [node()]). --spec(running_nodes_filename/0 :: () -> file:filename()). --spec(is_disc_node/0 :: () -> boolean()). +-spec(check_cluster_consistency/0 :: () -> 'ok'). +-spec(ensure_mnesia_dir/0 :: () -> 'ok'). + +%% Hooks used in `rabbit_node_monitor' -spec(on_node_up/1 :: (node()) -> 'ok'). -spec(on_node_down/1 :: (node()) -> 'ok'). --spec(table_names/0 :: () -> [atom()]). - -endif. %%---------------------------------------------------------------------------- - -status() -> - [{nodes, case mnesia:system_info(is_running) of - yes -> [{Key, Nodes} || - {Key, CopyType} <- [{disc_only, disc_only_copies}, - {disc, disc_copies}, - {ram, ram_copies}], - begin - Nodes = nodes_of_type(CopyType), - Nodes =/= [] - end]; - no -> case all_clustered_nodes() of - [] -> []; - Nodes -> [{unknown, Nodes}] - end; - Reason when Reason =:= starting; Reason =:= stopping -> - exit({rabbit_busy, try_again_later}) - end}, - {running_nodes, running_clustered_nodes()}]. +%% Main interface +%%---------------------------------------------------------------------------- init() -> ensure_mnesia_running(), ensure_mnesia_dir(), - Nodes = read_cluster_nodes_config(), - ok = init_db(Nodes, should_be_disc_node(Nodes)), + case is_virgin_node() of + true -> init_from_config(); + false -> NodeType = node_type(), + init_db_and_upgrade(cluster_nodes(all), NodeType, + NodeType =:= ram) + end, %% We intuitively expect the global name server to be synced when - %% Mnesia is up. In fact that's not guaranteed to be the case - let's - %% make it so. + %% Mnesia is up. In fact that's not guaranteed to be the case - + %% let's make it so. ok = global:sync(), - ok = delete_previously_running_nodes(), ok. -is_db_empty() -> - lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end, - table_names()). - -cluster(ClusterNodes) -> - cluster(ClusterNodes, false). -force_cluster(ClusterNodes) -> - cluster(ClusterNodes, true). - -%% Alter which disk nodes this node is clustered with. This can be a -%% subset of all the disk nodes in the cluster but can (and should) -%% include the node itself if it is to be a disk rather than a ram -%% node. If Force is false, only connections to online nodes are -%% allowed. -cluster(ClusterNodes, Force) -> - rabbit_misc:local_info_msg("Clustering with ~p~s~n", - [ClusterNodes, if Force -> " forcefully"; - true -> "" - end]), +init_from_config() -> + {TryNodes, NodeType} = + case application:get_env(rabbit, cluster_nodes) of + {ok, Nodes} when is_list(Nodes) -> + Config = {Nodes -- [node()], case lists:member(node(), Nodes) of + true -> disc; + false -> ram + end}, + error_logger:warning_msg( + "Converting legacy 'cluster_nodes' configuration~n ~w~n" + "to~n ~w.~n~n" + "Please update the configuration to the new format " + "{Nodes, NodeType}, where Nodes contains the nodes that the " + "node will try to cluster with, and NodeType is either " + "'disc' or 'ram'~n", [Nodes, Config]), + Config; + {ok, Config} -> + Config + end, + case find_good_node(nodes_excl_me(TryNodes)) of + {ok, Node} -> + rabbit_log:info("Node '~p' selected for clustering from " + "configuration~n", [Node]), + {ok, {_, DiscNodes, _}} = discover_cluster(Node), + init_db_and_upgrade(DiscNodes, NodeType, true), + rabbit_node_monitor:notify_joined_cluster(); + none -> + rabbit_log:warning("Could not find any suitable node amongst the " + "ones provided in the configuration: ~p~n", + [TryNodes]), + init_db_and_upgrade([node()], disc, false) + end. + +%% Make the node join a cluster. The node will be reset automatically +%% before we actually cluster it. The nodes provided will be used to +%% find out about the nodes in the cluster. +%% +%% This function will fail if: +%% +%% * The node is currently the only disc node of its cluster +%% * We can't connect to any of the nodes provided +%% * The node is currently already clustered with the cluster of the nodes +%% provided +%% +%% Note that we make no attempt to verify that the nodes provided are +%% all in the same cluster, we simply pick the first online node and +%% we cluster to its cluster. +join_cluster(DiscoveryNode, NodeType) -> ensure_mnesia_not_running(), ensure_mnesia_dir(), + case is_only_clustered_disc_node() of + true -> e(clustering_only_disc_node); + false -> ok + end, + {ClusterNodes, _, _} = case discover_cluster(DiscoveryNode) of + {ok, Res} -> Res; + {error, _} = E -> throw(E) + end, + case me_in_nodes(ClusterNodes) of + false -> + %% reset the node. this simplifies things and it will be needed in + %% this case - we're joining a new cluster with new nodes which + %% are not in synch with the current node. I also lifts the burden + %% of reseting the node from the user. + reset_gracefully(), + + %% Join the cluster + rabbit_misc:local_info_msg("Clustering with ~p as ~p node~n", + [ClusterNodes, NodeType]), + ok = init_db_with_mnesia(ClusterNodes, NodeType, true, true), + rabbit_node_monitor:notify_joined_cluster(), + ok; + true -> + rabbit_misc:local_info_msg("Already member of cluster: ~p~n", + [ClusterNodes]), + {ok, already_member} + end. - case not Force andalso is_clustered() andalso - is_only_disc_node(node(), false) andalso - not should_be_disc_node(ClusterNodes) - of - true -> log_both("last running disc node leaving cluster"); - _ -> ok +%% return node to its virgin state, where it is not member of any +%% cluster, has no cluster configuration, no local database, and no +%% persisted messages +reset() -> + ensure_mnesia_not_running(), + rabbit_misc:local_info_msg("Resetting Rabbit~n", []), + reset_gracefully(). + +force_reset() -> + ensure_mnesia_not_running(), + rabbit_misc:local_info_msg("Resetting Rabbit forcefully~n", []), + wipe(). + +reset_gracefully() -> + AllNodes = cluster_nodes(all), + %% Reconnecting so that we will get an up to date nodes. We don't + %% need to check for consistency because we are resetting. + %% Force=true here so that reset still works when clustered with a + %% node which is down. + init_db_with_mnesia(AllNodes, node_type(), false, false), + case is_only_clustered_disc_node() of + true -> e(resetting_only_disc_node); + false -> ok end, + leave_cluster(), + rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), cannot_delete_schema), + wipe(). + +wipe() -> + %% We need to make sure that we don't end up in a distributed + %% Erlang system with nodes while not being in an Mnesia cluster + %% with them. We don't handle that well. + [erlang:disconnect_node(N) || N <- cluster_nodes(all)], + %% remove persisted messages and any other garbage we find + ok = rabbit_file:recursive_delete(filelib:wildcard(dir() ++ "/*")), + ok = rabbit_node_monitor:reset_cluster_status(), + ok. - %% Wipe mnesia if we're changing type from disc to ram - case {is_disc_node(), should_be_disc_node(ClusterNodes)} of - {true, false} -> rabbit_misc:with_local_io( - fun () -> error_logger:warning_msg( - "changing node type; wiping " - "mnesia...~n~n") - end), - rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), - cannot_delete_schema); - _ -> ok +change_cluster_node_type(Type) -> + ensure_mnesia_not_running(), + ensure_mnesia_dir(), + case is_clustered() of + false -> e(not_clustered); + true -> ok end, + {_, _, RunningNodes} = case discover_cluster(cluster_nodes(all)) of + {ok, Status} -> Status; + {error, _Reason} -> e(cannot_connect_to_cluster) + end, + %% We might still be marked as running by a remote node since the + %% information of us going down might not have propagated yet. + Node = case RunningNodes -- [node()] of + [] -> e(no_online_cluster_nodes); + [Node0|_] -> Node0 + end, + ok = reset(), + ok = join_cluster(Node, Type). - %% Pre-emptively leave the cluster - %% - %% We're trying to handle the following two cases: - %% 1. We have a two-node cluster, where both nodes are disc nodes. - %% One node is re-clustered as a ram node. When it tries to - %% re-join the cluster, but before it has time to update its - %% tables definitions, the other node will order it to re-create - %% its disc tables. So, we need to leave the cluster before we - %% can join it again. - %% 2. We have a two-node cluster, where both nodes are disc nodes. - %% One node is forcefully reset (so, the other node thinks its - %% still a part of the cluster). The reset node is re-clustered - %% as a ram node. Same as above, we need to leave the cluster - %% before we can join it. But, since we don't know if we're in a - %% cluster or not, we just pre-emptively leave it before joining. - ProperClusterNodes = ClusterNodes -- [node()], - try - ok = leave_cluster(ProperClusterNodes, ProperClusterNodes) - catch - {error, {no_running_cluster_nodes, _, _}} when Force -> - ok +update_cluster_nodes(DiscoveryNode) -> + ensure_mnesia_not_running(), + ensure_mnesia_dir(), + Status = {AllNodes, _, _} = + case discover_cluster(DiscoveryNode) of + {ok, Status0} -> Status0; + {error, _Reason} -> e(cannot_connect_to_node) + end, + case me_in_nodes(AllNodes) of + true -> + %% As in `check_consistency/0', we can safely delete the + %% schema here, since it'll be replicated from the other + %% nodes + mnesia:delete_schema([node()]), + rabbit_node_monitor:write_cluster_status(Status), + rabbit_misc:local_info_msg("Updating cluster nodes from ~p~n", + [DiscoveryNode]), + init_db_with_mnesia(AllNodes, node_type(), true, true); + false -> + e(inconsistent_cluster) end, + ok. - %% Join the cluster - start_mnesia(), - try - ok = init_db(ClusterNodes, Force), - ok = create_cluster_nodes_config(ClusterNodes) - after - stop_mnesia() +%% We proceed like this: try to remove the node locally. If the node +%% is offline, we remove the node if: +%% * This node is a disc node +%% * All other nodes are offline +%% * This node was, at the best of our knowledge (see comment below) +%% the last or second to last after the node we're removing to go +%% down +forget_cluster_node(Node, RemoveWhenOffline) -> + case lists:member(Node, cluster_nodes(all)) of + true -> ok; + false -> e(not_a_cluster_node) end, + case {RemoveWhenOffline, is_running()} of + {true, false} -> remove_node_offline_node(Node); + {true, true} -> e(online_node_offline_flag); + {false, false} -> e(offline_node_no_offline_flag); + {false, true} -> rabbit_misc:local_info_msg( + "Removing node ~p from cluster~n", [Node]), + case remove_node_if_mnesia_running(Node) of + ok -> ok; + {error, _} = Err -> throw(Err) + end + end. - ok. +remove_node_offline_node(Node) -> + %% Here `mnesia:system_info(running_db_nodes)' will RPC, but that's what we + %% want - we need to know the running nodes *now*. If the current node is a + %% RAM node it will return bogus results, but we don't care since we only do + %% this operation from disc nodes. + case {mnesia:system_info(running_db_nodes) -- [Node], node_type()} of + {[], disc} -> + start_mnesia(), + try + %% What we want to do here is replace the last node to + %% go down with the current node. The way we do this + %% is by force loading the table, and making sure that + %% they are loaded. + rabbit_table:force_load(), + rabbit_table:wait_for_replicated(), + forget_cluster_node(Node, false), + force_load_next_boot() + after + stop_mnesia() + end; + {_, _} -> + e(removing_node_from_offline_node) + end. -%% return node to its virgin state, where it is not member of any -%% cluster, has no cluster configuration, no local database, and no -%% persisted messages -reset() -> reset(false). -force_reset() -> reset(true). - -is_clustered() -> - RunningNodes = running_clustered_nodes(), - [node()] /= RunningNodes andalso [] /= RunningNodes. - -all_clustered_nodes() -> - mnesia:system_info(db_nodes). - -running_clustered_nodes() -> - mnesia:system_info(running_db_nodes). - -empty_ram_only_tables() -> - Node = node(), - lists:foreach( - fun (TabName) -> - case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of - true -> {atomic, ok} = mnesia:clear_table(TabName); - false -> ok - end - end, table_names()), - ok. -%%-------------------------------------------------------------------- +%%---------------------------------------------------------------------------- +%% Queries +%%---------------------------------------------------------------------------- -nodes_of_type(Type) -> - %% This function should return the nodes of a certain type (ram, - %% disc or disc_only) in the current cluster. The type of nodes - %% is determined when the cluster is initially configured. - mnesia:table_info(schema, Type). - -%% The tables aren't supposed to be on disk on a ram node -table_definitions(disc) -> - table_definitions(); -table_definitions(ram) -> - [{Tab, copy_type_to_ram(TabDef)} || {Tab, TabDef} <- table_definitions()]. - -table_definitions() -> - [{rabbit_user, - [{record_name, internal_user}, - {attributes, record_info(fields, internal_user)}, - {disc_copies, [node()]}, - {match, #internal_user{_='_'}}]}, - {rabbit_user_permission, - [{record_name, user_permission}, - {attributes, record_info(fields, user_permission)}, - {disc_copies, [node()]}, - {match, #user_permission{user_vhost = #user_vhost{_='_'}, - permission = #permission{_='_'}, - _='_'}}]}, - {rabbit_vhost, - [{record_name, vhost}, - {attributes, record_info(fields, vhost)}, - {disc_copies, [node()]}, - {match, #vhost{_='_'}}]}, - {rabbit_listener, - [{record_name, listener}, - {attributes, record_info(fields, listener)}, - {type, bag}, - {match, #listener{_='_'}}]}, - {rabbit_durable_route, - [{record_name, route}, - {attributes, record_info(fields, route)}, - {disc_copies, [node()]}, - {match, #route{binding = binding_match(), _='_'}}]}, - {rabbit_semi_durable_route, - [{record_name, route}, - {attributes, record_info(fields, route)}, - {type, ordered_set}, - {match, #route{binding = binding_match(), _='_'}}]}, - {rabbit_route, - [{record_name, route}, - {attributes, record_info(fields, route)}, - {type, ordered_set}, - {match, #route{binding = binding_match(), _='_'}}]}, - {rabbit_reverse_route, - [{record_name, reverse_route}, - {attributes, record_info(fields, reverse_route)}, - {type, ordered_set}, - {match, #reverse_route{reverse_binding = reverse_binding_match(), - _='_'}}]}, - {rabbit_topic_trie_node, - [{record_name, topic_trie_node}, - {attributes, record_info(fields, topic_trie_node)}, - {type, ordered_set}, - {match, #topic_trie_node{trie_node = trie_node_match(), _='_'}}]}, - {rabbit_topic_trie_edge, - [{record_name, topic_trie_edge}, - {attributes, record_info(fields, topic_trie_edge)}, - {type, ordered_set}, - {match, #topic_trie_edge{trie_edge = trie_edge_match(), _='_'}}]}, - {rabbit_topic_trie_binding, - [{record_name, topic_trie_binding}, - {attributes, record_info(fields, topic_trie_binding)}, - {type, ordered_set}, - {match, #topic_trie_binding{trie_binding = trie_binding_match(), - _='_'}}]}, - {rabbit_durable_exchange, - [{record_name, exchange}, - {attributes, record_info(fields, exchange)}, - {disc_copies, [node()]}, - {match, #exchange{name = exchange_name_match(), _='_'}}]}, - {rabbit_exchange, - [{record_name, exchange}, - {attributes, record_info(fields, exchange)}, - {match, #exchange{name = exchange_name_match(), _='_'}}]}, - {rabbit_exchange_serial, - [{record_name, exchange_serial}, - {attributes, record_info(fields, exchange_serial)}, - {match, #exchange_serial{name = exchange_name_match(), _='_'}}]}, - {rabbit_runtime_parameters, - [{record_name, runtime_parameters}, - {attributes, record_info(fields, runtime_parameters)}, - {disc_copies, [node()]}, - {match, #runtime_parameters{_='_'}}]}, - {rabbit_durable_queue, - [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}, - {disc_copies, [node()]}, - {match, #amqqueue{name = queue_name_match(), _='_'}}]}, - {rabbit_queue, - [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}, - {match, #amqqueue{name = queue_name_match(), _='_'}}]}] - ++ gm:table_definitions() - ++ mirrored_supervisor:table_definitions(). - -binding_match() -> - #binding{source = exchange_name_match(), - destination = binding_destination_match(), - _='_'}. -reverse_binding_match() -> - #reverse_binding{destination = binding_destination_match(), - source = exchange_name_match(), - _='_'}. -binding_destination_match() -> - resource_match('_'). -trie_node_match() -> - #trie_node{ exchange_name = exchange_name_match(), _='_'}. -trie_edge_match() -> - #trie_edge{ exchange_name = exchange_name_match(), _='_'}. -trie_binding_match() -> - #trie_binding{exchange_name = exchange_name_match(), _='_'}. -exchange_name_match() -> - resource_match(exchange). -queue_name_match() -> - resource_match(queue). -resource_match(Kind) -> - #resource{kind = Kind, _='_'}. - -table_names() -> - [Tab || {Tab, _} <- table_definitions()]. - -replicated_table_names() -> - [Tab || {Tab, TabDef} <- table_definitions(), - not lists:member({local_content, true}, TabDef) - ]. +status() -> + IfNonEmpty = fun (_, []) -> []; + (Type, Nodes) -> [{Type, Nodes}] + end, + [{nodes, (IfNonEmpty(disc, cluster_nodes(disc)) ++ + IfNonEmpty(ram, cluster_nodes(ram)))}] ++ + case is_running() of + true -> RunningNodes = cluster_nodes(running), + [{running_nodes, RunningNodes}, + {partitions, mnesia_partitions(RunningNodes)}]; + false -> [] + end. + +mnesia_partitions(Nodes) -> + Replies = rabbit_node_monitor:partitions(Nodes), + [Reply || Reply = {_, R} <- Replies, R =/= []]. + +is_running() -> mnesia:system_info(is_running) =:= yes. + +is_clustered() -> AllNodes = cluster_nodes(all), + AllNodes =/= [] andalso AllNodes =/= [node()]. + +cluster_nodes(WhichNodes) -> cluster_status(WhichNodes). + +%% This function is the actual source of information, since it gets +%% the data from mnesia. Obviously it'll work only when mnesia is +%% running. +cluster_status_from_mnesia() -> + case is_running() of + false -> + {error, mnesia_not_running}; + true -> + %% If the tables are not present, it means that + %% `init_db/3' hasn't been run yet. In other words, either + %% we are a virgin node or a restarted RAM node. In both + %% cases we're not interested in what mnesia has to say. + NodeType = case mnesia:system_info(use_dir) of + true -> disc; + false -> ram + end, + case rabbit_table:is_present() of + true -> AllNodes = mnesia:system_info(db_nodes), + DiscCopies = mnesia:table_info(schema, disc_copies), + DiscNodes = case NodeType of + disc -> nodes_incl_me(DiscCopies); + ram -> DiscCopies + end, + %% `mnesia:system_info(running_db_nodes)' is safe since + %% we know that mnesia is running + RunningNodes = mnesia:system_info(running_db_nodes), + {ok, {AllNodes, DiscNodes, RunningNodes}}; + false -> {error, tables_not_present} + end + end. + +cluster_status(WhichNodes) -> + {AllNodes, DiscNodes, RunningNodes} = Nodes = + case cluster_status_from_mnesia() of + {ok, Nodes0} -> + Nodes0; + {error, _Reason} -> + {AllNodes0, DiscNodes0, RunningNodes0} = + rabbit_node_monitor:read_cluster_status(), + %% The cluster status file records the status when the node is + %% online, but we know for sure that the node is offline now, so + %% we can remove it from the list of running nodes. + {AllNodes0, DiscNodes0, nodes_excl_me(RunningNodes0)} + end, + case WhichNodes of + status -> Nodes; + all -> AllNodes; + disc -> DiscNodes; + ram -> AllNodes -- DiscNodes; + running -> RunningNodes + end. + +node_info() -> + {erlang:system_info(otp_release), rabbit_misc:version(), + cluster_status_from_mnesia()}. + +node_type() -> + DiscNodes = cluster_nodes(disc), + case DiscNodes =:= [] orelse me_in_nodes(DiscNodes) of + true -> disc; + false -> ram + end. dir() -> mnesia:system_info(directory). +%%---------------------------------------------------------------------------- +%% Operations on the db +%%---------------------------------------------------------------------------- + +%% Adds the provided nodes to the mnesia cluster, creating a new +%% schema if there is the need to and catching up if there are other +%% nodes in the cluster already. It also updates the cluster status +%% file. +init_db(ClusterNodes, NodeType, CheckOtherNodes) -> + Nodes = change_extra_db_nodes(ClusterNodes, CheckOtherNodes), + %% Note that we use `system_info' here and not the cluster status + %% since when we start rabbit for the first time the cluster + %% status will say we are a disc node but the tables won't be + %% present yet. + WasDiscNode = mnesia:system_info(use_dir), + case {Nodes, WasDiscNode, NodeType} of + {[], _, ram} -> + %% Standalone ram node, we don't want that + throw({error, cannot_create_standalone_ram_node}); + {[], false, disc} -> + %% RAM -> disc, starting from scratch + ok = create_schema(); + {[], true, disc} -> + %% First disc node up + maybe_force_load(), + ok; + {[AnotherNode | _], _, _} -> + %% Subsequent node in cluster, catch up + ensure_version_ok( + rpc:call(AnotherNode, rabbit_version, recorded, [])), + maybe_force_load(), + ok = rabbit_table:wait_for_replicated(), + ok = rabbit_table:create_local_copy(NodeType) + end, + ensure_schema_integrity(), + rabbit_node_monitor:update_cluster_status(), + ok. + +init_db_unchecked(ClusterNodes, NodeType) -> + init_db(ClusterNodes, NodeType, false). + +init_db_and_upgrade(ClusterNodes, NodeType, CheckOtherNodes) -> + ok = init_db(ClusterNodes, NodeType, CheckOtherNodes), + ok = case rabbit_upgrade:maybe_upgrade_local() of + ok -> ok; + starting_from_scratch -> rabbit_version:record_desired(); + version_not_available -> schema_ok_or_move() + end, + %% `maybe_upgrade_local' restarts mnesia, so ram nodes will forget + %% about the cluster + case NodeType of + ram -> start_mnesia(), + change_extra_db_nodes(ClusterNodes, false); + disc -> ok + end, + %% ...and all nodes will need to wait for tables + rabbit_table:wait_for_replicated(), + ok. + +init_db_with_mnesia(ClusterNodes, NodeType, + CheckOtherNodes, CheckConsistency) -> + start_mnesia(CheckConsistency), + try + init_db_and_upgrade(ClusterNodes, NodeType, CheckOtherNodes) + after + stop_mnesia() + end. + ensure_mnesia_dir() -> MnesiaDir = dir() ++ "/", case filelib:ensure_dir(MnesiaDir) of @@ -378,210 +507,124 @@ ensure_mnesia_not_running() -> end. ensure_schema_integrity() -> - case check_schema_integrity() of + case rabbit_table:check_schema_integrity() of ok -> ok; {error, Reason} -> throw({error, {schema_integrity_check_failed, Reason}}) end. -check_schema_integrity() -> - Tables = mnesia:system_info(tables), - case check_tables(fun (Tab, TabDef) -> - case lists:member(Tab, Tables) of - false -> {error, {table_missing, Tab}}; - true -> check_table_attributes(Tab, TabDef) - end - end) of - ok -> ok = wait_for_tables(), - check_tables(fun check_table_content/2); - Other -> Other - end. +copy_db(Destination) -> + ok = ensure_mnesia_not_running(), + rabbit_file:recursive_copy(dir(), Destination). + +force_load_filename() -> + filename:join(rabbit_mnesia:dir(), "force_load"). + +force_load_next_boot() -> + rabbit_file:write_file(force_load_filename(), <<"">>). -check_table_attributes(Tab, TabDef) -> - {_, ExpAttrs} = proplists:lookup(attributes, TabDef), - case mnesia:table_info(Tab, attributes) of - ExpAttrs -> ok; - Attrs -> {error, {table_attributes_mismatch, Tab, ExpAttrs, Attrs}} +maybe_force_load() -> + case rabbit_file:is_file(force_load_filename()) of + true -> rabbit_table:force_load(), + rabbit_file:delete(force_load_filename()); + false -> ok end. -check_table_content(Tab, TabDef) -> - {_, Match} = proplists:lookup(match, TabDef), - case mnesia:dirty_first(Tab) of - '$end_of_table' -> +%% This does not guarantee us much, but it avoids some situations that +%% will definitely end up badly +check_cluster_consistency() -> + %% We want to find 0 or 1 consistent nodes. + case lists:foldl( + fun (Node, {error, _}) -> check_cluster_consistency(Node); + (_Node, {ok, Status}) -> {ok, Status} + end, {error, not_found}, nodes_excl_me(cluster_nodes(all))) + of + {ok, Status = {RemoteAllNodes, _, _}} -> + case ordsets:is_subset(ordsets:from_list(cluster_nodes(all)), + ordsets:from_list(RemoteAllNodes)) of + true -> + ok; + false -> + %% We delete the schema here since we think we are + %% clustered with nodes that are no longer in the + %% cluster and there is no other way to remove + %% them from our schema. On the other hand, we are + %% sure that there is another online node that we + %% can use to sync the tables with. There is a + %% race here: if between this check and the + %% `init_db' invocation the cluster gets + %% disbanded, we're left with a node with no + %% mnesia data that will try to connect to offline + %% nodes. + mnesia:delete_schema([node()]) + end, + rabbit_node_monitor:write_cluster_status(Status); + {error, not_found} -> ok; - Key -> - ObjList = mnesia:dirty_read(Tab, Key), - MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]), - case ets:match_spec_run(ObjList, MatchComp) of - ObjList -> ok; - _ -> {error, {table_content_invalid, Tab, Match, ObjList}} - end + {error, _} = E -> + throw(E) end. -check_tables(Fun) -> - case [Error || {Tab, TabDef} <- table_definitions( - case is_disc_node() of - true -> disc; - false -> ram - end), - case Fun(Tab, TabDef) of - ok -> Error = none, false; - {error, Error} -> true - end] of - [] -> ok; - Errors -> {error, Errors} +check_cluster_consistency(Node) -> + case rpc:call(Node, rabbit_mnesia, node_info, []) of + {badrpc, _Reason} -> + {error, not_found}; + {_OTP, _Rabbit, {error, _}} -> + {error, not_found}; + {OTP, Rabbit, {ok, Status}} -> + case check_consistency(OTP, Rabbit, Node, Status) of + {error, _} = E -> E; + {ok, Res} -> {ok, Res} + end; + {_OTP, Rabbit, _Hash, _Status} -> + %% delegate hash checking implies version mismatch + version_error("Rabbit", rabbit_misc:version(), Rabbit) end. -%% The cluster node config file contains some or all of the disk nodes -%% that are members of the cluster this node is / should be a part of. -%% -%% If the file is absent, the list is empty, or only contains the -%% current node, then the current node is a standalone (disk) -%% node. Otherwise it is a node that is part of a cluster as either a -%% disk node, if it appears in the cluster node config, or ram node if -%% it doesn't. - -cluster_nodes_config_filename() -> - dir() ++ "/cluster_nodes.config". - -create_cluster_nodes_config(ClusterNodes) -> - FileName = cluster_nodes_config_filename(), - case rabbit_file:write_term_file(FileName, [ClusterNodes]) of - ok -> ok; - {error, Reason} -> - throw({error, {cannot_create_cluster_nodes_config, - FileName, Reason}}) - end. +%%-------------------------------------------------------------------- +%% Hooks for `rabbit_node_monitor' +%%-------------------------------------------------------------------- -read_cluster_nodes_config() -> - FileName = cluster_nodes_config_filename(), - case rabbit_file:read_term_file(FileName) of - {ok, [ClusterNodes]} -> ClusterNodes; - {error, enoent} -> - {ok, ClusterNodes} = application:get_env(rabbit, cluster_nodes), - ClusterNodes; - {error, Reason} -> - throw({error, {cannot_read_cluster_nodes_config, - FileName, Reason}}) +on_node_up(Node) -> + case running_disc_nodes() of + [Node] -> rabbit_log:info("cluster contains disc nodes again~n"); + _ -> ok end. -delete_cluster_nodes_config() -> - FileName = cluster_nodes_config_filename(), - case file:delete(FileName) of - ok -> ok; - {error, enoent} -> ok; - {error, Reason} -> - throw({error, {cannot_delete_cluster_nodes_config, - FileName, Reason}}) +on_node_down(_Node) -> + case running_disc_nodes() of + [] -> rabbit_log:info("only running disc node went down~n"); + _ -> ok end. -running_nodes_filename() -> - filename:join(dir(), "nodes_running_at_shutdown"). - -record_running_nodes() -> - FileName = running_nodes_filename(), - Nodes = running_clustered_nodes() -- [node()], - %% Don't check the result: we're shutting down anyway and this is - %% a best-effort-basis. - rabbit_file:write_term_file(FileName, [Nodes]), - ok. - -read_previously_running_nodes() -> - FileName = running_nodes_filename(), - case rabbit_file:read_term_file(FileName) of - {ok, [Nodes]} -> Nodes; - {error, enoent} -> []; - {error, Reason} -> throw({error, {cannot_read_previous_nodes_file, - FileName, Reason}}) - end. +running_disc_nodes() -> + {_AllNodes, DiscNodes, RunningNodes} = cluster_status(status), + ordsets:to_list(ordsets:intersection(ordsets:from_list(DiscNodes), + ordsets:from_list(RunningNodes))). -delete_previously_running_nodes() -> - FileName = running_nodes_filename(), - case file:delete(FileName) of - ok -> ok; - {error, enoent} -> ok; - {error, Reason} -> throw({error, {cannot_delete_previous_nodes_file, - FileName, Reason}}) - end. +%%-------------------------------------------------------------------- +%% Internal helpers +%%-------------------------------------------------------------------- -init_db(ClusterNodes, Force) -> - init_db( - ClusterNodes, Force, - fun () -> - case rabbit_upgrade:maybe_upgrade_local() of - ok -> ok; - %% If we're just starting up a new node we won't have a - %% version - starting_from_scratch -> ok = rabbit_version:record_desired() - end - end). - -%% Take a cluster node config and create the right kind of node - a -%% standalone disk node, or disk or ram node connected to the -%% specified cluster nodes. If Force is false, don't allow -%% connections to offline nodes. -init_db(ClusterNodes, Force, SecondaryPostMnesiaFun) -> - UClusterNodes = lists:usort(ClusterNodes), - ProperClusterNodes = UClusterNodes -- [node()], - case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of - {ok, []} when not Force andalso ProperClusterNodes =/= [] -> - throw({error, {failed_to_cluster_with, ProperClusterNodes, - "Mnesia could not connect to any disc nodes."}}); - {ok, Nodes} -> - WasDiscNode = is_disc_node(), - WantDiscNode = should_be_disc_node(ClusterNodes), - %% We create a new db (on disk, or in ram) in the first - %% two cases and attempt to upgrade the in the other two - case {Nodes, WasDiscNode, WantDiscNode} of - {[], _, false} -> - %% New ram node; start from scratch - ok = create_schema(ram); - {[], false, true} -> - %% Nothing there at all, start from scratch - ok = create_schema(disc); - {[], true, true} -> - %% We're the first node up - case rabbit_upgrade:maybe_upgrade_local() of - ok -> ensure_schema_integrity(); - version_not_available -> ok = schema_ok_or_move() - end; - {[AnotherNode|_], _, _} -> - %% Subsequent node in cluster, catch up - ensure_version_ok( - rpc:call(AnotherNode, rabbit_version, recorded, [])), - {CopyType, CopyTypeAlt} = - case WantDiscNode of - true -> {disc, disc_copies}; - false -> {ram, ram_copies} - end, - ok = wait_for_replicated_tables(), - ok = create_local_table_copy(schema, CopyTypeAlt), - ok = create_local_table_copies(CopyType), - - ok = SecondaryPostMnesiaFun(), - %% We've taken down mnesia, so ram nodes will need - %% to re-sync - case is_disc_node() of - false -> start_mnesia(), - mnesia:change_config(extra_db_nodes, - ProperClusterNodes), - wait_for_replicated_tables(); - true -> ok - end, - - ensure_schema_integrity(), - ok - end; - {error, Reason} -> - %% one reason we may end up here is if we try to join - %% nodes together that are currently running standalone or - %% are members of a different cluster - throw({error, {unable_to_join_cluster, ClusterNodes, Reason}}) +discover_cluster(Nodes) when is_list(Nodes) -> + lists:foldl(fun (_, {ok, Res}) -> {ok, Res}; + (Node, {error, _}) -> discover_cluster(Node) + end, {error, no_nodes_provided}, Nodes); +discover_cluster(Node) when Node == node() -> + {error, {cannot_discover_cluster, "Cannot cluster node with itself"}}; +discover_cluster(Node) -> + OfflineError = + {error, {cannot_discover_cluster, + "The nodes provided are either offline or not running"}}, + case rpc:call(Node, rabbit_mnesia, cluster_status_from_mnesia, []) of + {badrpc, _Reason} -> OfflineError; + {error, mnesia_not_running} -> OfflineError; + {ok, Res} -> {ok, Res} end. schema_ok_or_move() -> - case check_schema_integrity() of + case rabbit_table:check_schema_integrity() of ok -> ok; {error, Reason} -> @@ -592,7 +635,7 @@ schema_ok_or_move() -> "and recreating schema from scratch~n", [Reason]), ok = move_db(), - ok = create_schema(disc) + ok = create_schema() end. ensure_version_ok({ok, DiscVersion}) -> @@ -604,25 +647,16 @@ ensure_version_ok({ok, DiscVersion}) -> ensure_version_ok({error, _}) -> ok = rabbit_version:record_desired(). -create_schema(Type) -> +%% We only care about disc nodes since ram nodes are supposed to catch +%% up only +create_schema() -> stop_mnesia(), - case Type of - disc -> rabbit_misc:ensure_ok(mnesia:create_schema([node()]), - cannot_create_schema); - ram -> %% remove the disc schema since this is a ram node - rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), - cannot_delete_schema) - end, + rabbit_misc:ensure_ok(mnesia:create_schema([node()]), cannot_create_schema), start_mnesia(), - ok = create_tables(Type), + ok = rabbit_table:create(), ensure_schema_integrity(), ok = rabbit_version:record_desired(). -is_disc_node() -> mnesia:system_info(use_dir). - -should_be_disc_node(ClusterNodes) -> - ClusterNodes == [] orelse lists:member(node(), ClusterNodes). - move_db() -> stop_mnesia(), MnesiaDir = filename:dirname(dir() ++ "/"), @@ -644,186 +678,192 @@ move_db() -> start_mnesia(), ok. -copy_db(Destination) -> - ok = ensure_mnesia_not_running(), - rabbit_file:recursive_copy(dir(), Destination). +remove_node_if_mnesia_running(Node) -> + case is_running() of + false -> + {error, mnesia_not_running}; + true -> + %% Deleting the the schema copy of the node will result in + %% the node being removed from the cluster, with that + %% change being propagated to all nodes + case mnesia:del_table_copy(schema, Node) of + {atomic, ok} -> + rabbit_amqqueue:forget_all_durable(Node), + rabbit_node_monitor:notify_left_cluster(Node), + ok; + {aborted, Reason} -> + {error, {failed_to_remove_node, Node, Reason}} + end + end. -create_tables() -> create_tables(disc). +leave_cluster() -> + case nodes_excl_me(cluster_nodes(all)) of + [] -> ok; + AllNodes -> case lists:any(fun leave_cluster/1, AllNodes) of + true -> ok; + false -> e(no_running_cluster_nodes) + end + end. -create_tables(Type) -> - lists:foreach(fun ({Tab, TabDef}) -> - TabDef1 = proplists:delete(match, TabDef), - case mnesia:create_table(Tab, TabDef1) of - {atomic, ok} -> ok; - {aborted, Reason} -> - throw({error, {table_creation_failed, - Tab, TabDef1, Reason}}) - end - end, - table_definitions(Type)), - ok. +leave_cluster(Node) -> + case rpc:call(Node, + rabbit_mnesia, remove_node_if_mnesia_running, [node()]) of + ok -> true; + {error, mnesia_not_running} -> false; + {error, Reason} -> throw({error, Reason}); + {badrpc, nodedown} -> false + end. -copy_type_to_ram(TabDef) -> - [{disc_copies, []}, {ram_copies, [node()]} - | proplists:delete(ram_copies, proplists:delete(disc_copies, TabDef))]. - -table_has_copy_type(TabDef, DiscType) -> - lists:member(node(), proplists:get_value(DiscType, TabDef, [])). - -create_local_table_copies(Type) -> - lists:foreach( - fun ({Tab, TabDef}) -> - HasDiscCopies = table_has_copy_type(TabDef, disc_copies), - HasDiscOnlyCopies = table_has_copy_type(TabDef, disc_only_copies), - LocalTab = proplists:get_bool(local_content, TabDef), - StorageType = - if - Type =:= disc orelse LocalTab -> - if - HasDiscCopies -> disc_copies; - HasDiscOnlyCopies -> disc_only_copies; - true -> ram_copies - end; -%%% unused code - commented out to keep dialyzer happy -%%% Type =:= disc_only -> -%%% if -%%% HasDiscCopies or HasDiscOnlyCopies -> -%%% disc_only_copies; -%%% true -> ram_copies -%%% end; - Type =:= ram -> - ram_copies - end, - ok = create_local_table_copy(Tab, StorageType) - end, - table_definitions(Type)), - ok. +wait_for(Condition) -> + error_logger:info_msg("Waiting for ~p...~n", [Condition]), + timer:sleep(1000). -create_local_table_copy(Tab, Type) -> - StorageType = mnesia:table_info(Tab, storage_type), - {atomic, ok} = - if - StorageType == unknown -> - mnesia:add_table_copy(Tab, node(), Type); - StorageType /= Type -> - mnesia:change_table_copy_type(Tab, node(), Type); - true -> {atomic, ok} - end, - ok. +start_mnesia(CheckConsistency) -> + case CheckConsistency of + true -> check_cluster_consistency(); + false -> ok + end, + rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + ensure_mnesia_running(). -wait_for_replicated_tables() -> wait_for_tables(replicated_table_names()). +start_mnesia() -> + start_mnesia(true). -wait_for_tables() -> wait_for_tables(table_names()). +stop_mnesia() -> + stopped = mnesia:stop(), + ensure_mnesia_not_running(). -wait_for_tables(TableNames) -> - case mnesia:wait_for_tables(TableNames, 30000) of - ok -> - ok; - {timeout, BadTabs} -> - throw({error, {timeout_waiting_for_tables, BadTabs}}); - {error, Reason} -> - throw({error, {failed_waiting_for_tables, Reason}}) +change_extra_db_nodes(ClusterNodes0, CheckOtherNodes) -> + ClusterNodes = nodes_excl_me(ClusterNodes0), + case {mnesia:change_config(extra_db_nodes, ClusterNodes), ClusterNodes} of + {{ok, []}, [_|_]} when CheckOtherNodes -> + throw({error, {failed_to_cluster_with, ClusterNodes, + "Mnesia could not connect to any nodes."}}); + {{ok, Nodes}, _} -> + Nodes end. -reset(Force) -> - rabbit_misc:local_info_msg("Resetting Rabbit~s~n", [if Force -> " forcefully"; - true -> "" - end]), - ensure_mnesia_not_running(), - case not Force andalso is_clustered() andalso - is_only_disc_node(node(), false) - of - true -> log_both("no other disc nodes running"); - false -> ok - end, - Node = node(), - Nodes = all_clustered_nodes() -- [Node], - case Force of - true -> ok; +check_consistency(OTP, Rabbit) -> + rabbit_misc:sequence_error( + [check_otp_consistency(OTP), + check_rabbit_consistency(Rabbit)]). + +check_consistency(OTP, Rabbit, Node, Status) -> + rabbit_misc:sequence_error( + [check_otp_consistency(OTP), + check_rabbit_consistency(Rabbit), + check_nodes_consistency(Node, Status)]). + +check_nodes_consistency(Node, RemoteStatus = {RemoteAllNodes, _, _}) -> + case me_in_nodes(RemoteAllNodes) of + true -> + {ok, RemoteStatus}; false -> - ensure_mnesia_dir(), - start_mnesia(), - RunningNodes = - try - %% Force=true here so that reset still works when clustered - %% with a node which is down - ok = init_db(read_cluster_nodes_config(), true), - running_clustered_nodes() -- [Node] - after - stop_mnesia() - end, - leave_cluster(Nodes, RunningNodes), - rabbit_misc:ensure_ok(mnesia:delete_schema([Node]), - cannot_delete_schema) - end, - %% We need to make sure that we don't end up in a distributed - %% Erlang system with nodes while not being in an Mnesia cluster - %% with them. We don't handle that well. - [erlang:disconnect_node(N) || N <- Nodes], - ok = delete_cluster_nodes_config(), - %% remove persisted messages and any other garbage we find - ok = rabbit_file:recursive_delete(filelib:wildcard(dir() ++ "/*")), - ok. - -leave_cluster([], _) -> ok; -leave_cluster(Nodes, RunningNodes) -> - %% find at least one running cluster node and instruct it to - %% remove our schema copy which will in turn result in our node - %% being removed as a cluster node from the schema, with that - %% change being propagated to all nodes - case lists:any( - fun (Node) -> - case rpc:call(Node, mnesia, del_table_copy, - [schema, node()]) of - {atomic, ok} -> true; - {badrpc, nodedown} -> false; - {aborted, {node_not_running, _}} -> false; - {aborted, Reason} -> - throw({error, {failed_to_leave_cluster, - Nodes, RunningNodes, Reason}}) - end - end, - RunningNodes) of - true -> ok; - false -> throw({error, {no_running_cluster_nodes, - Nodes, RunningNodes}}) + {error, {inconsistent_cluster, + rabbit_misc:format("Node ~p thinks it's clustered " + "with node ~p, but ~p disagrees", + [node(), Node, Node])}} end. -wait_for(Condition) -> - error_logger:info_msg("Waiting for ~p...~n", [Condition]), - timer:sleep(1000). +check_version_consistency(This, Remote, Name) -> + check_version_consistency(This, Remote, Name, fun (A, B) -> A =:= B end). -on_node_up(Node) -> - case is_only_disc_node(Node, true) of - true -> rabbit_log:info("cluster contains disc nodes again~n"); - false -> ok +check_version_consistency(This, Remote, Name, Comp) -> + case Comp(This, Remote) of + true -> ok; + false -> version_error(Name, This, Remote) end. -on_node_down(Node) -> - case is_only_disc_node(Node, true) of - true -> rabbit_log:info("only running disc node went down~n"); - false -> ok +version_error(Name, This, Remote) -> + {error, {inconsistent_cluster, + rabbit_misc:format("~s version mismatch: local node is ~s, " + "remote node ~s", [Name, This, Remote])}}. + +check_otp_consistency(Remote) -> + check_version_consistency(erlang:system_info(otp_release), Remote, "OTP"). + +check_rabbit_consistency(Remote) -> + check_version_consistency( + rabbit_misc:version(), Remote, "Rabbit", + fun rabbit_misc:version_minor_equivalent/2). + +%% This is fairly tricky. We want to know if the node is in the state +%% that a `reset' would leave it in. We cannot simply check if the +%% mnesia tables aren't there because restarted RAM nodes won't have +%% tables while still being non-virgin. What we do instead is to +%% check if the mnesia directory is non existant or empty, with the +%% exception of the cluster status files, which will be there thanks to +%% `rabbit_node_monitor:prepare_cluster_status_file/0'. +is_virgin_node() -> + case rabbit_file:list_dir(dir()) of + {error, enoent} -> + true; + {ok, []} -> + true; + {ok, [File1, File2]} -> + lists:usort([dir() ++ "/" ++ File1, dir() ++ "/" ++ File2]) =:= + lists:usort([rabbit_node_monitor:cluster_status_filename(), + rabbit_node_monitor:running_nodes_filename()]); + {ok, _} -> + false end. -is_only_disc_node(Node, _MnesiaRunning = true) -> - RunningSet = sets:from_list(running_clustered_nodes()), - DiscSet = sets:from_list(nodes_of_type(disc_copies)), - [Node] =:= sets:to_list(sets:intersection(RunningSet, DiscSet)); -is_only_disc_node(Node, false) -> - start_mnesia(), - Res = is_only_disc_node(Node, true), - stop_mnesia(), - Res. - -log_both(Warning) -> - io:format("Warning: ~s~n", [Warning]), - rabbit_misc:with_local_io( - fun () -> error_logger:warning_msg("~s~n", [Warning]) end). - -start_mnesia() -> - rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), - ensure_mnesia_running(). +find_good_node([]) -> + none; +find_good_node([Node | Nodes]) -> + case rpc:call(Node, rabbit_mnesia, node_info, []) of + {badrpc, _Reason} -> find_good_node(Nodes); + %% old delegate hash check + {_OTP, _Rabbit, _Hash, _} -> find_good_node(Nodes); + {OTP, Rabbit, _} -> case check_consistency(OTP, Rabbit) of + {error, _} -> find_good_node(Nodes); + ok -> {ok, Node} + end + end. -stop_mnesia() -> - stopped = mnesia:stop(), - ensure_mnesia_not_running(). +is_only_clustered_disc_node() -> + node_type() =:= disc andalso is_clustered() andalso + cluster_nodes(disc) =:= [node()]. + +me_in_nodes(Nodes) -> lists:member(node(), Nodes). + +nodes_incl_me(Nodes) -> lists:usort([node()|Nodes]). + +nodes_excl_me(Nodes) -> Nodes -- [node()]. + +e(Tag) -> throw({error, {Tag, error_description(Tag)}}). + +error_description(clustering_only_disc_node) -> + "You cannot cluster a node if it is the only disc node in its existing " + " cluster. If new nodes joined while this node was offline, use " + "'update_cluster_nodes' to add them manually."; +error_description(resetting_only_disc_node) -> + "You cannot reset a node when it is the only disc node in a cluster. " + "Please convert another node of the cluster to a disc node first."; +error_description(not_clustered) -> + "Non-clustered nodes can only be disc nodes."; +error_description(cannot_connect_to_cluster) -> + "Could not connect to the cluster nodes present in this node's " + "status file. If the cluster has changed, you can use the " + "'update_cluster_nodes' command to point to the new cluster nodes."; +error_description(no_online_cluster_nodes) -> + "Could not find any online cluster nodes. If the cluster has changed, " + "you can use the 'update_cluster_nodes' command."; +error_description(cannot_connect_to_node) -> + "Could not connect to the cluster node provided."; +error_description(inconsistent_cluster) -> + "The nodes provided do not have this node as part of the cluster."; +error_description(not_a_cluster_node) -> + "The node selected is not in the cluster."; +error_description(online_node_offline_flag) -> + "You set the --offline flag, which is used to remove nodes remotely from " + "offline nodes, but this node is online."; +error_description(offline_node_no_offline_flag) -> + "You are trying to remove a node from an offline node. That is dangerous, " + "but can be done with the --offline flag. Please consult the manual " + "for rabbitmqctl for more information."; +error_description(removing_node_from_offline_node) -> + "To remove a node remotely from an offline node, the node you are removing " + "from must be a disc node and all the other nodes must be offline."; +error_description(no_running_cluster_nodes) -> + "You cannot leave a cluster if no online nodes are present.". diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index f685b109..a37106d6 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_msg_file). diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index d69dad1f..9a4439a7 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_msg_store). @@ -29,8 +29,8 @@ -export([transform_dir/3, force_recovery/2]). %% upgrade -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3, prioritise_call/3, prioritise_cast/2, - prioritise_info/2, format_message_queue/2]). + code_change/3, prioritise_call/4, prioritise_cast/3, + prioritise_info/3, format_message_queue/2]). %%---------------------------------------------------------------------------- @@ -51,6 +51,9 @@ -define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB + %% i.e. two pairs, so GC does not go idle when busy +-define(MAXIMUM_SIMULTANEOUS_GC_FILES, 4). + %%---------------------------------------------------------------------------- -record(msstate, @@ -624,7 +627,10 @@ client_update_flying(Diff, MsgId, #client_msstate { flying_ets = FlyingEts, Key = {MsgId, CRef}, case ets:insert_new(FlyingEts, {Key, Diff}) of true -> ok; - false -> try ets:update_counter(FlyingEts, Key, {2, Diff}) + false -> try ets:update_counter(FlyingEts, Key, {2, Diff}) of + 0 -> ok; + Diff -> ok; + Err -> throw({bad_flying_ets_update, Diff, Err, Key}) catch error:badarg -> %% this is guaranteed to succeed since the %% server only removes and updates flying_ets @@ -738,7 +744,7 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) -> hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -prioritise_call(Msg, _From, _State) -> +prioritise_call(Msg, _From, _Len, _State) -> case Msg of successfully_recovered_state -> 7; {new_client_state, _Ref, _Pid, _MODC, _CloseFDsFun} -> 7; @@ -746,7 +752,7 @@ prioritise_call(Msg, _From, _State) -> _ -> 0 end. -prioritise_cast(Msg, _State) -> +prioritise_cast(Msg, _Len, _State) -> case Msg of {combine_files, _Source, _Destination, _Reclaimed} -> 8; {delete_file, _File, _Reclaimed} -> 8; @@ -755,7 +761,7 @@ prioritise_cast(Msg, _State) -> _ -> 0 end. -prioritise_info(Msg, _State) -> +prioritise_info(Msg, _Len, _State) -> case Msg of sync -> 8; _ -> 0 @@ -943,15 +949,12 @@ next_state(State = #msstate { cref_to_msg_ids = CTM }) -> _ -> {State, 0} end. -start_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> - TRef = erlang:send_after(?SYNC_INTERVAL, self(), sync), - State #msstate { sync_timer_ref = TRef }. +start_sync_timer(State) -> + rabbit_misc:ensure_timer(State, #msstate.sync_timer_ref, + ?SYNC_INTERVAL, sync). -stop_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> - State; -stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) -> - erlang:cancel_timer(TRef), - State #msstate { sync_timer_ref = undefined }. +stop_sync_timer(State) -> + rabbit_misc:stop_timer(State, #msstate.sync_timer_ref). internal_sync(State = #msstate { current_file_handle = CurHdl, cref_to_msg_ids = CTM }) -> @@ -975,13 +978,21 @@ update_flying(Diff, MsgId, CRef, #msstate { flying_ets = FlyingEts }) -> NDiff = -Diff, case ets:lookup(FlyingEts, Key) of [] -> ignore; - [{_, Diff}] -> ignore; + [{_, Diff}] -> ignore; %% [1] [{_, NDiff}] -> ets:update_counter(FlyingEts, Key, {2, Diff}), true = ets:delete_object(FlyingEts, {Key, 0}), process; [{_, 0}] -> true = ets:delete_object(FlyingEts, {Key, 0}), - ignore + ignore; + [{_, Err}] -> throw({bad_flying_ets_record, Diff, Err, Key}) end. +%% [1] We can get here, for example, in the following scenario: There +%% is a write followed by a remove in flight. The counter will be 0, +%% so on processing the write the server attempts to delete the +%% entry. If at that point the client injects another write it will +%% either insert a new entry, containing +1, or increment the existing +%% entry to +1, thus preventing its removal. Either way therefore when +%% the server processes the read, the counter will be +1. write_action({true, not_found}, _MsgId, State) -> {ignore, undefined, State}; @@ -1394,7 +1405,7 @@ filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). -list_sorted_file_names(Dir, Ext) -> +list_sorted_filenames(Dir, Ext) -> lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end, filelib:wildcard("*" ++ Ext, Dir)). @@ -1531,8 +1542,8 @@ count_msg_refs(Gen, Seed, State) -> end. recover_crashed_compactions(Dir) -> - FileNames = list_sorted_file_names(Dir, ?FILE_EXTENSION), - TmpFileNames = list_sorted_file_names(Dir, ?FILE_EXTENSION_TMP), + FileNames = list_sorted_filenames(Dir, ?FILE_EXTENSION), + TmpFileNames = list_sorted_filenames(Dir, ?FILE_EXTENSION_TMP), lists:foreach( fun (TmpFileName) -> NonTmpRelatedFileName = @@ -1609,7 +1620,7 @@ build_index(false, {MsgRefDeltaGen, MsgRefDeltaGenInit}, ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), {ok, Pid} = gatherer:start_link(), case [filename_to_num(FileName) || - FileName <- list_sorted_file_names(Dir, ?FILE_EXTENSION)] of + FileName <- list_sorted_filenames(Dir, ?FILE_EXTENSION)] of [] -> build_index(Pid, undefined, [State #msstate.current_file], State); Files -> {Offset, State1} = build_index(Pid, undefined, Files, State), @@ -1731,10 +1742,12 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> %% TODO: the algorithm here is sub-optimal - it may result in a %% complete traversal of FileSummaryEts. - case ets:first(FileSummaryEts) of - '$end_of_table' -> + First = ets:first(FileSummaryEts), + case First =:= '$end_of_table' orelse + orddict:size(Pending) >= ?MAXIMUM_SIMULTANEOUS_GC_FILES of + true -> State; - First -> + false -> case find_files_to_combine(FileSummaryEts, FileSizeLimit, ets:lookup(FileSummaryEts, First)) of not_found -> @@ -2023,7 +2036,7 @@ transform_dir(BaseDir, Store, TransformFun) -> CopyFile = fun (Src, Dst) -> {ok, _Bytes} = file:copy(Src, Dst), ok end, case filelib:is_dir(TmpDir) of true -> throw({error, transform_failed_previously}); - false -> FileList = list_sorted_file_names(Dir, ?FILE_EXTENSION), + false -> FileList = list_sorted_filenames(Dir, ?FILE_EXTENSION), foreach_file(Dir, TmpDir, TransformFile, FileList), foreach_file(Dir, fun file:delete/1, FileList), foreach_file(TmpDir, Dir, CopyFile, FileList), diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl index 3defeaaf..c17ff2cb 100644 --- a/src/rabbit_msg_store_ets_index.erl +++ b/src/rabbit_msg_store_ets_index.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_msg_store_ets_index). diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 3b61ed0b..1edd7d51 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_msg_store_gc). @@ -23,7 +23,7 @@ -export([set_maximum_since_use/2]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3, prioritise_cast/2]). + terminate/2, code_change/3, prioritise_cast/3]). -record(state, { pending_no_readers, @@ -79,8 +79,8 @@ init([MsgStoreState]) -> msg_store_state = MsgStoreState }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -prioritise_cast({set_maximum_since_use, _Age}, _State) -> 8; -prioritise_cast(_Msg, _State) -> 0. +prioritise_cast({set_maximum_since_use, _Age}, _Len, _State) -> 8; +prioritise_cast(_Msg, _Len, _State) -> 0. handle_call(stop, _From, State) -> {stop, normal, ok, State}. diff --git a/src/rabbit_msg_store_index.erl b/src/rabbit_msg_store_index.erl index 6cc0b2a7..bb5f11b0 100644 --- a/src/rabbit_msg_store_index.erl +++ b/src/rabbit_msg_store_index.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_msg_store_index). diff --git a/src/rabbit_net.erl b/src/rabbit_net.erl index bedf5142..e8c96818 100644 --- a/src/rabbit_net.erl +++ b/src/rabbit_net.erl @@ -10,17 +10,17 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_net). -include("rabbit.hrl"). -export([is_ssl/1, ssl_info/1, controlling_process/2, getstat/2, - recv/1, async_recv/3, port_command/2, getopts/2, setopts/2, send/2, - close/1, maybe_fast_close/1, sockname/1, peername/1, peercert/1, - tune_buffer_size/1, connection_string/2]). + recv/1, sync_recv/2, async_recv/3, port_command/2, getopts/2, + setopts/2, send/2, close/1, fast_close/1, sockname/1, peername/1, + peercert/1, connection_string/2, socket_ends/2]). %%--------------------------------------------------------------------------- @@ -36,7 +36,7 @@ -type(socket() :: port() | #ssl_socket{}). -type(opts() :: [{atom(), any()} | {raw, non_neg_integer(), non_neg_integer(), binary()}]). - +-type(host_or_ip() :: binary() | inet:ip_address()). -spec(is_ssl/1 :: (socket()) -> boolean()). -spec(ssl_info/1 :: (socket()) -> 'nossl' | ok_val_or_error( @@ -48,6 +48,8 @@ -spec(recv/1 :: (socket()) -> {'data', [char()] | binary()} | 'closed' | rabbit_types:error(any()) | {'other', any()}). +-spec(sync_recv/2 :: (socket(), integer()) -> rabbit_types:ok(binary()) | + rabbit_types:error(any())). -spec(async_recv/3 :: (socket(), integer(), timeout()) -> rabbit_types:ok(any())). -spec(port_command/2 :: (socket(), iolist()) -> 'true'). @@ -59,7 +61,7 @@ -spec(setopts/2 :: (socket(), opts()) -> ok_or_any_error()). -spec(send/2 :: (socket(), binary() | iolist()) -> ok_or_any_error()). -spec(close/1 :: (socket()) -> ok_or_any_error()). --spec(maybe_fast_close/1 :: (socket()) -> ok_or_any_error()). +-spec(fast_close/1 :: (socket()) -> ok_or_any_error()). -spec(sockname/1 :: (socket()) -> ok_val_or_error({inet:ip_address(), rabbit_networking:ip_port()})). @@ -69,14 +71,19 @@ -spec(peercert/1 :: (socket()) -> 'nossl' | ok_val_or_error(rabbit_ssl:certificate())). --spec(tune_buffer_size/1 :: (socket()) -> ok_or_any_error()). -spec(connection_string/2 :: (socket(), 'inbound' | 'outbound') -> ok_val_or_error(string())). +-spec(socket_ends/2 :: + (socket(), 'inbound' | 'outbound') + -> ok_val_or_error({host_or_ip(), rabbit_networking:ip_port(), + host_or_ip(), rabbit_networking:ip_port()})). -endif. %%--------------------------------------------------------------------------- +-define(SSL_CLOSE_TIMEOUT, 5000). + -define(IS_SSL(Sock), is_record(Sock, ssl_socket)). is_ssl(Sock) -> ?IS_SSL(Sock). @@ -109,6 +116,11 @@ recv(S, {DataTag, ClosedTag, ErrorTag}) -> Other -> {other, Other} end. +sync_recv(Sock, Length) when ?IS_SSL(Sock) -> + ssl:recv(Sock#ssl_socket.ssl, Length); +sync_recv(Sock, Length) -> + gen_tcp:recv(Sock, Length). + async_recv(Sock, Length, Timeout) when ?IS_SSL(Sock) -> Pid = self(), Ref = make_ref(), @@ -148,8 +160,31 @@ send(Sock, Data) when is_port(Sock) -> gen_tcp:send(Sock, Data). close(Sock) when ?IS_SSL(Sock) -> ssl:close(Sock#ssl_socket.ssl); close(Sock) when is_port(Sock) -> gen_tcp:close(Sock). -maybe_fast_close(Sock) when ?IS_SSL(Sock) -> ok; -maybe_fast_close(Sock) when is_port(Sock) -> erlang:port_close(Sock), ok. +fast_close(Sock) when ?IS_SSL(Sock) -> + %% We cannot simply port_close the underlying tcp socket since the + %% TLS protocol is quite insistent that a proper closing handshake + %% should take place (see RFC 5245 s7.2.1). So we call ssl:close + %% instead, but that can block for a very long time, e.g. when + %% there is lots of pending output and there is tcp backpressure, + %% or the ssl_connection process has entered the the + %% workaround_transport_delivery_problems function during + %% termination, which, inexplicably, does a gen_tcp:recv(Socket, + %% 0), which may never return if the client doesn't send a FIN or + %% that gets swallowed by the network. Since there is no timeout + %% variant of ssl:close, we construct our own. + {Pid, MRef} = spawn_monitor(fun () -> ssl:close(Sock#ssl_socket.ssl) end), + erlang:send_after(?SSL_CLOSE_TIMEOUT, self(), {Pid, ssl_close_timeout}), + receive + {Pid, ssl_close_timeout} -> + erlang:demonitor(MRef, [flush]), + exit(Pid, kill); + {'DOWN', MRef, process, Pid, _Reason} -> + ok + end, + catch port_close(Sock#ssl_socket.tcp), + ok; +fast_close(Sock) when is_port(Sock) -> + catch port_close(Sock), ok. sockname(Sock) when ?IS_SSL(Sock) -> ssl:sockname(Sock#ssl_socket.ssl); sockname(Sock) when is_port(Sock) -> inet:sockname(Sock). @@ -160,25 +195,38 @@ peername(Sock) when is_port(Sock) -> inet:peername(Sock). peercert(Sock) when ?IS_SSL(Sock) -> ssl:peercert(Sock#ssl_socket.ssl); peercert(Sock) when is_port(Sock) -> nossl. -tune_buffer_size(Sock) -> - case getopts(Sock, [sndbuf, recbuf, buffer]) of - {ok, BufSizes} -> BufSz = lists:max([Sz || {_Opt, Sz} <- BufSizes]), - setopts(Sock, [{buffer, BufSz}]); - Err -> Err +connection_string(Sock, Direction) -> + case socket_ends(Sock, Direction) of + {ok, {FromAddress, FromPort, ToAddress, ToPort}} -> + {ok, rabbit_misc:format( + "~s:~p -> ~s:~p", + [maybe_ntoab(FromAddress), FromPort, + maybe_ntoab(ToAddress), ToPort])}; + Error -> + Error end. -connection_string(Sock, Direction) -> - {From, To} = case Direction of - inbound -> {fun peername/1, fun sockname/1}; - outbound -> {fun sockname/1, fun peername/1} - end, +socket_ends(Sock, Direction) -> + {From, To} = sock_funs(Direction), case {From(Sock), To(Sock)} of {{ok, {FromAddress, FromPort}}, {ok, {ToAddress, ToPort}}} -> - {ok, rabbit_misc:format("~s:~p -> ~s:~p", - [rabbit_misc:ntoab(FromAddress), FromPort, - rabbit_misc:ntoab(ToAddress), ToPort])}; + {ok, {rdns(FromAddress), FromPort, + rdns(ToAddress), ToPort}}; {{error, _Reason} = Error, _} -> Error; {_, {error, _Reason} = Error} -> Error end. + +maybe_ntoab(Addr) when is_tuple(Addr) -> rabbit_misc:ntoab(Addr); +maybe_ntoab(Host) -> Host. + +rdns(Addr) -> + {ok, Lookup} = application:get_env(rabbit, reverse_dns_lookups), + case Lookup of + true -> list_to_binary(rabbit_networking:tcp_host(Addr)); + _ -> Addr + end. + +sock_funs(inbound) -> {fun peername/1, fun sockname/1}; +sock_funs(outbound) -> {fun sockname/1, fun peername/1}. diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl index 78deea97..46cfabe3 100644 --- a/src/rabbit_networking.erl +++ b/src/rabbit_networking.erl @@ -10,18 +10,19 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_networking). -export([boot/0, start/0, start_tcp_listener/1, start_ssl_listener/2, stop_tcp_listener/1, on_node_down/1, active_listeners/0, - node_listeners/1, connections/0, connection_info_keys/0, + node_listeners/1, register_connection/1, unregister_connection/1, + connections/0, connection_info_keys/0, connection_info/1, connection_info/2, connection_info_all/0, connection_info_all/1, - close_connection/2, force_connection_event_refresh/0]). + close_connection/2, force_connection_event_refresh/0, tcp_host/1]). %%used by TCP-based transports, e.g. STOMP adapter -export([tcp_listener_addresses/1, tcp_listener_spec/6, @@ -65,6 +66,8 @@ -spec(stop_tcp_listener/1 :: (listener_config()) -> 'ok'). -spec(active_listeners/0 :: () -> [rabbit_types:listener()]). -spec(node_listeners/1 :: (node()) -> [rabbit_types:listener()]). +-spec(register_connection/1 :: (pid()) -> ok). +-spec(unregister_connection/1 :: (pid()) -> ok). -spec(connections/0 :: () -> [rabbit_types:connection()]). -spec(connections_local/0 :: () -> [rabbit_types:connection()]). -spec(connection_info_keys/0 :: () -> rabbit_types:info_keys()). @@ -142,7 +145,8 @@ start() -> rabbit_sup:start_supervisor_child( {rabbit_connection_sup,start_link,[]}]). ensure_ssl() -> - ok = rabbit_misc:start_applications([crypto, public_key, ssl]), + {ok, SslAppsConfig} = application:get_env(rabbit, ssl_apps), + ok = app_utils:start_applications(SslAppsConfig), {ok, SslOptsConfig} = application:get_env(rabbit, ssl_options), % unknown_ca errors are silently ignored prior to R14B unless we @@ -160,7 +164,19 @@ ssl_transform_fun(SslOpts) -> case catch ssl:ssl_accept(Sock, SslOpts, ?SSL_TIMEOUT * 1000) of {ok, SslSock} -> {ok, #ssl_socket{tcp = Sock, ssl = SslSock}}; + {error, timeout} -> + {error, {ssl_upgrade_error, timeout}}; {error, Reason} -> + %% We have no idea what state the ssl_connection + %% process is in - it could still be happily + %% going, it might be stuck, or it could be just + %% about to fail. There is little that our caller + %% can do but close the TCP socket, but this could + %% cause ssl alerts to get dropped (which is bad + %% form, according to the TLS spec). So we give + %% the ssl_connection a little bit of time to send + %% such alerts. + timer:sleep(?SSL_TIMEOUT * 1000), {error, {ssl_upgrade_error, Reason}}; {'EXIT', Reason} -> {error, {ssl_upgrade_failure, Reason}} @@ -282,20 +298,15 @@ start_client(Sock) -> start_ssl_client(SslOpts, Sock) -> start_client(Sock, ssl_transform_fun(SslOpts)). +register_connection(Pid) -> pg_local:join(rabbit_connections, Pid). + +unregister_connection(Pid) -> pg_local:leave(rabbit_connections, Pid). + connections() -> - rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:cluster_nodes(running), rabbit_networking, connections_local, []). -connections_local() -> - [Reader || - {_, ConnSup, supervisor, _} - <- supervisor:which_children(rabbit_tcp_client_sup), - Reader <- [try - rabbit_connection_sup:reader(ConnSup) - catch exit:{noproc, _} -> - noproc - end], - Reader =/= noproc]. +connections_local() -> pg_local:get_members(rabbit_connections). connection_info_keys() -> rabbit_reader:info_keys(). diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 323cf0ce..57dce7cd 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_node_monitor). @@ -19,78 +19,321 @@ -behaviour(gen_server). -export([start_link/0]). +-export([running_nodes_filename/0, + cluster_status_filename/0, prepare_cluster_status_files/0, + write_cluster_status/1, read_cluster_status/0, + update_cluster_status/0, reset_cluster_status/0]). +-export([notify_node_up/0, notify_joined_cluster/0, notify_left_cluster/1]). +-export([partitions/0, partitions/1, subscribe/1]). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). --export([notify_cluster/0, rabbit_running_on/1]). +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + + %% Utils +-export([all_rabbit_nodes_up/0, run_outside_applications/1]). -define(SERVER, ?MODULE). -define(RABBIT_UP_RPC_TIMEOUT, 2000). +-define(RABBIT_DOWN_PING_INTERVAL, 1000). + +-record(state, {monitors, partitions, subscribers, down_ping_timer, autoheal}). %%---------------------------------------------------------------------------- -ifdef(use_specs). -spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). --spec(rabbit_running_on/1 :: (node()) -> 'ok'). --spec(notify_cluster/0 :: () -> 'ok'). + +-spec(running_nodes_filename/0 :: () -> string()). +-spec(cluster_status_filename/0 :: () -> string()). +-spec(prepare_cluster_status_files/0 :: () -> 'ok'). +-spec(write_cluster_status/1 :: (rabbit_mnesia:cluster_status()) -> 'ok'). +-spec(read_cluster_status/0 :: () -> rabbit_mnesia:cluster_status()). +-spec(update_cluster_status/0 :: () -> 'ok'). +-spec(reset_cluster_status/0 :: () -> 'ok'). + +-spec(notify_node_up/0 :: () -> 'ok'). +-spec(notify_joined_cluster/0 :: () -> 'ok'). +-spec(notify_left_cluster/1 :: (node()) -> 'ok'). + +-spec(partitions/0 :: () -> [node()]). +-spec(partitions/1 :: ([node()]) -> [{node(), [node()]}]). +-spec(subscribe/1 :: (pid()) -> 'ok'). + +-spec(all_rabbit_nodes_up/0 :: () -> boolean()). +-spec(run_outside_applications/1 :: (fun (() -> any())) -> pid()). -endif. -%%-------------------------------------------------------------------- +%%---------------------------------------------------------------------------- +%% Start +%%---------------------------------------------------------------------------- + +start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + +%%---------------------------------------------------------------------------- +%% Cluster file operations +%%---------------------------------------------------------------------------- + +%% The cluster file information is kept in two files. The "cluster +%% status file" contains all the clustered nodes and the disc nodes. +%% The "running nodes file" contains the currently running nodes or +%% the running nodes at shutdown when the node is down. +%% +%% We strive to keep the files up to date and we rely on this +%% assumption in various situations. Obviously when mnesia is offline +%% the information we have will be outdated, but it cannot be +%% otherwise. + +running_nodes_filename() -> + filename:join(rabbit_mnesia:dir(), "nodes_running_at_shutdown"). + +cluster_status_filename() -> + rabbit_mnesia:dir() ++ "/cluster_nodes.config". + +prepare_cluster_status_files() -> + rabbit_mnesia:ensure_mnesia_dir(), + Corrupt = fun(F) -> throw({error, corrupt_cluster_status_files, F}) end, + RunningNodes1 = case try_read_file(running_nodes_filename()) of + {ok, [Nodes]} when is_list(Nodes) -> Nodes; + {ok, Other} -> Corrupt(Other); + {error, enoent} -> [] + end, + ThisNode = [node()], + %% The running nodes file might contain a set or a list, in case + %% of the legacy file + RunningNodes2 = lists:usort(ThisNode ++ RunningNodes1), + {AllNodes1, WantDiscNode} = + case try_read_file(cluster_status_filename()) of + {ok, [{AllNodes, DiscNodes0}]} -> + {AllNodes, lists:member(node(), DiscNodes0)}; + {ok, [AllNodes0]} when is_list(AllNodes0) -> + {legacy_cluster_nodes(AllNodes0), + legacy_should_be_disc_node(AllNodes0)}; + {ok, Files} -> + Corrupt(Files); + {error, enoent} -> + {legacy_cluster_nodes([]), true} + end, + AllNodes2 = lists:usort(AllNodes1 ++ RunningNodes2), + DiscNodes = case WantDiscNode of + true -> ThisNode; + false -> [] + end, + ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). + +write_cluster_status({All, Disc, Running}) -> + ClusterStatusFN = cluster_status_filename(), + Res = case rabbit_file:write_term_file(ClusterStatusFN, [{All, Disc}]) of + ok -> + RunningNodesFN = running_nodes_filename(), + {RunningNodesFN, + rabbit_file:write_term_file(RunningNodesFN, [Running])}; + E1 = {error, _} -> + {ClusterStatusFN, E1} + end, + case Res of + {_, ok} -> ok; + {FN, {error, E2}} -> throw({error, {could_not_write_file, FN, E2}}) + end. -start_link() -> - gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). - -rabbit_running_on(Node) -> - gen_server:cast(rabbit_node_monitor, {rabbit_running_on, Node}). - -notify_cluster() -> - Node = node(), - Nodes = rabbit_mnesia:running_clustered_nodes() -- [Node], - %% notify other rabbits of this rabbit - case rpc:multicall(Nodes, rabbit_node_monitor, rabbit_running_on, - [Node], ?RABBIT_UP_RPC_TIMEOUT) of - {_, [] } -> ok; - {_, Bad} -> rabbit_log:info("failed to contact nodes ~p~n", [Bad]) - end, +read_cluster_status() -> + case {try_read_file(cluster_status_filename()), + try_read_file(running_nodes_filename())} of + {{ok, [{All, Disc}]}, {ok, [Running]}} when is_list(Running) -> + {All, Disc, Running}; + {Stat, Run} -> + throw({error, {corrupt_or_missing_cluster_files, Stat, Run}}) + end. + +update_cluster_status() -> + {ok, Status} = rabbit_mnesia:cluster_status_from_mnesia(), + write_cluster_status(Status). + +reset_cluster_status() -> + write_cluster_status({[node()], [node()], [node()]}). + +%%---------------------------------------------------------------------------- +%% Cluster notifications +%%---------------------------------------------------------------------------- + +notify_node_up() -> + Nodes = rabbit_mnesia:cluster_nodes(running) -- [node()], + gen_server:abcast(Nodes, ?SERVER, + {node_up, node(), rabbit_mnesia:node_type()}), %% register other active rabbits with this rabbit - [ rabbit_running_on(N) || N <- Nodes ], + DiskNodes = rabbit_mnesia:cluster_nodes(disc), + [gen_server:cast(?SERVER, {node_up, N, case lists:member(N, DiskNodes) of + true -> disc; + false -> ram + end}) || N <- Nodes], ok. -%%-------------------------------------------------------------------- +notify_joined_cluster() -> + Nodes = rabbit_mnesia:cluster_nodes(running) -- [node()], + gen_server:abcast(Nodes, ?SERVER, + {joined_cluster, node(), rabbit_mnesia:node_type()}), + ok. + +notify_left_cluster(Node) -> + Nodes = rabbit_mnesia:cluster_nodes(running), + gen_server:abcast(Nodes, ?SERVER, {left_cluster, Node}), + ok. + +%%---------------------------------------------------------------------------- +%% Server calls +%%---------------------------------------------------------------------------- + +partitions() -> + gen_server:call(?SERVER, partitions, infinity). + +partitions(Nodes) -> + {Replies, _} = gen_server:multi_call(Nodes, ?SERVER, partitions, infinity), + Replies. + +subscribe(Pid) -> + gen_server:cast(?SERVER, {subscribe, Pid}). + +%%---------------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------------- init([]) -> - {ok, ordsets:new()}. + %% We trap exits so that the supervisor will not just kill us. We + %% want to be sure that we are not going to be killed while + %% writing out the cluster status files - bad things can then + %% happen. + process_flag(trap_exit, true), + net_kernel:monitor_nodes(true), + {ok, _} = mnesia:subscribe(system), + {ok, #state{monitors = pmon:new(), + subscribers = pmon:new(), + partitions = [], + autoheal = rabbit_autoheal:init()}}. + +handle_call(partitions, _From, State = #state{partitions = Partitions}) -> + {reply, Partitions, State}; handle_call(_Request, _From, State) -> {noreply, State}. -handle_cast({rabbit_running_on, Node}, Nodes) -> - case ordsets:is_element(Node, Nodes) of - true -> {noreply, Nodes}; +%% Note: when updating the status file, we can't simply write the +%% mnesia information since the message can (and will) overtake the +%% mnesia propagation. +handle_cast({node_up, Node, NodeType}, + State = #state{monitors = Monitors}) -> + case pmon:is_monitored({rabbit, Node}, Monitors) of + true -> {noreply, State}; false -> rabbit_log:info("rabbit on node ~p up~n", [Node]), - erlang:monitor(process, {rabbit, Node}), + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({add_node(Node, AllNodes), + case NodeType of + disc -> add_node(Node, DiscNodes); + ram -> DiscNodes + end, + add_node(Node, RunningNodes)}), ok = handle_live_rabbit(Node), - {noreply, ordsets:add_element(Node, Nodes)} + {noreply, State#state{ + monitors = pmon:monitor({rabbit, Node}, Monitors)}} end; +handle_cast({joined_cluster, Node, NodeType}, State) -> + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({add_node(Node, AllNodes), + case NodeType of + disc -> add_node(Node, DiscNodes); + ram -> DiscNodes + end, + RunningNodes}), + {noreply, State}; +handle_cast({left_cluster, Node}, State) -> + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({del_node(Node, AllNodes), del_node(Node, DiscNodes), + del_node(Node, RunningNodes)}), + {noreply, State}; +handle_cast({subscribe, Pid}, State = #state{subscribers = Subscribers}) -> + {noreply, State#state{subscribers = pmon:monitor(Pid, Subscribers)}}; handle_cast(_Msg, State) -> {noreply, State}. -handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, Nodes) -> +handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, + State = #state{monitors = Monitors, subscribers = Subscribers}) -> rabbit_log:info("rabbit on node ~p down~n", [Node]), + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}), ok = handle_dead_rabbit(Node), - {noreply, ordsets:del_element(Node, Nodes)}; + [P ! {node_down, Node} || P <- pmon:monitored(Subscribers)], + {noreply, handle_dead_rabbit_state( + Node, + State#state{monitors = pmon:erase({rabbit, Node}, Monitors)})}; + +handle_info({'DOWN', _MRef, process, Pid, _Reason}, + State = #state{subscribers = Subscribers}) -> + {noreply, State#state{subscribers = pmon:erase(Pid, Subscribers)}}; + +handle_info({nodedown, Node}, State) -> + ok = handle_dead_node(Node), + {noreply, State}; + +handle_info({mnesia_system_event, + {inconsistent_database, running_partitioned_network, Node}}, + State = #state{partitions = Partitions, + monitors = Monitors, + autoheal = AState}) -> + %% We will not get a node_up from this node - yet we should treat it as + %% up (mostly). + State1 = case pmon:is_monitored({rabbit, Node}, Monitors) of + true -> State; + false -> State#state{ + monitors = pmon:monitor({rabbit, Node}, Monitors)} + end, + ok = handle_live_rabbit(Node), + Partitions1 = ordsets:to_list( + ordsets:add_element(Node, ordsets:from_list(Partitions))), + {noreply, State1#state{partitions = Partitions1, + autoheal = rabbit_autoheal:maybe_start(AState)}}; + +handle_info({autoheal_msg, Msg}, State = #state{autoheal = AState, + partitions = Partitions}) -> + AState1 = rabbit_autoheal:handle_msg(Msg, AState, Partitions), + {noreply, State#state{autoheal = AState1}}; + +handle_info(ping_nodes, State) -> + %% We ping nodes when some are down to ensure that we find out + %% about healed partitions quickly. We ping all nodes rather than + %% just the ones we know are down for simplicity; it's not expensive + %% to ping the nodes that are up, after all. + State1 = State#state{down_ping_timer = undefined}, + Self = self(), + %% all_nodes_up() both pings all the nodes and tells us if we need to again. + %% + %% We ping in a separate process since in a partition it might + %% take some noticeable length of time and we don't want to block + %% the node monitor for that long. + spawn_link(fun () -> + case all_nodes_up() of + true -> ok; + false -> Self ! ping_again + end + end), + {noreply, State1}; + +handle_info(ping_again, State) -> + {noreply, ensure_ping_timer(State)}; + handle_info(_Info, State) -> {noreply, State}. -terminate(_Reason, _State) -> +terminate(_Reason, State) -> + rabbit_misc:stop_timer(State, #state.down_ping_timer), ok. code_change(_OldVsn, State, _Extra) -> {ok, State}. -%%-------------------------------------------------------------------- +%%---------------------------------------------------------------------------- +%% Functions that call the module specific hooks when nodes go up/down +%%---------------------------------------------------------------------------- %% TODO: This may turn out to be a performance hog when there are lots %% of nodes. We really only need to execute some of these statements @@ -99,8 +342,135 @@ handle_dead_rabbit(Node) -> ok = rabbit_networking:on_node_down(Node), ok = rabbit_amqqueue:on_node_down(Node), ok = rabbit_alarm:on_node_down(Node), - ok = rabbit_mnesia:on_node_down(Node). + ok = rabbit_mnesia:on_node_down(Node), + ok. + +handle_dead_node(_Node) -> + %% In general in rabbit_node_monitor we care about whether the + %% rabbit application is up rather than the node; we do this so + %% that we can respond in the same way to "rabbitmqctl stop_app" + %% and "rabbitmqctl stop" as much as possible. + %% + %% However, for pause_minority mode we can't do this, since we + %% depend on looking at whether other nodes are up to decide + %% whether to come back up ourselves - if we decide that based on + %% the rabbit application we would go down and never come back. + case application:get_env(rabbit, cluster_partition_handling) of + {ok, pause_minority} -> + case majority() of + true -> ok; + false -> await_cluster_recovery() + end; + {ok, ignore} -> + ok; + {ok, autoheal} -> + ok; + {ok, Term} -> + rabbit_log:warning("cluster_partition_handling ~p unrecognised, " + "assuming 'ignore'~n", [Term]), + ok + end. + +await_cluster_recovery() -> + rabbit_log:warning("Cluster minority status detected - awaiting recovery~n", + []), + Nodes = rabbit_mnesia:cluster_nodes(all), + run_outside_applications(fun () -> + rabbit:stop(), + wait_for_cluster_recovery(Nodes) + end), + ok. + +run_outside_applications(Fun) -> + spawn(fun () -> + %% If our group leader is inside an application we are about + %% to stop, application:stop/1 does not return. + group_leader(whereis(init), self()), + %% Ensure only one such process at a time, the + %% exit(badarg) is harmless if one is already running + try register(rabbit_outside_app_process, self()) of + true -> Fun() + catch error:badarg -> ok + end + end). + +wait_for_cluster_recovery(Nodes) -> + case majority() of + true -> rabbit:start(); + false -> timer:sleep(?RABBIT_DOWN_PING_INTERVAL), + wait_for_cluster_recovery(Nodes) + end. + +handle_dead_rabbit_state(Node, State = #state{partitions = Partitions, + autoheal = Autoheal}) -> + %% If we have been partitioned, and we are now in the only remaining + %% partition, we no longer care about partitions - forget them. Note + %% that we do not attempt to deal with individual (other) partitions + %% going away. It's only safe to forget anything about partitions when + %% there are no partitions. + Partitions1 = case Partitions -- (Partitions -- alive_rabbit_nodes()) of + [] -> []; + _ -> Partitions + end, + ensure_ping_timer( + State#state{partitions = Partitions1, + autoheal = rabbit_autoheal:node_down(Node, Autoheal)}). + +ensure_ping_timer(State) -> + rabbit_misc:ensure_timer( + State, #state.down_ping_timer, ?RABBIT_DOWN_PING_INTERVAL, ping_nodes). handle_live_rabbit(Node) -> ok = rabbit_alarm:on_node_up(Node), ok = rabbit_mnesia:on_node_up(Node). + +%%-------------------------------------------------------------------- +%% Internal utils +%%-------------------------------------------------------------------- + +try_read_file(FileName) -> + case rabbit_file:read_term_file(FileName) of + {ok, Term} -> {ok, Term}; + {error, enoent} -> {error, enoent}; + {error, E} -> throw({error, {cannot_read_file, FileName, E}}) + end. + +legacy_cluster_nodes(Nodes) -> + %% We get all the info that we can, including the nodes from + %% mnesia, which will be there if the node is a disc node (empty + %% list otherwise) + lists:usort(Nodes ++ mnesia:system_info(db_nodes)). + +legacy_should_be_disc_node(DiscNodes) -> + DiscNodes == [] orelse lists:member(node(), DiscNodes). + +add_node(Node, Nodes) -> lists:usort([Node | Nodes]). + +del_node(Node, Nodes) -> Nodes -- [Node]. + +%%-------------------------------------------------------------------- + +%% mnesia:system_info(db_nodes) (and hence +%% rabbit_mnesia:cluster_nodes(running)) does not give reliable +%% results when partitioned. So we have a small set of replacement +%% functions here. "rabbit" in a function's name implies we test if +%% the rabbit application is up, not just the node. + +majority() -> + Nodes = rabbit_mnesia:cluster_nodes(all), + length(alive_nodes(Nodes)) / length(Nodes) > 0.5. + +all_nodes_up() -> + Nodes = rabbit_mnesia:cluster_nodes(all), + length(alive_nodes(Nodes)) =:= length(Nodes). + +all_rabbit_nodes_up() -> + Nodes = rabbit_mnesia:cluster_nodes(all), + length(alive_rabbit_nodes(Nodes)) =:= length(Nodes). + +alive_nodes(Nodes) -> [N || N <- Nodes, pong =:= net_adm:ping(N)]. + +alive_rabbit_nodes() -> alive_rabbit_nodes(rabbit_mnesia:cluster_nodes(all)). + +alive_rabbit_nodes(Nodes) -> + [N || N <- alive_nodes(Nodes), rabbit:is_running(N)]. diff --git a/src/rabbit_nodes.erl b/src/rabbit_nodes.erl index 1c23632d..b54fdd2e 100644 --- a/src/rabbit_nodes.erl +++ b/src/rabbit_nodes.erl @@ -10,13 +10,14 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_nodes). --export([names/1, diagnostics/1, make/1, parts/1, cookie_hash/0, is_running/2]). +-export([names/1, diagnostics/1, make/1, parts/1, cookie_hash/0, + is_running/2, is_process_running/2]). -define(EPMD_TIMEOUT, 30000). @@ -33,6 +34,7 @@ -spec(parts/1 :: (node() | string()) -> {string(), string()}). -spec(cookie_hash/0 :: () -> string()). -spec(is_running/2 :: (node(), atom()) -> boolean()). +-spec(is_process_running/2 :: (node(), atom()) -> boolean()). -endif. @@ -70,8 +72,8 @@ diagnostics0() -> diagnostics_host(Host) -> case names(Host) of {error, EpmdReason} -> - {"- unable to connect to epmd on ~s: ~w", - [Host, EpmdReason]}; + {"- unable to connect to epmd on ~s: ~w (~s)", + [Host, EpmdReason, rabbit_misc:format_inet_error(EpmdReason)]}; {ok, NamePorts} -> {"- ~s: ~p", [Host, [{list_to_atom(Name), Port} || @@ -94,7 +96,14 @@ cookie_hash() -> base64:encode_to_string(erlang:md5(atom_to_list(erlang:get_cookie()))). is_running(Node, Application) -> - case rpc:call(Node, application, which_applications, [infinity]) of + case rpc:call(Node, rabbit_misc, which_applications, []) of {badrpc, _} -> false; Apps -> proplists:is_defined(Application, Apps) end. + +is_process_running(Node, Process) -> + case rpc:call(Node, erlang, whereis, [Process]) of + {badrpc, _} -> false; + undefined -> false; + P when is_pid(P) -> true + end. diff --git a/src/rabbit_parameter_validation.erl b/src/rabbit_parameter_validation.erl new file mode 100644 index 00000000..0a878432 --- /dev/null +++ b/src/rabbit_parameter_validation.erl @@ -0,0 +1,87 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_parameter_validation). + +-export([number/2, binary/2, boolean/2, list/2, regex/2, proplist/3, enum/1]). + +number(_Name, Term) when is_number(Term) -> + ok; + +number(Name, Term) -> + {error, "~s should be number, actually was ~p", [Name, Term]}. + +binary(_Name, Term) when is_binary(Term) -> + ok; + +binary(Name, Term) -> + {error, "~s should be binary, actually was ~p", [Name, Term]}. + +boolean(_Name, Term) when is_boolean(Term) -> + ok; +boolean(Name, Term) -> + {error, "~s should be boolean, actually was ~p", [Name, Term]}. + +list(_Name, Term) when is_list(Term) -> + ok; + +list(Name, Term) -> + {error, "~s should be list, actually was ~p", [Name, Term]}. + +regex(Name, Term) when is_binary(Term) -> + case re:compile(Term) of + {ok, _} -> ok; + {error, Reason} -> {error, "~s should be regular expression " + "but is invalid: ~p", [Name, Reason]} + end; +regex(Name, Term) -> + {error, "~s should be a binary but was ~p", [Name, Term]}. + +proplist(Name, Constraints, Term) when is_list(Term) -> + {Results, Remainder} + = lists:foldl( + fun ({Key, Fun, Needed}, {Results0, Term0}) -> + case {lists:keytake(Key, 1, Term0), Needed} of + {{value, {Key, Value}, Term1}, _} -> + {[Fun(Key, Value) | Results0], + Term1}; + {false, mandatory} -> + {[{error, "Key \"~s\" not found in ~s", + [Key, Name]} | Results0], Term0}; + {false, optional} -> + {Results0, Term0} + end + end, {[], Term}, Constraints), + case Remainder of + [] -> Results; + _ -> [{error, "Unrecognised terms ~p in ~s", [Remainder, Name]} + | Results] + end; + +proplist(Name, _Constraints, Term) -> + {error, "~s not a list ~p", [Name, Term]}. + +enum(OptionsA) -> + Options = [list_to_binary(atom_to_list(O)) || O <- OptionsA], + fun (Name, Term) when is_binary(Term) -> + case lists:member(Term, Options) of + true -> ok; + false -> {error, "~s should be one of ~p, actually was ~p", + [Name, Options, Term]} + end; + (Name, Term) -> + {error, "~s should be binary, actually was ~p", [Name, Term]} + end. diff --git a/src/rabbit_plugins.erl b/src/rabbit_plugins.erl index 30c7bb37..168ced3c 100644 --- a/src/rabbit_plugins.erl +++ b/src/rabbit_plugins.erl @@ -10,164 +10,48 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2011-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2011-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_plugins). -include("rabbit.hrl"). --export([start/0, stop/0, find_plugins/1, read_enabled_plugins/1, - lookup_plugins/2, calculate_required_plugins/2, plugin_names/1]). - --define(VERBOSE_OPT, "-v"). --define(MINIMAL_OPT, "-m"). --define(ENABLED_OPT, "-E"). --define(ENABLED_ALL_OPT, "-e"). - --define(VERBOSE_DEF, {?VERBOSE_OPT, flag}). --define(MINIMAL_DEF, {?MINIMAL_OPT, flag}). --define(ENABLED_DEF, {?ENABLED_OPT, flag}). --define(ENABLED_ALL_DEF, {?ENABLED_ALL_OPT, flag}). - --define(GLOBAL_DEFS, []). - --define(COMMANDS, - [{list, [?VERBOSE_DEF, ?MINIMAL_DEF, ?ENABLED_DEF, ?ENABLED_ALL_DEF]}, - enable, - disable]). +-export([setup/0, active/0, read_enabled/1, list/1, dependencies/3]). %%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(start/0 :: () -> no_return()). --spec(stop/0 :: () -> 'ok'). --spec(find_plugins/1 :: (file:filename()) -> [#plugin{}]). --spec(read_enabled_plugins/1 :: (file:filename()) -> [atom()]). --spec(lookup_plugins/2 :: ([atom()], [#plugin{}]) -> [#plugin{}]). --spec(calculate_required_plugins/2 :: ([atom()], [#plugin{}]) -> [atom()]). --spec(plugin_names/1 :: ([#plugin{}]) -> [atom()]). - --endif. - -%%---------------------------------------------------------------------------- - -start() -> - {ok, [[PluginsFile|_]|_]} = - init:get_argument(enabled_plugins_file), - {ok, [[PluginsDir|_]|_]} = init:get_argument(plugins_dist_dir), - {Command, Opts, Args} = - case rabbit_misc:parse_arguments(?COMMANDS, ?GLOBAL_DEFS, - init:get_plain_arguments()) - of - {ok, Res} -> Res; - no_command -> print_error("could not recognise command", []), - usage() - end, - - PrintInvalidCommandError = - fun () -> - print_error("invalid command '~s'", - [string:join([atom_to_list(Command) | Args], " ")]) - end, - - case catch action(Command, Args, Opts, PluginsFile, PluginsDir) of - ok -> - rabbit_misc:quit(0); - {'EXIT', {function_clause, [{?MODULE, action, _} | _]}} -> - PrintInvalidCommandError(), - usage(); - {'EXIT', {function_clause, [{?MODULE, action, _, _} | _]}} -> - PrintInvalidCommandError(), - usage(); - {error, Reason} -> - print_error("~p", [Reason]), - rabbit_misc:quit(2); - {error_string, Reason} -> - print_error("~s", [Reason]), - rabbit_misc:quit(2); - Other -> - print_error("~p", [Other]), - rabbit_misc:quit(2) - end. - -stop() -> - ok. +-type(plugin_name() :: atom()). -print_error(Format, Args) -> - rabbit_misc:format_stderr("Error: " ++ Format ++ "~n", Args). +-spec(setup/0 :: () -> [plugin_name()]). +-spec(active/0 :: () -> [plugin_name()]). +-spec(list/1 :: (string()) -> [#plugin{}]). +-spec(read_enabled/1 :: (file:filename()) -> [plugin_name()]). +-spec(dependencies/3 :: (boolean(), [plugin_name()], [#plugin{}]) -> + [plugin_name()]). -usage() -> - io:format("~s", [rabbit_plugins_usage:usage()]), - rabbit_misc:quit(1). - -%%---------------------------------------------------------------------------- - -action(list, [], Opts, PluginsFile, PluginsDir) -> - action(list, [".*"], Opts, PluginsFile, PluginsDir); -action(list, [Pat], Opts, PluginsFile, PluginsDir) -> - format_plugins(Pat, Opts, PluginsFile, PluginsDir); - -action(enable, ToEnable0, _Opts, PluginsFile, PluginsDir) -> - case ToEnable0 of - [] -> throw({error_string, "Not enough arguments for 'enable'"}); - _ -> ok - end, - AllPlugins = find_plugins(PluginsDir), - Enabled = read_enabled_plugins(PluginsFile), - ImplicitlyEnabled = calculate_required_plugins(Enabled, AllPlugins), - ToEnable = [list_to_atom(Name) || Name <- ToEnable0], - Missing = ToEnable -- plugin_names(AllPlugins), - case Missing of - [] -> ok; - _ -> throw({error_string, - fmt_list("The following plugins could not be found:", - Missing)}) - end, - NewEnabled = lists:usort(Enabled ++ ToEnable), - write_enabled_plugins(PluginsFile, NewEnabled), - NewImplicitlyEnabled = calculate_required_plugins(NewEnabled, AllPlugins), - maybe_warn_mochiweb(NewImplicitlyEnabled), - case NewEnabled -- ImplicitlyEnabled of - [] -> io:format("Plugin configuration unchanged.~n"); - _ -> print_list("The following plugins have been enabled:", - NewImplicitlyEnabled -- ImplicitlyEnabled), - report_change() - end; - -action(disable, ToDisable0, _Opts, PluginsFile, PluginsDir) -> - case ToDisable0 of - [] -> throw({error_string, "Not enough arguments for 'disable'"}); - _ -> ok - end, - ToDisable = [list_to_atom(Name) || Name <- ToDisable0], - Enabled = read_enabled_plugins(PluginsFile), - AllPlugins = find_plugins(PluginsDir), - Missing = ToDisable -- plugin_names(AllPlugins), - case Missing of - [] -> ok; - _ -> print_list("Warning: the following plugins could not be found:", - Missing) - end, - ToDisableDeps = calculate_dependencies(true, ToDisable, AllPlugins), - NewEnabled = Enabled -- ToDisableDeps, - case length(Enabled) =:= length(NewEnabled) of - true -> io:format("Plugin configuration unchanged.~n"); - false -> ImplicitlyEnabled = - calculate_required_plugins(Enabled, AllPlugins), - NewImplicitlyEnabled = - calculate_required_plugins(NewEnabled, AllPlugins), - print_list("The following plugins have been disabled:", - ImplicitlyEnabled -- NewImplicitlyEnabled), - write_enabled_plugins(PluginsFile, NewEnabled), - report_change() - end. +-endif. %%---------------------------------------------------------------------------- -%% Get the #plugin{}s ready to be enabled. -find_plugins(PluginsDir) -> +%% @doc Prepares the file system and installs all enabled plugins. +setup() -> + {ok, PluginDir} = application:get_env(rabbit, plugins_dir), + {ok, ExpandDir} = application:get_env(rabbit, plugins_expand_dir), + {ok, EnabledFile} = application:get_env(rabbit, enabled_plugins_file), + prepare_plugins(EnabledFile, PluginDir, ExpandDir). + +%% @doc Lists the plugins which are currently running. +active() -> + {ok, ExpandDir} = application:get_env(rabbit, plugins_expand_dir), + InstalledPlugins = [ P#plugin.name || P <- list(ExpandDir) ], + [App || {App, _, _} <- rabbit_misc:which_applications(), + lists:member(App, InstalledPlugins)]. + +%% @doc Get the list of plugins which are ready to be enabled. +list(PluginsDir) -> EZs = [{ez, EZ} || EZ <- filelib:wildcard("*.ez", PluginsDir)], FreeApps = [{app, App} || App <- filelib:wildcard("*/ebin/*.app", PluginsDir)], @@ -177,24 +61,105 @@ find_plugins(PluginsDir) -> (Plugin = #plugin{}, {Plugins1, Problems1}) -> {[Plugin|Plugins1], Problems1} end, {[], []}, - [get_plugin_info(PluginsDir, Plug) || - Plug <- EZs ++ FreeApps]), + [plugin_info(PluginsDir, Plug) || Plug <- EZs ++ FreeApps]), case Problems of [] -> ok; - _ -> io:format("Warning: Problem reading some plugins: ~p~n", - [Problems]) + _ -> error_logger:warning_msg( + "Problem reading some plugins: ~p~n", [Problems]) end, Plugins. -%% Get the #plugin{} from an .ez. -get_plugin_info(Base, {ez, EZ0}) -> +%% @doc Read the list of enabled plugins from the supplied term file. +read_enabled(PluginsFile) -> + case rabbit_file:read_term_file(PluginsFile) of + {ok, [Plugins]} -> Plugins; + {ok, []} -> []; + {ok, [_|_]} -> throw({error, {malformed_enabled_plugins_file, + PluginsFile}}); + {error, enoent} -> []; + {error, Reason} -> throw({error, {cannot_read_enabled_plugins_file, + PluginsFile, Reason}}) + end. + +%% @doc Calculate the dependency graph from <i>Sources</i>. +%% When Reverse =:= true the bottom/leaf level applications are returned in +%% the resulting list, otherwise they're skipped. +dependencies(Reverse, Sources, AllPlugins) -> + {ok, G} = rabbit_misc:build_acyclic_graph( + fun (App, _Deps) -> [{App, App}] end, + fun (App, Deps) -> [{App, Dep} || Dep <- Deps] end, + lists:ukeysort( + 1, [{Name, Deps} || + #plugin{name = Name, + dependencies = Deps} <- AllPlugins] ++ + [{Dep, []} || + #plugin{dependencies = Deps} <- AllPlugins, + Dep <- Deps])), + Dests = case Reverse of + false -> digraph_utils:reachable(Sources, G); + true -> digraph_utils:reaching(Sources, G) + end, + true = digraph:delete(G), + Dests. + +%%---------------------------------------------------------------------------- + +prepare_plugins(EnabledFile, PluginsDistDir, ExpandDir) -> + AllPlugins = list(PluginsDistDir), + Enabled = read_enabled(EnabledFile), + ToUnpack = dependencies(false, Enabled, AllPlugins), + ToUnpackPlugins = lookup_plugins(ToUnpack, AllPlugins), + + case Enabled -- plugin_names(ToUnpackPlugins) of + [] -> ok; + Missing -> error_logger:warning_msg( + "The following enabled plugins were not found: ~p~n", + [Missing]) + end, + + %% Eliminate the contents of the destination directory + case delete_recursively(ExpandDir) of + ok -> ok; + {error, E1} -> throw({error, {cannot_delete_plugins_expand_dir, + [ExpandDir, E1]}}) + end, + case filelib:ensure_dir(ExpandDir ++ "/") of + ok -> ok; + {error, E2} -> throw({error, {cannot_create_plugins_expand_dir, + [ExpandDir, E2]}}) + end, + + [prepare_plugin(Plugin, ExpandDir) || Plugin <- ToUnpackPlugins], + + [prepare_dir_plugin(PluginAppDescPath) || + PluginAppDescPath <- filelib:wildcard(ExpandDir ++ "/*/ebin/*.app")]. + +prepare_dir_plugin(PluginAppDescPath) -> + code:add_path(filename:dirname(PluginAppDescPath)), + list_to_atom(filename:basename(PluginAppDescPath, ".app")). + +%%---------------------------------------------------------------------------- + +delete_recursively(Fn) -> + case rabbit_file:recursive_delete([Fn]) of + ok -> ok; + {error, {Path, E}} -> {error, {cannot_delete, Path, E}}; + Error -> Error + end. + +prepare_plugin(#plugin{type = ez, location = Location}, ExpandDir) -> + zip:unzip(Location, [{cwd, ExpandDir}]); +prepare_plugin(#plugin{type = dir, name = Name, location = Location}, + ExpandDir) -> + rabbit_file:recursive_copy(Location, filename:join([ExpandDir, Name])). + +plugin_info(Base, {ez, EZ0}) -> EZ = filename:join([Base, EZ0]), case read_app_file(EZ) of {application, Name, Props} -> mkplugin(Name, Props, ez, EZ); {error, Reason} -> {error, EZ, Reason} end; -%% Get the #plugin{} from an .app. -get_plugin_info(Base, {app, App0}) -> +plugin_info(Base, {app, App0}) -> App = filename:join([Base, App0]), case rabbit_file:read_term_file(App) of {ok, [{application, Name, Props}]} -> @@ -213,7 +178,6 @@ mkplugin(Name, Props, Type, Location) -> #plugin{name = Name, version = Version, description = Description, dependencies = Dependencies, location = Location, type = Type}. -%% Read the .app file from an ez. read_app_file(EZ) -> case zip:list_dir(EZ) of {ok, [_|ZippedFiles]} -> @@ -229,13 +193,11 @@ read_app_file(EZ) -> {error, {invalid_ez, Reason}} end. -%% Return the path of the .app files in ebin/. find_app_files(ZippedFiles) -> {ok, RE} = re:compile("^.*/ebin/.*.app$"), [Path || {zip_file, Path, _, _, _, _} <- ZippedFiles, re:run(Path, RE, [{capture, none}]) =:= match]. -%% Parse a binary into a term. parse_binary(Bin) -> try {ok, Ts, _} = erl_scan:string(binary_to_list(Bin)), @@ -245,85 +207,10 @@ parse_binary(Bin) -> Err -> {error, {invalid_app, Err}} end. -%% Pretty print a list of plugins. -format_plugins(Pattern, Opts, PluginsFile, PluginsDir) -> - Verbose = proplists:get_bool(?VERBOSE_OPT, Opts), - Minimal = proplists:get_bool(?MINIMAL_OPT, Opts), - Format = case {Verbose, Minimal} of - {false, false} -> normal; - {true, false} -> verbose; - {false, true} -> minimal; - {true, true} -> throw({error_string, - "Cannot specify -m and -v together"}) - end, - OnlyEnabled = proplists:get_bool(?ENABLED_OPT, Opts), - OnlyEnabledAll = proplists:get_bool(?ENABLED_ALL_OPT, Opts), - - AvailablePlugins = find_plugins(PluginsDir), - EnabledExplicitly = read_enabled_plugins(PluginsFile), - EnabledImplicitly = - calculate_required_plugins(EnabledExplicitly, AvailablePlugins) -- - EnabledExplicitly, - {ok, RE} = re:compile(Pattern), - Plugins = [ Plugin || - Plugin = #plugin{name = Name} <- AvailablePlugins, - re:run(atom_to_list(Name), RE, [{capture, none}]) =:= match, - if OnlyEnabled -> lists:member(Name, EnabledExplicitly); - OnlyEnabledAll -> (lists:member(Name, EnabledExplicitly) or - lists:member(Name, EnabledImplicitly)); - true -> true - end], - Plugins1 = usort_plugins(Plugins), - MaxWidth = lists:max([length(atom_to_list(Name)) || - #plugin{name = Name} <- Plugins1] ++ [0]), - [format_plugin(P, EnabledExplicitly, EnabledImplicitly, Format, - MaxWidth) || P <- Plugins1], - ok. - -format_plugin(#plugin{name = Name, version = Version, - description = Description, dependencies = Deps}, - EnabledExplicitly, EnabledImplicitly, Format, MaxWidth) -> - Glyph = case {lists:member(Name, EnabledExplicitly), - lists:member(Name, EnabledImplicitly)} of - {true, false} -> "[E]"; - {false, true} -> "[e]"; - _ -> "[ ]" - end, - case Format of - minimal -> io:format("~s~n", [Name]); - normal -> io:format("~s ~-" ++ integer_to_list(MaxWidth) ++ - "w ~s~n", [Glyph, Name, Version]); - verbose -> io:format("~s ~w~n", [Glyph, Name]), - io:format(" Version: \t~s~n", [Version]), - case Deps of - [] -> ok; - _ -> io:format(" Dependencies:\t~p~n", [Deps]) - end, - io:format(" Description:\t~s~n", [Description]), - io:format("~n") - end. - -print_list(Header, Plugins) -> - io:format(fmt_list(Header, Plugins)). - -fmt_list(Header, Plugins) -> - lists:flatten( - [Header, $\n, [io_lib:format(" ~s~n", [P]) || P <- Plugins]]). - -usort_plugins(Plugins) -> - lists:usort(fun plugins_cmp/2, Plugins). - -plugins_cmp(#plugin{name = N1, version = V1}, - #plugin{name = N2, version = V2}) -> - {N1, V1} =< {N2, V2}. - -%% Filter out applications that can be loaded *right now*. filter_applications(Applications) -> [Application || Application <- Applications, not is_available_app(Application)]. -%% Return whether is application is already available (and hence -%% doesn't need enabling). is_available_app(Application) -> case application:load(Application) of {error, {already_loaded, _}} -> true; @@ -332,78 +219,8 @@ is_available_app(Application) -> _ -> false end. -%% Return the names of the given plugins. plugin_names(Plugins) -> [Name || #plugin{name = Name} <- Plugins]. -%% Find plugins by name in a list of plugins. lookup_plugins(Names, AllPlugins) -> [P || P = #plugin{name = Name} <- AllPlugins, lists:member(Name, Names)]. - -%% Read the enabled plugin names from disk. -read_enabled_plugins(PluginsFile) -> - case rabbit_file:read_term_file(PluginsFile) of - {ok, [Plugins]} -> Plugins; - {ok, []} -> []; - {ok, [_|_]} -> throw({error, {malformed_enabled_plugins_file, - PluginsFile}}); - {error, enoent} -> []; - {error, Reason} -> throw({error, {cannot_read_enabled_plugins_file, - PluginsFile, Reason}}) - end. - -%% Write the enabled plugin names on disk. -write_enabled_plugins(PluginsFile, Plugins) -> - case rabbit_file:write_term_file(PluginsFile, [Plugins]) of - ok -> ok; - {error, Reason} -> throw({error, {cannot_write_enabled_plugins_file, - PluginsFile, Reason}}) - end. - -calculate_required_plugins(Sources, AllPlugins) -> - calculate_dependencies(false, Sources, AllPlugins). - -calculate_dependencies(Reverse, Sources, AllPlugins) -> - {ok, G} = rabbit_misc:build_acyclic_graph( - fun (App, _Deps) -> [{App, App}] end, - fun (App, Deps) -> [{App, Dep} || Dep <- Deps] end, - [{Name, Deps} - || #plugin{name = Name, dependencies = Deps} <- AllPlugins]), - Dests = case Reverse of - false -> digraph_utils:reachable(Sources, G); - true -> digraph_utils:reaching(Sources, G) - end, - true = digraph:delete(G), - Dests. - -maybe_warn_mochiweb(Enabled) -> - V = erlang:system_info(otp_release), - case lists:member(mochiweb, Enabled) andalso V < "R13B01" of - true -> - Stars = string:copies("*", 80), - io:format("~n~n~s~n" - " Warning: Mochiweb enabled and Erlang version ~s " - "detected.~n" - " Enabling plugins that depend on Mochiweb is not " - "supported on this Erlang~n" - " version. At least R13B01 is required.~n~n" - " RabbitMQ will not start successfully in this " - "configuration. You *must*~n" - " disable the Mochiweb plugin, or upgrade Erlang.~n" - "~s~n~n~n", [Stars, V, Stars]); - false -> - ok - end. - -report_change() -> - io:format("Plugin configuration has changed. " - "Restart RabbitMQ for changes to take effect.~n"), - case os:type() of - {win32, _OsName} -> - io:format("If you have RabbitMQ running as a service then you must" - " reinstall by running~n rabbitmq-service.bat stop~n" - " rabbitmq-service.bat install~n" - " rabbitmq-service.bat start~n~n"); - _ -> - ok - end. diff --git a/src/rabbit_plugins_main.erl b/src/rabbit_plugins_main.erl new file mode 100644 index 00000000..948d2ab0 --- /dev/null +++ b/src/rabbit_plugins_main.erl @@ -0,0 +1,267 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2011-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_plugins_main). +-include("rabbit.hrl"). + +-export([start/0, stop/0]). + +-define(VERBOSE_OPT, "-v"). +-define(MINIMAL_OPT, "-m"). +-define(ENABLED_OPT, "-E"). +-define(ENABLED_ALL_OPT, "-e"). + +-define(VERBOSE_DEF, {?VERBOSE_OPT, flag}). +-define(MINIMAL_DEF, {?MINIMAL_OPT, flag}). +-define(ENABLED_DEF, {?ENABLED_OPT, flag}). +-define(ENABLED_ALL_DEF, {?ENABLED_ALL_OPT, flag}). + +-define(GLOBAL_DEFS, []). + +-define(COMMANDS, + [{list, [?VERBOSE_DEF, ?MINIMAL_DEF, ?ENABLED_DEF, ?ENABLED_ALL_DEF]}, + enable, + disable]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start/0 :: () -> no_return()). +-spec(stop/0 :: () -> 'ok'). +-spec(usage/0 :: () -> no_return()). + +-endif. + +%%---------------------------------------------------------------------------- + +start() -> + {ok, [[PluginsFile|_]|_]} = + init:get_argument(enabled_plugins_file), + {ok, [[PluginsDir|_]|_]} = init:get_argument(plugins_dist_dir), + {Command, Opts, Args} = + case rabbit_misc:parse_arguments(?COMMANDS, ?GLOBAL_DEFS, + init:get_plain_arguments()) + of + {ok, Res} -> Res; + no_command -> print_error("could not recognise command", []), + usage() + end, + + PrintInvalidCommandError = + fun () -> + print_error("invalid command '~s'", + [string:join([atom_to_list(Command) | Args], " ")]) + end, + + case catch action(Command, Args, Opts, PluginsFile, PluginsDir) of + ok -> + rabbit_misc:quit(0); + {'EXIT', {function_clause, [{?MODULE, action, _} | _]}} -> + PrintInvalidCommandError(), + usage(); + {'EXIT', {function_clause, [{?MODULE, action, _, _} | _]}} -> + PrintInvalidCommandError(), + usage(); + {error, Reason} -> + print_error("~p", [Reason]), + rabbit_misc:quit(2); + {error_string, Reason} -> + print_error("~s", [Reason]), + rabbit_misc:quit(2); + Other -> + print_error("~p", [Other]), + rabbit_misc:quit(2) + end. + +stop() -> + ok. + +%%---------------------------------------------------------------------------- + +action(list, [], Opts, PluginsFile, PluginsDir) -> + action(list, [".*"], Opts, PluginsFile, PluginsDir); +action(list, [Pat], Opts, PluginsFile, PluginsDir) -> + format_plugins(Pat, Opts, PluginsFile, PluginsDir); + +action(enable, ToEnable0, _Opts, PluginsFile, PluginsDir) -> + case ToEnable0 of + [] -> throw({error_string, "Not enough arguments for 'enable'"}); + _ -> ok + end, + AllPlugins = rabbit_plugins:list(PluginsDir), + Enabled = rabbit_plugins:read_enabled(PluginsFile), + ImplicitlyEnabled = rabbit_plugins:dependencies(false, + Enabled, AllPlugins), + ToEnable = [list_to_atom(Name) || Name <- ToEnable0], + Missing = ToEnable -- plugin_names(AllPlugins), + NewEnabled = lists:usort(Enabled ++ ToEnable), + NewImplicitlyEnabled = rabbit_plugins:dependencies(false, + NewEnabled, AllPlugins), + MissingDeps = (NewImplicitlyEnabled -- plugin_names(AllPlugins)) -- Missing, + case {Missing, MissingDeps} of + {[], []} -> ok; + {Miss, []} -> throw({error_string, fmt_missing("plugins", Miss)}); + {[], Miss} -> throw({error_string, fmt_missing("dependencies", Miss)}); + {_, _} -> throw({error_string, + fmt_missing("plugins", Missing) ++ + fmt_missing("dependencies", MissingDeps)}) + end, + write_enabled_plugins(PluginsFile, NewEnabled), + case NewEnabled -- ImplicitlyEnabled of + [] -> io:format("Plugin configuration unchanged.~n"); + _ -> print_list("The following plugins have been enabled:", + NewImplicitlyEnabled -- ImplicitlyEnabled), + report_change() + end; + +action(disable, ToDisable0, _Opts, PluginsFile, PluginsDir) -> + case ToDisable0 of + [] -> throw({error_string, "Not enough arguments for 'disable'"}); + _ -> ok + end, + ToDisable = [list_to_atom(Name) || Name <- ToDisable0], + Enabled = rabbit_plugins:read_enabled(PluginsFile), + AllPlugins = rabbit_plugins:list(PluginsDir), + Missing = ToDisable -- plugin_names(AllPlugins), + case Missing of + [] -> ok; + _ -> print_list("Warning: the following plugins could not be found:", + Missing) + end, + ToDisableDeps = rabbit_plugins:dependencies(true, ToDisable, AllPlugins), + NewEnabled = Enabled -- ToDisableDeps, + case length(Enabled) =:= length(NewEnabled) of + true -> io:format("Plugin configuration unchanged.~n"); + false -> ImplicitlyEnabled = + rabbit_plugins:dependencies(false, Enabled, AllPlugins), + NewImplicitlyEnabled = + rabbit_plugins:dependencies(false, + NewEnabled, AllPlugins), + print_list("The following plugins have been disabled:", + ImplicitlyEnabled -- NewImplicitlyEnabled), + write_enabled_plugins(PluginsFile, NewEnabled), + report_change() + end. + +%%---------------------------------------------------------------------------- + +print_error(Format, Args) -> + rabbit_misc:format_stderr("Error: " ++ Format ++ "~n", Args). + +usage() -> + io:format("~s", [rabbit_plugins_usage:usage()]), + rabbit_misc:quit(1). + +%% Pretty print a list of plugins. +format_plugins(Pattern, Opts, PluginsFile, PluginsDir) -> + Verbose = proplists:get_bool(?VERBOSE_OPT, Opts), + Minimal = proplists:get_bool(?MINIMAL_OPT, Opts), + Format = case {Verbose, Minimal} of + {false, false} -> normal; + {true, false} -> verbose; + {false, true} -> minimal; + {true, true} -> throw({error_string, + "Cannot specify -m and -v together"}) + end, + OnlyEnabled = proplists:get_bool(?ENABLED_OPT, Opts), + OnlyEnabledAll = proplists:get_bool(?ENABLED_ALL_OPT, Opts), + + AvailablePlugins = rabbit_plugins:list(PluginsDir), + EnabledExplicitly = rabbit_plugins:read_enabled(PluginsFile), + EnabledImplicitly = + rabbit_plugins:dependencies(false, EnabledExplicitly, + AvailablePlugins) -- EnabledExplicitly, + Missing = [#plugin{name = Name, dependencies = []} || + Name <- ((EnabledExplicitly ++ EnabledImplicitly) -- + plugin_names(AvailablePlugins))], + {ok, RE} = re:compile(Pattern), + Plugins = [ Plugin || + Plugin = #plugin{name = Name} <- AvailablePlugins ++ Missing, + re:run(atom_to_list(Name), RE, [{capture, none}]) =:= match, + if OnlyEnabled -> lists:member(Name, EnabledExplicitly); + OnlyEnabledAll -> (lists:member(Name, + EnabledExplicitly) or + lists:member(Name, EnabledImplicitly)); + true -> true + end], + Plugins1 = usort_plugins(Plugins), + MaxWidth = lists:max([length(atom_to_list(Name)) || + #plugin{name = Name} <- Plugins1] ++ [0]), + [format_plugin(P, EnabledExplicitly, EnabledImplicitly, + plugin_names(Missing), Format, MaxWidth) || P <- Plugins1], + ok. + +format_plugin(#plugin{name = Name, version = Version, + description = Description, dependencies = Deps}, + EnabledExplicitly, EnabledImplicitly, Missing, + Format, MaxWidth) -> + Glyph = case {lists:member(Name, EnabledExplicitly), + lists:member(Name, EnabledImplicitly), + lists:member(Name, Missing)} of + {true, false, false} -> "[E]"; + {false, true, false} -> "[e]"; + {_, _, true} -> "[!]"; + _ -> "[ ]" + end, + Opt = fun (_F, A, A) -> ok; + ( F, A, _) -> io:format(F, [A]) + end, + case Format of + minimal -> io:format("~s~n", [Name]); + normal -> io:format("~s ~-" ++ integer_to_list(MaxWidth) ++ "w ", + [Glyph, Name]), + Opt("~s", Version, undefined), + io:format("~n"); + verbose -> io:format("~s ~w~n", [Glyph, Name]), + Opt(" Version: \t~s~n", Version, undefined), + Opt(" Dependencies:\t~p~n", Deps, []), + Opt(" Description: \t~s~n", Description, undefined), + io:format("~n") + end. + +print_list(Header, Plugins) -> + io:format(fmt_list(Header, Plugins)). + +fmt_list(Header, Plugins) -> + lists:flatten( + [Header, $\n, [io_lib:format(" ~s~n", [P]) || P <- Plugins]]). + +fmt_missing(Desc, Missing) -> + fmt_list("The following " ++ Desc ++ " could not be found:", Missing). + +usort_plugins(Plugins) -> + lists:usort(fun plugins_cmp/2, Plugins). + +plugins_cmp(#plugin{name = N1, version = V1}, + #plugin{name = N2, version = V2}) -> + {N1, V1} =< {N2, V2}. + +%% Return the names of the given plugins. +plugin_names(Plugins) -> + [Name || #plugin{name = Name} <- Plugins]. + +%% Write the enabled plugin names on disk. +write_enabled_plugins(PluginsFile, Plugins) -> + case rabbit_file:write_term_file(PluginsFile, [Plugins]) of + ok -> ok; + {error, Reason} -> throw({error, {cannot_write_enabled_plugins_file, + PluginsFile, Reason}}) + end. + +report_change() -> + io:format("Plugin configuration has changed. " + "Restart RabbitMQ for changes to take effect.~n"). diff --git a/src/rabbit_policy.erl b/src/rabbit_policy.erl new file mode 100644 index 00000000..0785d278 --- /dev/null +++ b/src/rabbit_policy.erl @@ -0,0 +1,319 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_policy). + +%% TODO specs + +-behaviour(rabbit_runtime_parameter). + +-include("rabbit.hrl"). + +-import(rabbit_misc, [pget/2]). + +-export([register/0]). +-export([invalidate/0, recover/0]). +-export([name/1, get/2, set/1]). +-export([validate/4, notify/4, notify_clear/3]). +-export([parse_set/6, set/6, delete/2, lookup/2, list/0, list/1, + list_formatted/1, info_keys/0]). + +-rabbit_boot_step({?MODULE, + [{description, "policy parameters"}, + {mfa, {rabbit_policy, register, []}}, + {requires, rabbit_registry}, + {enables, recovery}]}). + +register() -> + rabbit_registry:register(runtime_parameter, <<"policy">>, ?MODULE). + +name(#amqqueue{policy = Policy}) -> name0(Policy); +name(#exchange{policy = Policy}) -> name0(Policy). + +name0(undefined) -> none; +name0(Policy) -> pget(name, Policy). + +set(Q = #amqqueue{name = Name}) -> Q#amqqueue{policy = set0(Name)}; +set(X = #exchange{name = Name}) -> rabbit_exchange_decorator:set( + X#exchange{policy = set0(Name)}). + +set0(Name = #resource{virtual_host = VHost}) -> match(Name, list(VHost)). + +set(Q = #amqqueue{name = Name}, Ps) -> Q#amqqueue{policy = match(Name, Ps)}; +set(X = #exchange{name = Name}, Ps) -> rabbit_exchange_decorator:set( + X#exchange{policy = match(Name, Ps)}). + +get(Name, #amqqueue{policy = Policy}) -> get0(Name, Policy); +get(Name, #exchange{policy = Policy}) -> get0(Name, Policy); +%% Caution - SLOW. +get(Name, EntityName = #resource{virtual_host = VHost}) -> + get0(Name, match(EntityName, list(VHost))). + +get0(_Name, undefined) -> {error, not_found}; +get0(Name, List) -> case pget(definition, List) of + undefined -> {error, not_found}; + Policy -> case pget(Name, Policy) of + undefined -> {error, not_found}; + Value -> {ok, Value} + end + end. + +%%---------------------------------------------------------------------------- + +%% Gets called during upgrades - therefore must not assume anything about the +%% state of Mnesia +invalidate() -> + rabbit_file:write_file(invalid_file(), <<"">>). + +recover() -> + case rabbit_file:is_file(invalid_file()) of + true -> recover0(), + rabbit_file:delete(invalid_file()); + false -> ok + end. + +%% To get here we have to have just completed an Mnesia upgrade - i.e. we are +%% the first node starting. So we can rewrite the whole database. Note that +%% recovery has not yet happened; we must work with the rabbit_durable_<thing> +%% variants. +recover0() -> + Xs = mnesia:dirty_match_object(rabbit_durable_exchange, #exchange{_ = '_'}), + Qs = mnesia:dirty_match_object(rabbit_durable_queue, #amqqueue{_ = '_'}), + Policies = list(), + [rabbit_misc:execute_mnesia_transaction( + fun () -> + mnesia:write(rabbit_durable_exchange, set(X, Policies), write) + end) || X <- Xs], + [rabbit_misc:execute_mnesia_transaction( + fun () -> + mnesia:write(rabbit_durable_queue, set(Q, Policies), write) + end) || Q <- Qs], + ok. + +invalid_file() -> + filename:join(rabbit_mnesia:dir(), "policies_are_invalid"). + +%%---------------------------------------------------------------------------- + +parse_set(VHost, Name, Pattern, Definition, Priority, ApplyTo) -> + try list_to_integer(Priority) of + Num -> parse_set0(VHost, Name, Pattern, Definition, Num, ApplyTo) + catch + error:badarg -> {error, "~p priority must be a number", [Priority]} + end. + +parse_set0(VHost, Name, Pattern, Defn, Priority, ApplyTo) -> + case rabbit_misc:json_decode(Defn) of + {ok, JSON} -> + set0(VHost, Name, + [{<<"pattern">>, list_to_binary(Pattern)}, + {<<"definition">>, rabbit_misc:json_to_term(JSON)}, + {<<"priority">>, Priority}, + {<<"apply-to">>, ApplyTo}]); + error -> + {error_string, "JSON decoding error"} + end. + +set(VHost, Name, Pattern, Definition, Priority, ApplyTo) -> + PolicyProps = [{<<"pattern">>, Pattern}, + {<<"definition">>, Definition}, + {<<"priority">>, case Priority of + undefined -> 0; + _ -> Priority + end}, + {<<"apply-to">>, case ApplyTo of + undefined -> <<"all">>; + _ -> ApplyTo + end}], + set0(VHost, Name, PolicyProps). + +set0(VHost, Name, Term) -> + rabbit_runtime_parameters:set_any(VHost, <<"policy">>, Name, Term). + +delete(VHost, Name) -> + rabbit_runtime_parameters:clear_any(VHost, <<"policy">>, Name). + +lookup(VHost, Name) -> + case rabbit_runtime_parameters:lookup(VHost, <<"policy">>, Name) of + not_found -> not_found; + P -> p(P, fun ident/1) + end. + +list() -> + list('_'). + +list(VHost) -> + list0(VHost, fun ident/1). + +list_formatted(VHost) -> + order_policies(list0(VHost, fun format/1)). + +list0(VHost, DefnFun) -> + [p(P, DefnFun) || P <- rabbit_runtime_parameters:list(VHost, <<"policy">>)]. + +order_policies(PropList) -> + lists:sort(fun (A, B) -> pget(priority, A) < pget(priority, B) end, + PropList). + +p(Parameter, DefnFun) -> + Value = pget(value, Parameter), + [{vhost, pget(vhost, Parameter)}, + {name, pget(name, Parameter)}, + {pattern, pget(<<"pattern">>, Value)}, + {'apply-to', pget(<<"apply-to">>, Value)}, + {definition, DefnFun(pget(<<"definition">>, Value))}, + {priority, pget(<<"priority">>, Value)}]. + +format(Term) -> + {ok, JSON} = rabbit_misc:json_encode(rabbit_misc:term_to_json(Term)), + list_to_binary(JSON). + +ident(X) -> X. + +info_keys() -> [vhost, name, 'apply-to', pattern, definition, priority]. + +%%---------------------------------------------------------------------------- + +validate(_VHost, <<"policy">>, Name, Term) -> + rabbit_parameter_validation:proplist( + Name, policy_validation(), Term). + +notify(VHost, <<"policy">>, _Name, _Term) -> + update_policies(VHost). + +notify_clear(VHost, <<"policy">>, _Name) -> + update_policies(VHost). + +%%---------------------------------------------------------------------------- + +update_policies(VHost) -> + Policies = list(VHost), + {Xs, Qs} = rabbit_misc:execute_mnesia_transaction( + fun() -> + {[update_exchange(X, Policies) || + X <- rabbit_exchange:list(VHost)], + [update_queue(Q, Policies) || + Q <- rabbit_amqqueue:list(VHost)]} + end), + [catch notify(X) || X <- Xs], + [catch notify(Q) || Q <- Qs], + ok. + +update_exchange(X = #exchange{name = XName, policy = OldPolicy}, Policies) -> + case match(XName, Policies) of + OldPolicy -> no_change; + NewPolicy -> case rabbit_exchange:update( + XName, fun (X0) -> + rabbit_exchange_decorator:set( + X0 #exchange{policy = NewPolicy}) + end) of + #exchange{} = X1 -> {X, X1}; + not_found -> {X, X } + end + end. + +update_queue(Q = #amqqueue{name = QName, policy = OldPolicy}, Policies) -> + case match(QName, Policies) of + OldPolicy -> no_change; + NewPolicy -> rabbit_amqqueue:update( + QName, fun(Q1) -> Q1#amqqueue{policy = NewPolicy} end), + {Q, Q#amqqueue{policy = NewPolicy}} + end. + +notify(no_change)-> + ok; +notify({X1 = #exchange{}, X2 = #exchange{}}) -> + rabbit_exchange:policy_changed(X1, X2); +notify({Q1 = #amqqueue{}, Q2 = #amqqueue{}}) -> + rabbit_amqqueue:policy_changed(Q1, Q2). + +match(Name, Policies) -> + case lists:sort(fun sort_pred/2, [P || P <- Policies, matches(Name, P)]) of + [] -> undefined; + [Policy | _Rest] -> Policy + end. + +matches(#resource{name = Name, kind = Kind, virtual_host = VHost}, Policy) -> + matches_type(Kind, pget('apply-to', Policy)) andalso + match =:= re:run(Name, pget(pattern, Policy), [{capture, none}]) andalso + VHost =:= pget(vhost, Policy). + +matches_type(exchange, <<"exchanges">>) -> true; +matches_type(queue, <<"queues">>) -> true; +matches_type(exchange, <<"all">>) -> true; +matches_type(queue, <<"all">>) -> true; +matches_type(_, _) -> false. + +sort_pred(A, B) -> pget(priority, A) >= pget(priority, B). + +%%---------------------------------------------------------------------------- + +policy_validation() -> + [{<<"priority">>, fun rabbit_parameter_validation:number/2, mandatory}, + {<<"pattern">>, fun rabbit_parameter_validation:regex/2, mandatory}, + {<<"apply-to">>, fun apply_to_validation/2, optional}, + {<<"definition">>, fun validation/2, mandatory}]. + +validation(_Name, []) -> + {error, "no policy provided", []}; +validation(_Name, Terms) when is_list(Terms) -> + {Keys, Modules} = lists:unzip( + rabbit_registry:lookup_all(policy_validator)), + [] = dups(Keys), %% ASSERTION + Validators = lists:zipwith(fun (M, K) -> {M, a2b(K)} end, Modules, Keys), + case is_proplist(Terms) of + true -> {TermKeys, _} = lists:unzip(Terms), + case dups(TermKeys) of + [] -> validation0(Validators, Terms); + Dup -> {error, "~p duplicate keys not allowed", [Dup]} + end; + false -> {error, "definition must be a dictionary: ~p", [Terms]} + end; +validation(_Name, Term) -> + {error, "parse error while reading policy: ~p", [Term]}. + +validation0(Validators, Terms) -> + case lists:foldl( + fun (Mod, {ok, TermsLeft}) -> + ModKeys = proplists:get_all_values(Mod, Validators), + case [T || {Key, _} = T <- TermsLeft, + lists:member(Key, ModKeys)] of + [] -> {ok, TermsLeft}; + Scope -> {Mod:validate_policy(Scope), TermsLeft -- Scope} + end; + (_, Acc) -> + Acc + end, {ok, Terms}, proplists:get_keys(Validators)) of + {ok, []} -> + ok; + {ok, Unvalidated} -> + {error, "~p are not recognised policy settings", [Unvalidated]}; + {Error, _} -> + Error + end. + +a2b(A) -> list_to_binary(atom_to_list(A)). + +dups(L) -> L -- lists:usort(L). + +is_proplist(L) -> length(L) =:= length([I || I = {_, _} <- L]). + +apply_to_validation(_Name, <<"all">>) -> ok; +apply_to_validation(_Name, <<"exchanges">>) -> ok; +apply_to_validation(_Name, <<"queues">>) -> ok; +apply_to_validation(_Name, Term) -> + {error, "apply-to '~s' unrecognised; should be 'queues', 'exchanges' " + "or 'all'", [Term]}. diff --git a/src/rabbit_policy_validator.erl b/src/rabbit_policy_validator.erl new file mode 100644 index 00000000..661db73d --- /dev/null +++ b/src/rabbit_policy_validator.erl @@ -0,0 +1,39 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_policy_validator). + +-ifdef(use_specs). + +-export_type([validate_results/0]). + +-type(validate_results() :: + 'ok' | {error, string(), [term()]} | [validate_results()]). + +-callback validate_policy([{binary(), term()}]) -> validate_results(). + +-else. + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [ + {validate_policy, 1} + ]; +behaviour_info(_Other) -> + undefined. + +-endif. diff --git a/src/rabbit_prelaunch.erl b/src/rabbit_prelaunch.erl index 162d44f1..be407a02 100644 --- a/src/rabbit_prelaunch.erl +++ b/src/rabbit_prelaunch.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_prelaunch). @@ -31,212 +31,21 @@ -spec(start/0 :: () -> no_return()). -spec(stop/0 :: () -> 'ok'). -%% Shut dialyzer up --spec(terminate/1 :: (string()) -> no_return()). --spec(terminate/2 :: (string(), [any()]) -> no_return()). -endif. %%---------------------------------------------------------------------------- start() -> - io:format("Activating RabbitMQ plugins ...~n"), - - %% Determine our various directories - [EnabledPluginsFile, PluginsDistDir, UnpackedPluginDir, NodeStr] = - init:get_plain_arguments(), - RootName = UnpackedPluginDir ++ "/rabbit", - - prepare_plugins(EnabledPluginsFile, PluginsDistDir, UnpackedPluginDir), - - %% Build a list of required apps based on the fixed set, and any plugins - PluginApps = find_plugins(UnpackedPluginDir), - RequiredApps = ?BaseApps ++ PluginApps, - - %% Build the entire set of dependencies - this will load the - %% applications along the way - AllApps = case catch sets:to_list(expand_dependencies(RequiredApps)) of - {failed_to_load_app, App, Err} -> - terminate("failed to load application ~s:~n~p", - [App, Err]); - AppList -> - AppList - end, - AppVersions = [determine_version(App) || App <- AllApps], - RabbitVersion = proplists:get_value(rabbit, AppVersions), - - %% Build the overall release descriptor - RDesc = {release, - {"rabbit", RabbitVersion}, - {erts, erlang:system_info(version)}, - AppVersions}, - - %% Write it out to $RABBITMQ_PLUGINS_EXPAND_DIR/rabbit.rel - rabbit_file:write_file(RootName ++ ".rel", io_lib:format("~p.~n", [RDesc])), - - %% We exclude mochiweb due to its optional use of fdsrv. - XRefExclude = [mochiweb], - - %% Compile the script - ScriptFile = RootName ++ ".script", - case systools:make_script(RootName, [local, silent, - {exref, AllApps -- XRefExclude}]) of - {ok, Module, Warnings} -> - %% This gets lots of spurious no-source warnings when we - %% have .ez files, so we want to supress them to prevent - %% hiding real issues. On Ubuntu, we also get warnings - %% about kernel/stdlib sources being out of date, which we - %% also ignore for the same reason. - WarningStr = Module:format_warning( - [W || W <- Warnings, - case W of - {warning, {source_not_found, _}} -> false; - {warning, {obj_out_of_date, {_,_,WApp,_,_}}} - when WApp == mnesia; - WApp == stdlib; - WApp == kernel; - WApp == sasl; - WApp == crypto; - WApp == os_mon -> false; - _ -> true - end]), - case length(WarningStr) of - 0 -> ok; - _ -> S = string:copies("*", 80), - io:format("~n~s~n~s~s~n~n", [S, WarningStr, S]) - end, - ok; - {error, Module, Error} -> - terminate("generation of boot script file ~s failed:~n~s", - [ScriptFile, Module:format_error(Error)]) - end, - - case post_process_script(ScriptFile) of - ok -> ok; - {error, Reason} -> - terminate("post processing of boot script file ~s failed:~n~w", - [ScriptFile, Reason]) - end, - case systools:script2boot(RootName) of - ok -> ok; - error -> terminate("failed to compile boot script file ~s", - [ScriptFile]) - end, - io:format("~w plugins activated:~n", [length(PluginApps)]), - [io:format("* ~s-~s~n", [App, proplists:get_value(App, AppVersions)]) - || App <- PluginApps], - io:nl(), - + [NodeStr] = init:get_plain_arguments(), ok = duplicate_node_check(NodeStr), - - terminate(0), + rabbit_misc:quit(0), ok. stop() -> ok. -determine_version(App) -> - application:load(App), - {ok, Vsn} = application:get_key(App, vsn), - {App, Vsn}. - -delete_recursively(Fn) -> - case rabbit_file:recursive_delete([Fn]) of - ok -> ok; - {error, {Path, E}} -> {error, {cannot_delete, Path, E}}; - Error -> Error - end. - -prepare_plugins(EnabledPluginsFile, PluginsDistDir, DestDir) -> - AllPlugins = rabbit_plugins:find_plugins(PluginsDistDir), - Enabled = rabbit_plugins:read_enabled_plugins(EnabledPluginsFile), - ToUnpack = rabbit_plugins:calculate_required_plugins(Enabled, AllPlugins), - ToUnpackPlugins = rabbit_plugins:lookup_plugins(ToUnpack, AllPlugins), - - Missing = Enabled -- rabbit_plugins:plugin_names(ToUnpackPlugins), - case Missing of - [] -> ok; - _ -> io:format("Warning: the following enabled plugins were " - "not found: ~p~n", [Missing]) - end, - - %% Eliminate the contents of the destination directory - case delete_recursively(DestDir) of - ok -> ok; - {error, E} -> terminate("Could not delete dir ~s (~p)", [DestDir, E]) - end, - case filelib:ensure_dir(DestDir ++ "/") of - ok -> ok; - {error, E2} -> terminate("Could not create dir ~s (~p)", [DestDir, E2]) - end, - - [prepare_plugin(Plugin, DestDir) || Plugin <- ToUnpackPlugins]. - -prepare_plugin(#plugin{type = ez, location = Location}, PluginDestDir) -> - zip:unzip(Location, [{cwd, PluginDestDir}]); -prepare_plugin(#plugin{type = dir, name = Name, location = Location}, - PluginsDestDir) -> - rabbit_file:recursive_copy(Location, - filename:join([PluginsDestDir, Name])). - -find_plugins(PluginDir) -> - [prepare_dir_plugin(PluginName) || - PluginName <- filelib:wildcard(PluginDir ++ "/*/ebin/*.app")]. - -prepare_dir_plugin(PluginAppDescFn) -> - %% Add the plugin ebin directory to the load path - PluginEBinDirN = filename:dirname(PluginAppDescFn), - code:add_path(PluginEBinDirN), - - %% We want the second-last token - NameTokens = string:tokens(PluginAppDescFn,"/."), - PluginNameString = lists:nth(length(NameTokens) - 1, NameTokens), - list_to_atom(PluginNameString). - -expand_dependencies(Pending) -> - expand_dependencies(sets:new(), Pending). -expand_dependencies(Current, []) -> - Current; -expand_dependencies(Current, [Next|Rest]) -> - case sets:is_element(Next, Current) of - true -> - expand_dependencies(Current, Rest); - false -> - case application:load(Next) of - ok -> - ok; - {error, {already_loaded, _}} -> - ok; - {error, Reason} -> - throw({failed_to_load_app, Next, Reason}) - end, - {ok, Required} = application:get_key(Next, applications), - Unique = [A || A <- Required, not(sets:is_element(A, Current))], - expand_dependencies(sets:add_element(Next, Current), Rest ++ Unique) - end. - -post_process_script(ScriptFile) -> - case file:consult(ScriptFile) of - {ok, [{script, Name, Entries}]} -> - NewEntries = lists:flatmap(fun process_entry/1, Entries), - case file:open(ScriptFile, [write]) of - {ok, Fd} -> - io:format(Fd, "%% script generated at ~w ~w~n~p.~n", - [date(), time(), {script, Name, NewEntries}]), - file:close(Fd), - ok; - {error, OReason} -> - {error, {failed_to_open_script_file_for_writing, OReason}} - end; - {error, Reason} -> - {error, {failed_to_load_script, Reason}} - end. - -process_entry(Entry = {apply,{application,start_boot,[mnesia,permanent]}}) -> - [{apply,{rabbit,maybe_hipe_compile,[]}}, - {apply,{rabbit,prepare,[]}}, Entry]; -process_entry(Entry) -> - [Entry]. +%%---------------------------------------------------------------------------- %% Check whether a node with the same name is already running duplicate_node_check([]) -> @@ -248,32 +57,16 @@ duplicate_node_check(NodeStr) -> case rabbit_nodes:names(NodeHost) of {ok, NamePorts} -> case proplists:is_defined(NodeName, NamePorts) of - true -> io:format("node with name ~p " + true -> io:format("ERROR: node with name ~p " "already running on ~p~n", [NodeName, NodeHost]), io:format(rabbit_nodes:diagnostics([Node]) ++ "~n"), - terminate(?ERROR_CODE); + rabbit_misc:quit(?ERROR_CODE); false -> ok end; {error, EpmdReason} -> - terminate("epmd error for host ~p: ~p (~s)~n", + io:format("ERROR: epmd error for host ~p: ~p (~s)~n", [NodeHost, EpmdReason, - case EpmdReason of - address -> "unable to establish tcp connection"; - timeout -> "timed out establishing tcp connection"; - _ -> inet:format_error(EpmdReason) - end]) - end. - -terminate(Fmt, Args) -> - io:format("ERROR: " ++ Fmt ++ "~n", Args), - terminate(?ERROR_CODE). - -terminate(Status) -> - case os:type() of - {unix, _} -> halt(Status); - {win32, _} -> init:stop(Status), - receive - after infinity -> ok - end + rabbit_misc:format_inet_error(EpmdReason)]), + rabbit_misc:quit(?ERROR_CODE) end. diff --git a/src/rabbit_queue_collector.erl b/src/rabbit_queue_collector.erl index 6dad01cc..6406f7e9 100644 --- a/src/rabbit_queue_collector.erl +++ b/src/rabbit_queue_collector.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_queue_collector). diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 3ef769c7..f69d8355 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_queue_index). @@ -23,7 +23,7 @@ -export([scan/3]). --export([add_queue_ttl/0]). +-export([add_queue_ttl/0, avoid_zeroes/0]). -define(CLEAN_FILENAME, "clean.dot"). @@ -123,9 +123,9 @@ -define(REL_SEQ_BITS, 14). -define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). -%% seq only is binary 00 followed by 14 bits of rel seq id +%% seq only is binary 01 followed by 14 bits of rel seq id %% (range: 0 - 16383) --define(REL_SEQ_ONLY_PREFIX, 00). +-define(REL_SEQ_ONLY_PREFIX, 01). -define(REL_SEQ_ONLY_PREFIX_BITS, 2). -define(REL_SEQ_ONLY_RECORD_BYTES, 2). @@ -162,7 +162,7 @@ %%---------------------------------------------------------------------------- -record(qistate, { dir, segments, journal_handle, dirty_count, - max_journal_entries, on_sync, unsynced_msg_ids }). + max_journal_entries, on_sync, unconfirmed }). -record(segment, { num, path, journal_entries, unacked }). @@ -171,6 +171,7 @@ %%---------------------------------------------------------------------------- -rabbit_upgrade({add_queue_ttl, local, []}). +-rabbit_upgrade({avoid_zeroes, local, [add_queue_ttl]}). -ifdef(use_specs). @@ -190,7 +191,7 @@ dirty_count :: integer(), max_journal_entries :: non_neg_integer(), on_sync :: on_sync_fun(), - unsynced_msg_ids :: gb_set() + unconfirmed :: gb_set() }). -type(contains_predicate() :: fun ((rabbit_types:msg_id()) -> boolean())). -type(walker(A) :: fun ((A) -> 'finished' | @@ -210,7 +211,7 @@ -spec(deliver/2 :: ([seq_id()], qistate()) -> qistate()). -spec(ack/2 :: ([seq_id()], qistate()) -> qistate()). -spec(sync/1 :: (qistate()) -> qistate()). --spec(needs_sync/1 :: (qistate()) -> boolean()). +-spec(needs_sync/1 :: (qistate()) -> 'confirms' | 'other' | 'false'). -spec(flush/1 :: (qistate()) -> qistate()). -spec(read/3 :: (seq_id(), seq_id(), qistate()) -> {[{rabbit_types:msg_id(), seq_id(), @@ -269,13 +270,16 @@ delete_and_terminate(State) -> State1. publish(MsgId, SeqId, MsgProps, IsPersistent, - State = #qistate { unsynced_msg_ids = UnsyncedMsgIds }) + State = #qistate { unconfirmed = Unconfirmed }) when is_binary(MsgId) -> ?MSG_ID_BYTES = size(MsgId), {JournalHdl, State1} = get_journal_handle( - State #qistate { - unsynced_msg_ids = gb_sets:add_element(MsgId, UnsyncedMsgIds) }), + case MsgProps#message_properties.needs_confirming of + true -> Unconfirmed1 = gb_sets:add_element(MsgId, Unconfirmed), + State #qistate { unconfirmed = Unconfirmed1 }; + false -> State + end), ok = file_handle_cache:append( JournalHdl, [<<(case IsPersistent of true -> ?PUB_PERSIST_JPREFIX; @@ -302,8 +306,14 @@ sync(State = #qistate { journal_handle = JournalHdl }) -> needs_sync(#qistate { journal_handle = undefined }) -> false; -needs_sync(#qistate { journal_handle = JournalHdl }) -> - file_handle_cache:needs_sync(JournalHdl). +needs_sync(#qistate { journal_handle = JournalHdl, unconfirmed = UC }) -> + case gb_sets:is_empty(UC) of + true -> case file_handle_cache:needs_sync(JournalHdl) of + true -> other; + false -> false + end; + false -> confirms + end. flush(State = #qistate { dirty_count = 0 }) -> State; flush(State) -> flush_journal(State). @@ -398,21 +408,21 @@ blank_state_dir(Dir) -> dirty_count = 0, max_journal_entries = MaxJournal, on_sync = fun (_) -> ok end, - unsynced_msg_ids = gb_sets:new() }. + unconfirmed = gb_sets:new() }. -clean_file_name(Dir) -> filename:join(Dir, ?CLEAN_FILENAME). +clean_filename(Dir) -> filename:join(Dir, ?CLEAN_FILENAME). detect_clean_shutdown(Dir) -> - case rabbit_file:delete(clean_file_name(Dir)) of + case rabbit_file:delete(clean_filename(Dir)) of ok -> true; {error, enoent} -> false end. read_shutdown_terms(Dir) -> - rabbit_file:read_term_file(clean_file_name(Dir)). + rabbit_file:read_term_file(clean_filename(Dir)). store_clean_shutdown(Terms, Dir) -> - CleanFileName = clean_file_name(Dir), + CleanFileName = clean_filename(Dir), ok = rabbit_file:ensure_dir(CleanFileName), rabbit_file:write_term_file(CleanFileName, Terms). @@ -537,7 +547,7 @@ queue_index_walker_reader(QueueName, Gatherer) -> State = blank_state(QueueName), ok = scan_segments( fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, ok) -> - gatherer:in(Gatherer, {MsgId, 1}); + gatherer:sync_in(Gatherer, {MsgId, 1}); (_SeqId, _MsgId, _MsgProps, _IsPersistent, _IsDelivered, _IsAcked, Acc) -> Acc @@ -607,19 +617,21 @@ add_to_journal(RelSeq, Action, end}; add_to_journal(RelSeq, Action, JEntries) -> - Val = case array:get(RelSeq, JEntries) of - undefined -> - case Action of - ?PUB -> {Action, no_del, no_ack}; - del -> {no_pub, del, no_ack}; - ack -> {no_pub, no_del, ack} - end; - ({Pub, no_del, no_ack}) when Action == del -> - {Pub, del, no_ack}; - ({Pub, Del, no_ack}) when Action == ack -> - {Pub, Del, ack} - end, - array:set(RelSeq, Val, JEntries). + case array:get(RelSeq, JEntries) of + undefined -> + array:set(RelSeq, + case Action of + ?PUB -> {Action, no_del, no_ack}; + del -> {no_pub, del, no_ack}; + ack -> {no_pub, no_del, ack} + end, JEntries); + ({Pub, no_del, no_ack}) when Action == del -> + array:set(RelSeq, {Pub, del, no_ack}, JEntries); + ({no_pub, del, no_ack}) when Action == ack -> + array:set(RelSeq, {no_pub, del, ack}, JEntries); + ({?PUB, del, no_ack}) when Action == ack -> + array:reset(RelSeq, JEntries) + end. maybe_flush_journal(State = #qistate { dirty_count = DCount, max_journal_entries = MaxJournal }) @@ -704,7 +716,11 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) -> load_journal_entries(add_to_journal(SeqId, ack, State)); _ -> case file_handle_cache:read(Hdl, ?PUB_RECORD_BODY_BYTES) of - {ok, Bin} -> + %% Journal entry composed only of zeroes was probably + %% produced during a dirty shutdown so stop reading + {ok, <<0:?PUB_RECORD_BODY_BYTES/unit:8>>} -> + State; + {ok, <<Bin:?PUB_RECORD_BODY_BYTES/binary>>} -> {MsgId, MsgProps} = parse_pub_record_body(Bin), IsPersistent = case Prefix of ?PUB_PERSIST_JPREFIX -> true; @@ -732,9 +748,12 @@ deliver_or_ack(Kind, SeqIds, State) -> add_to_journal(SeqId, Kind, StateN) end, State1, SeqIds)). -notify_sync(State = #qistate { unsynced_msg_ids = UG, on_sync = OnSyncFun }) -> - OnSyncFun(UG), - State #qistate { unsynced_msg_ids = gb_sets:new() }. +notify_sync(State = #qistate { unconfirmed = UC, on_sync = OnSyncFun }) -> + case gb_sets:is_empty(UC) of + true -> State; + false -> OnSyncFun(UC), + State #qistate { unconfirmed = gb_sets:new() } + end. %%---------------------------------------------------------------------------- %% segment manipulation @@ -1008,7 +1027,18 @@ journal_minus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) -> {{no_pub, no_del, ack}, 0}; journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> - {undefined, -1}. + {undefined, -1}; + +%% Missing segment. If flush_journal/1 is interrupted after deleting +%% the segment but before truncating the journal we can get these +%% cases: a delivery and an acknowledgement in the journal, or just an +%% acknowledgement in the journal, but with no segment. In both cases +%% we have really forgotten the message; so ignore what's in the +%% journal. +journal_minus_segment1({no_pub, no_del, ack}, undefined) -> + {undefined, 0}; +journal_minus_segment1({no_pub, del, ack}, undefined) -> + {undefined, 0}. %%---------------------------------------------------------------------------- %% upgrade @@ -1043,6 +1073,21 @@ add_queue_ttl_segment(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, add_queue_ttl_segment(_) -> stop. +avoid_zeroes() -> + foreach_queue_index({none, fun avoid_zeroes_segment/1}). + +avoid_zeroes_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, + RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BITS, + Expiry:?EXPIRY_BITS, Rest/binary>>) -> + {<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS, + MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS>>, Rest}; +avoid_zeroes_segment(<<0:?REL_SEQ_ONLY_PREFIX_BITS, + RelSeq:?REL_SEQ_BITS, Rest/binary>>) -> + {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>, + Rest}; +avoid_zeroes_segment(_) -> + stop. + %%---------------------------------------------------------------------------- foreach_queue_index(Funs) -> @@ -1067,7 +1112,9 @@ transform_queue(Dir, Gatherer, {JournalFun, SegmentFun}) -> || Seg <- rabbit_file:wildcard(".*\\" ++ ?SEGMENT_EXTENSION, Dir)], ok = gatherer:finish(Gatherer). -transform_file(Path, Fun) -> +transform_file(_Path, none) -> + ok; +transform_file(Path, Fun) when is_function(Fun)-> PathTmp = Path ++ ".upgrade", case rabbit_file:file_size(Path) of 0 -> ok; diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index 6f12e22e..8106a46a 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_reader). @@ -23,7 +23,7 @@ -export([system_continue/3, system_terminate/4, system_code_change/4]). --export([init/4, mainloop/2]). +-export([init/4, mainloop/2, recvloop/2]). -export([conserve_resources/3, server_properties/1]). @@ -37,21 +37,27 @@ -record(v1, {parent, sock, connection, callback, recv_len, pending_recv, connection_state, queue_collector, heartbeater, stats_timer, - channel_sup_sup_pid, start_heartbeat_fun, buf, buf_len, - auth_mechanism, auth_state, conserve_resources, - last_blocked_by, last_blocked_at}). + ch_sup3_pid, channel_sup_sup_pid, start_heartbeat_fun, + buf, buf_len, throttle}). + +-record(connection, {name, host, peer_host, port, peer_port, + protocol, user, timeout_sec, frame_max, vhost, + client_properties, capabilities, + auth_mechanism, auth_state}). + +-record(throttle, {alarmed_by, last_blocked_by, last_blocked_at, + blocked_sent}). -define(STATISTICS_KEYS, [pid, recv_oct, recv_cnt, send_oct, send_cnt, send_pend, state, last_blocked_by, last_blocked_age, channels]). --define(CREATION_EVENT_KEYS, [pid, name, address, port, peer_address, peer_port, - ssl, peer_cert_subject, peer_cert_issuer, - peer_cert_validity, auth_mechanism, - ssl_protocol, ssl_key_exchange, - ssl_cipher, ssl_hash, - protocol, user, vhost, timeout, frame_max, - client_properties]). +-define(CREATION_EVENT_KEYS, + [pid, name, port, peer_port, host, + peer_host, ssl, peer_cert_subject, peer_cert_issuer, + peer_cert_validity, auth_mechanism, ssl_protocol, + ssl_key_exchange, ssl_cipher, ssl_hash, protocol, user, vhost, + timeout, frame_max, client_properties]). -define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). @@ -60,6 +66,10 @@ State#v1.connection_state =:= blocking orelse State#v1.connection_state =:= blocked)). +-define(IS_STOPPING(State), + (State#v1.connection_state =:= closing orelse + State#v1.connection_state =:= closed)). + %%-------------------------------------------------------------------------- -ifdef(use_specs). @@ -94,24 +104,24 @@ %%-------------------------------------------------------------------------- -start_link(ChannelSupSupPid, Collector, StartHeartbeatFun) -> - {ok, proc_lib:spawn_link(?MODULE, init, [self(), ChannelSupSupPid, +start_link(ChannelSup3Pid, Collector, StartHeartbeatFun) -> + {ok, proc_lib:spawn_link(?MODULE, init, [self(), ChannelSup3Pid, Collector, StartHeartbeatFun])}. shutdown(Pid, Explanation) -> gen_server:call(Pid, {shutdown, Explanation}, infinity). -init(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun) -> +init(Parent, ChSup3Pid, Collector, StartHeartbeatFun) -> Deb = sys:debug_options([]), receive {go, Sock, SockTransform} -> start_connection( - Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, Sock, + Parent, ChSup3Pid, Collector, StartHeartbeatFun, Deb, Sock, SockTransform) end. system_continue(Parent, Deb, State) -> - ?MODULE:mainloop(Deb, State#v1{parent = Parent}). + mainloop(Deb, State#v1{parent = Parent}). system_terminate(Reason, _Parent, _Deb, _State) -> exit(Reason). @@ -133,8 +143,8 @@ info(Pid, Items) -> force_event_refresh(Pid) -> gen_server:cast(Pid, force_event_refresh). -conserve_resources(Pid, _Source, Conserve) -> - Pid ! {conserve_resources, Conserve}, +conserve_resources(Pid, Source, Conserve) -> + Pid ! {conserve_resources, Source, Conserve}, ok. server_properties(Protocol) -> @@ -169,84 +179,110 @@ server_capabilities(rabbit_framing_amqp_0_9_1) -> [{<<"publisher_confirms">>, bool, true}, {<<"exchange_exchange_bindings">>, bool, true}, {<<"basic.nack">>, bool, true}, - {<<"consumer_cancel_notify">>, bool, true}]; + {<<"consumer_cancel_notify">>, bool, true}, + {<<"connection.blocked">>, bool, true}]; server_capabilities(_) -> []. +%%-------------------------------------------------------------------------- + log(Level, Fmt, Args) -> rabbit_log:log(connection, Level, Fmt, Args). +socket_error(Reason) -> + log(error, "error on AMQP connection ~p: ~p (~s)~n", + [self(), Reason, rabbit_misc:format_inet_error(Reason)]). + inet_op(F) -> rabbit_misc:throw_on_error(inet_error, F). socket_op(Sock, Fun) -> case Fun(Sock) of {ok, Res} -> Res; - {error, Reason} -> log(error, "error on AMQP connection ~p: ~p~n", - [self(), Reason]), + {error, Reason} -> socket_error(Reason), + %% NB: this is tcp socket, even in case of ssl + rabbit_net:fast_close(Sock), exit(normal) end. -name(Sock) -> - socket_op(Sock, fun (S) -> rabbit_net:connection_string(S, inbound) end). - -start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, +start_connection(Parent, ChSup3Pid, Collector, StartHeartbeatFun, Deb, Sock, SockTransform) -> process_flag(trap_exit, true), - ConnStr = name(Sock), - log(info, "accepting AMQP connection ~p (~s)~n", [self(), ConnStr]), + Name = case rabbit_net:connection_string(Sock, inbound) of + {ok, Str} -> Str; + {error, enotconn} -> rabbit_net:fast_close(Sock), + exit(normal); + {error, Reason} -> socket_error(Reason), + rabbit_net:fast_close(Sock), + exit(normal) + end, + log(info, "accepting AMQP connection ~p (~s)~n", [self(), Name]), ClientSock = socket_op(Sock, SockTransform), - erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(), - handshake_timeout), + erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(), handshake_timeout), + {PeerHost, PeerPort, Host, Port} = + socket_op(Sock, fun (S) -> rabbit_net:socket_ends(S, inbound) end), State = #v1{parent = Parent, sock = ClientSock, connection = #connection{ + name = list_to_binary(Name), + host = Host, + peer_host = PeerHost, + port = Port, + peer_port = PeerPort, protocol = none, user = none, timeout_sec = ?HANDSHAKE_TIMEOUT, frame_max = ?FRAME_MIN_SIZE, vhost = none, client_properties = none, - capabilities = []}, + capabilities = [], + auth_mechanism = none, + auth_state = none}, callback = uninitialized_callback, recv_len = 0, pending_recv = false, connection_state = pre_init, queue_collector = Collector, heartbeater = none, - channel_sup_sup_pid = ChannelSupSupPid, + ch_sup3_pid = ChSup3Pid, + channel_sup_sup_pid = none, start_heartbeat_fun = StartHeartbeatFun, buf = [], buf_len = 0, - auth_mechanism = none, - auth_state = none, - conserve_resources = false, - last_blocked_by = none, - last_blocked_at = never}, + throttle = #throttle{ + alarmed_by = [], + last_blocked_by = none, + last_blocked_at = never, + blocked_sent = false}}, try - ok = inet_op(fun () -> rabbit_net:tune_buffer_size(ClientSock) end), - recvloop(Deb, switch_callback(rabbit_event:init_stats_timer( - State, #v1.stats_timer), - handshake, 8)), - log(info, "closing AMQP connection ~p (~s)~n", [self(), ConnStr]) + run({?MODULE, recvloop, + [Deb, switch_callback(rabbit_event:init_stats_timer( + State, #v1.stats_timer), + handshake, 8)]}), + log(info, "closing AMQP connection ~p (~s)~n", [self(), Name]) catch Ex -> log(case Ex of connection_closed_abruptly -> warning; _ -> error end, "closing AMQP connection ~p (~s):~n~p~n", - [self(), ConnStr, Ex]) + [self(), Name, Ex]) after - %% The reader is the controlling process and hence its - %% termination will close the socket. Furthermore, - %% gen_tcp:close/1 waits for pending output to be sent, which - %% results in unnecessary delays. However, to keep the - %% file_handle_cache accounting as accurate as possible it - %% would be good to close the socket immediately if we - %% can. But we can only do this for non-ssl sockets. - %% - rabbit_net:maybe_fast_close(ClientSock), + %% We don't call gen_tcp:close/1 here since it waits for + %% pending output to be sent, which results in unnecessary + %% delays. We could just terminate - the reader is the + %% controlling process and hence its termination will close + %% the socket. However, to keep the file_handle_cache + %% accounting as accurate as possible we ought to close the + %% socket w/o delay before termination. + rabbit_net:fast_close(ClientSock), + rabbit_networking:unregister_connection(self()), rabbit_event:notify(connection_closed, [{pid, self()}]) end, done. +run({M, F, A}) -> + try apply(M, F, A) + catch {become, MFA} -> run(MFA) + end. + recvloop(Deb, State = #v1{pending_recv = true}) -> mainloop(Deb, State); recvloop(Deb, State = #v1{connection_state = blocked}) -> @@ -276,18 +312,32 @@ mainloop(Deb, State = #v1{sock = Sock, buf = Buf, buf_len = BufLen}) -> _ -> throw(connection_closed_abruptly) end; {inet_async, _Sock, _Ref, {error, Reason}} -> + maybe_emit_stats(State), throw({inet_error, Reason}); + {system, From, Request} -> + sys:handle_system_msg(Request, From, State#v1.parent, + ?MODULE, Deb, State); Other -> - handle_other(Other, Deb, State) + case handle_other(Other, State) of + stop -> ok; + NewState -> recvloop(Deb, NewState) + end end. -handle_other({conserve_resources, Conserve}, Deb, State) -> - recvloop(Deb, control_throttle(State#v1{conserve_resources = Conserve})); -handle_other({channel_closing, ChPid}, Deb, State) -> +handle_other({conserve_resources, Source, Conserve}, + State = #v1{throttle = Throttle = + #throttle{alarmed_by = CR}}) -> + CR1 = case Conserve of + true -> lists:usort([Source | CR]); + false -> CR -- [Source] + end, + Throttle1 = Throttle#throttle{alarmed_by = CR1}, + control_throttle(State#v1{throttle = Throttle1}); +handle_other({channel_closing, ChPid}, State) -> ok = rabbit_channel:ready_for_close(ChPid), channel_cleanup(ChPid), - mainloop(Deb, maybe_close(control_throttle(State))); -handle_other({'EXIT', Parent, Reason}, _Deb, State = #v1{parent = Parent}) -> + maybe_close(control_throttle(State)); +handle_other({'EXIT', Parent, Reason}, State = #v1{parent = Parent}) -> terminate(io_lib:format("broker forced connection closure " "with reason '~w'", [Reason]), State), %% this is what we are expected to do according to @@ -298,94 +348,133 @@ handle_other({'EXIT', Parent, Reason}, _Deb, State = #v1{parent = Parent}) -> %% ordinary error case. However, since this termination is %% initiated by our parent it is probably more important to exit %% quickly. + maybe_emit_stats(State), exit(Reason); -handle_other({channel_exit, _Channel, E = {writer, send_failed, _Error}}, - _Deb, _State) -> +handle_other({channel_exit, _Channel, E = {writer, send_failed, _E}}, State) -> + maybe_emit_stats(State), throw(E); -handle_other({channel_exit, Channel, Reason}, Deb, State) -> - mainloop(Deb, handle_exception(State, Channel, Reason)); -handle_other({'DOWN', _MRef, process, ChPid, Reason}, Deb, State) -> - mainloop(Deb, handle_dependent_exit(ChPid, Reason, State)); -handle_other(terminate_connection, _Deb, State) -> +handle_other({channel_exit, Channel, Reason}, State) -> + handle_exception(State, Channel, Reason); +handle_other({'DOWN', _MRef, process, ChPid, Reason}, State) -> + handle_dependent_exit(ChPid, Reason, State); +handle_other(terminate_connection, State) -> + maybe_emit_stats(State), + stop; +handle_other(handshake_timeout, State) + when ?IS_RUNNING(State) orelse ?IS_STOPPING(State) -> State; -handle_other(handshake_timeout, Deb, State) - when ?IS_RUNNING(State) orelse - State#v1.connection_state =:= closing orelse - State#v1.connection_state =:= closed -> - mainloop(Deb, State); -handle_other(handshake_timeout, _Deb, State) -> +handle_other(handshake_timeout, State) -> + maybe_emit_stats(State), throw({handshake_timeout, State#v1.callback}); -handle_other(timeout, Deb, State = #v1{connection_state = closed}) -> - mainloop(Deb, State); -handle_other(timeout, _Deb, #v1{connection_state = S}) -> - throw({timeout, S}); -handle_other({'$gen_call', From, {shutdown, Explanation}}, Deb, State) -> +handle_other(heartbeat_timeout, State = #v1{connection_state = closed}) -> + State; +handle_other(heartbeat_timeout, State = #v1{connection_state = S}) -> + maybe_emit_stats(State), + throw({heartbeat_timeout, S}); +handle_other({'$gen_call', From, {shutdown, Explanation}}, State) -> {ForceTermination, NewState} = terminate(Explanation, State), gen_server:reply(From, ok), case ForceTermination of - force -> ok; - normal -> mainloop(Deb, NewState) + force -> stop; + normal -> NewState end; -handle_other({'$gen_call', From, info}, Deb, State) -> +handle_other({'$gen_call', From, info}, State) -> gen_server:reply(From, infos(?INFO_KEYS, State)), - mainloop(Deb, State); -handle_other({'$gen_call', From, {info, Items}}, Deb, State) -> + State; +handle_other({'$gen_call', From, {info, Items}}, State) -> gen_server:reply(From, try {ok, infos(Items, State)} catch Error -> {error, Error} end), - mainloop(Deb, State); -handle_other({'$gen_cast', force_event_refresh}, Deb, State) + State; +handle_other({'$gen_cast', force_event_refresh}, State) when ?IS_RUNNING(State) -> rabbit_event:notify(connection_created, [{type, network} | infos(?CREATION_EVENT_KEYS, State)]), - mainloop(Deb, State); -handle_other({'$gen_cast', force_event_refresh}, Deb, State) -> + State; +handle_other({'$gen_cast', force_event_refresh}, State) -> %% Ignore, we will emit a created event once we start running. - mainloop(Deb, State); -handle_other(emit_stats, Deb, State) -> - mainloop(Deb, emit_stats(State)); -handle_other({system, From, Request}, Deb, State = #v1{parent = Parent}) -> - sys:handle_system_msg(Request, From, Parent, ?MODULE, Deb, State); -handle_other({bump_credit, Msg}, Deb, State) -> + State; +handle_other(ensure_stats, State) -> + ensure_stats_timer(State); +handle_other(emit_stats, State) -> + emit_stats(State); +handle_other({bump_credit, Msg}, State) -> credit_flow:handle_bump_msg(Msg), - recvloop(Deb, control_throttle(State)); -handle_other(Other, _Deb, _State) -> + control_throttle(State); +handle_other(Other, State) -> %% internal error -> something worth dying for + maybe_emit_stats(State), exit({unexpected_message, Other}). switch_callback(State, Callback, Length) -> State#v1{callback = Callback, recv_len = Length}. terminate(Explanation, State) when ?IS_RUNNING(State) -> - {normal, send_exception(State, 0, - rabbit_misc:amqp_error( - connection_forced, Explanation, [], none))}; + {normal, handle_exception(State, 0, + rabbit_misc:amqp_error( + connection_forced, Explanation, [], none))}; terminate(_Explanation, State) -> {force, State}. -control_throttle(State = #v1{connection_state = CS, - conserve_resources = Mem}) -> - case {CS, Mem orelse credit_flow:blocked()} of +control_throttle(State = #v1{connection_state = CS, throttle = Throttle}) -> + IsThrottled = ((Throttle#throttle.alarmed_by =/= []) orelse + credit_flow:blocked()), + case {CS, IsThrottled} of {running, true} -> State#v1{connection_state = blocking}; {blocking, false} -> State#v1{connection_state = running}; {blocked, false} -> ok = rabbit_heartbeat:resume_monitor( State#v1.heartbeater), - State#v1{connection_state = running}; - {blocked, true} -> update_last_blocked_by(State); + maybe_send_unblocked(State), + State#v1{connection_state = running, + throttle = Throttle#throttle{ + blocked_sent = false}}; + {blocked, true} -> State#v1{throttle = update_last_blocked_by( + Throttle)}; {_, _} -> State end. -maybe_block(State = #v1{connection_state = blocking}) -> +maybe_block(State = #v1{connection_state = blocking, + throttle = Throttle}) -> ok = rabbit_heartbeat:pause_monitor(State#v1.heartbeater), - update_last_blocked_by(State#v1{connection_state = blocked, - last_blocked_at = erlang:now()}); + Sent = maybe_send_blocked(State), + State#v1{connection_state = blocked, + throttle = update_last_blocked_by( + Throttle#throttle{last_blocked_at = erlang:now(), + blocked_sent = Sent})}; maybe_block(State) -> State. -update_last_blocked_by(State = #v1{conserve_resources = true}) -> - State#v1{last_blocked_by = resource}; -update_last_blocked_by(State = #v1{conserve_resources = false}) -> - State#v1{last_blocked_by = flow}. +maybe_send_blocked(#v1{throttle = #throttle{alarmed_by = []}}) -> + false; +maybe_send_blocked(#v1{throttle = #throttle{alarmed_by = CR}, + connection = #connection{ + protocol = Protocol, + capabilities = Capabilities}, + sock = Sock}) -> + case rabbit_misc:table_lookup(Capabilities, <<"connection.blocked">>) of + {bool, true} -> + RStr = string:join([atom_to_list(A) || A <- CR], " & "), + Reason = list_to_binary(rabbit_misc:format("low on ~s", [RStr])), + ok = send_on_channel0(Sock, #'connection.blocked'{reason = Reason}, + Protocol), + true; + _ -> + false + end. + +maybe_send_unblocked(#v1{throttle = #throttle{blocked_sent = false}}) -> + ok; +maybe_send_unblocked(#v1{connection = #connection{protocol = Protocol}, + sock = Sock}) -> + ok = send_on_channel0(Sock, #'connection.unblocked'{}, Protocol). + +update_last_blocked_by(Throttle = #throttle{alarmed_by = []}) -> + Throttle#throttle{last_blocked_by = flow}; +update_last_blocked_by(Throttle) -> + Throttle#throttle{last_blocked_by = resource}. + +%%-------------------------------------------------------------------------- +%% error handling / termination close_connection(State = #v1{queue_collector = Collector, connection = #connection{ @@ -405,29 +494,15 @@ close_connection(State = #v1{queue_collector = Collector, handle_dependent_exit(ChPid, Reason, State) -> case {channel_cleanup(ChPid), termination_kind(Reason)} of - {undefined, uncontrolled} -> - exit({abnormal_dependent_exit, ChPid, Reason}); - {_Channel, controlled} -> - maybe_close(control_throttle(State)); - {Channel, uncontrolled} -> - log(error, "AMQP connection ~p, channel ~p - error:~n~p~n", - [self(), Channel, Reason]), - maybe_close(handle_exception(control_throttle(State), - Channel, Reason)) - end. - -channel_cleanup(ChPid) -> - case get({ch_pid, ChPid}) of - undefined -> undefined; - {Channel, MRef} -> credit_flow:peer_down(ChPid), - erase({channel, Channel}), - erase({ch_pid, ChPid}), - erlang:demonitor(MRef, [flush]), - Channel + {undefined, controlled} -> State; + {undefined, uncontrolled} -> exit({abnormal_dependent_exit, + ChPid, Reason}); + {_Channel, controlled} -> maybe_close(control_throttle(State)); + {Channel, uncontrolled} -> State1 = handle_exception( + State, Channel, Reason), + maybe_close(control_throttle(State1)) end. -all_channels() -> [ChPid || {{ch_pid, ChPid}, _ChannelMRef} <- get()]. - terminate_channels() -> NChannels = length([rabbit_channel:shutdown(ChPid) || ChPid <- all_channels()]), @@ -481,78 +556,179 @@ maybe_close(State) -> termination_kind(normal) -> controlled; termination_kind(_) -> uncontrolled. +handle_exception(State = #v1{connection_state = closed}, Channel, Reason) -> + log(error, "AMQP connection ~p (~p), channel ~p - error:~n~p~n", + [self(), closed, Channel, Reason]), + State; +handle_exception(State = #v1{connection = #connection{protocol = Protocol}, + connection_state = CS}, + Channel, Reason) + when ?IS_RUNNING(State) orelse CS =:= closing -> + log(error, "AMQP connection ~p (~p), channel ~p - error:~n~p~n", + [self(), CS, Channel, Reason]), + {0, CloseMethod} = + rabbit_binary_generator:map_exception(Channel, Reason, Protocol), + terminate_channels(), + State1 = close_connection(State), + ok = send_on_channel0(State1#v1.sock, CloseMethod, Protocol), + State1; +handle_exception(State, Channel, Reason) -> + %% We don't trust the client at this point - force them to wait + %% for a bit so they can't DOS us with repeated failed logins etc. + timer:sleep(?SILENT_CLOSE_DELAY * 1000), + throw({handshake_error, State#v1.connection_state, Channel, Reason}). + +%% we've "lost sync" with the client and hence must not accept any +%% more input +fatal_frame_error(Error, Type, Channel, Payload, State) -> + frame_error(Error, Type, Channel, Payload, State), + %% grace period to allow transmission of error + timer:sleep(?SILENT_CLOSE_DELAY * 1000), + throw(fatal_frame_error). + +frame_error(Error, Type, Channel, Payload, State) -> + {Str, Bin} = payload_snippet(Payload), + handle_exception(State, Channel, + rabbit_misc:amqp_error(frame_error, + "type ~p, ~s octets = ~p: ~p", + [Type, Str, Bin, Error], none)). + +unexpected_frame(Type, Channel, Payload, State) -> + {Str, Bin} = payload_snippet(Payload), + handle_exception(State, Channel, + rabbit_misc:amqp_error(unexpected_frame, + "type ~p, ~s octets = ~p", + [Type, Str, Bin], none)). + +payload_snippet(Payload) when size(Payload) =< 16 -> + {"all", Payload}; +payload_snippet(<<Snippet:16/binary, _/binary>>) -> + {"first 16", Snippet}. + +%%-------------------------------------------------------------------------- + +create_channel(Channel, State) -> + #v1{sock = Sock, queue_collector = Collector, + channel_sup_sup_pid = ChanSupSup, + connection = #connection{name = Name, + protocol = Protocol, + frame_max = FrameMax, + user = User, + vhost = VHost, + capabilities = Capabilities}} = State, + {ok, _ChSupPid, {ChPid, AState}} = + rabbit_channel_sup_sup:start_channel( + ChanSupSup, {tcp, Sock, Channel, FrameMax, self(), Name, + Protocol, User, VHost, Capabilities, Collector}), + MRef = erlang:monitor(process, ChPid), + put({ch_pid, ChPid}, {Channel, MRef}), + put({channel, Channel}, {ChPid, AState}), + {ChPid, AState}. + +channel_cleanup(ChPid) -> + case get({ch_pid, ChPid}) of + undefined -> undefined; + {Channel, MRef} -> credit_flow:peer_down(ChPid), + erase({channel, Channel}), + erase({ch_pid, ChPid}), + erlang:demonitor(MRef, [flush]), + Channel + end. + +all_channels() -> [ChPid || {{ch_pid, ChPid}, _ChannelMRef} <- get()]. + +%%-------------------------------------------------------------------------- + handle_frame(Type, 0, Payload, - State = #v1{connection_state = CS, - connection = #connection{protocol = Protocol}}) - when CS =:= closing; CS =:= closed -> + State = #v1{connection = #connection{protocol = Protocol}}) + when ?IS_STOPPING(State) -> case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); _Other -> State end; -handle_frame(_Type, _Channel, _Payload, State = #v1{connection_state = CS}) - when CS =:= closing; CS =:= closed -> - State; handle_frame(Type, 0, Payload, State = #v1{connection = #connection{protocol = Protocol}}) -> case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of - error -> throw({unknown_frame, 0, Type, Payload}); + error -> frame_error(unknown_frame, Type, 0, Payload, State); heartbeat -> State; {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); - Other -> throw({unexpected_frame_on_channel0, Other}) + _Other -> unexpected_frame(Type, 0, Payload, State) end; handle_frame(Type, Channel, Payload, - State = #v1{connection = #connection{protocol = Protocol}}) -> + State = #v1{connection = #connection{protocol = Protocol}}) + when ?IS_RUNNING(State) -> case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of - error -> throw({unknown_frame, Channel, Type, Payload}); - heartbeat -> throw({unexpected_heartbeat_frame, Channel}); - AnalyzedFrame -> process_frame(AnalyzedFrame, Channel, State) - end. + error -> frame_error(unknown_frame, Type, Channel, Payload, State); + heartbeat -> unexpected_frame(Type, Channel, Payload, State); + Frame -> process_frame(Frame, Channel, State) + end; +handle_frame(_Type, _Channel, _Payload, State) when ?IS_STOPPING(State) -> + State; +handle_frame(Type, Channel, Payload, State) -> + unexpected_frame(Type, Channel, Payload, State). process_frame(Frame, Channel, State) -> - case get({channel, Channel}) of - {ChPid, AState} -> - case process_channel_frame(Frame, ChPid, AState) of - {ok, NewAState} -> put({channel, Channel}, {ChPid, NewAState}), - post_process_frame(Frame, ChPid, State); - {error, Reason} -> handle_exception(State, Channel, Reason) - end; - undefined when ?IS_RUNNING(State) -> - ok = create_channel(Channel, State), - process_frame(Frame, Channel, State); - undefined -> - throw({channel_frame_while_starting, - Channel, State#v1.connection_state, Frame}) + ChKey = {channel, Channel}, + {ChPid, AState} = case get(ChKey) of + undefined -> create_channel(Channel, State); + Other -> Other + end, + case rabbit_command_assembler:process(Frame, AState) of + {ok, NewAState} -> + put(ChKey, {ChPid, NewAState}), + post_process_frame(Frame, ChPid, State); + {ok, Method, NewAState} -> + rabbit_channel:do(ChPid, Method), + put(ChKey, {ChPid, NewAState}), + post_process_frame(Frame, ChPid, State); + {ok, Method, Content, NewAState} -> + rabbit_channel:do_flow(ChPid, Method, Content), + put(ChKey, {ChPid, NewAState}), + post_process_frame(Frame, ChPid, control_throttle(State)); + {error, Reason} -> + handle_exception(State, Channel, Reason) end. post_process_frame({method, 'channel.close_ok', _}, ChPid, State) -> channel_cleanup(ChPid), + %% This is not strictly necessary, but more obviously + %% correct. Also note that we do not need to call maybe_close/1 + %% since we cannot possibly be in the 'closing' state. control_throttle(State); -post_process_frame({method, MethodName, _}, _ChPid, - State = #v1{connection = #connection{ - protocol = Protocol}}) -> - case Protocol:method_has_content(MethodName) of - true -> erlang:bump_reductions(2000), - maybe_block(control_throttle(State)); - false -> control_throttle(State) - end; +post_process_frame({content_header, _, _, _, _}, _ChPid, State) -> + maybe_block(State); +post_process_frame({content_body, _}, _ChPid, State) -> + maybe_block(State); post_process_frame(_Frame, _ChPid, State) -> - control_throttle(State). + State. + +%%-------------------------------------------------------------------------- +%% We allow clients to exceed the frame size a little bit since quite +%% a few get it wrong - off-by 1 or 8 (empty frame size) are typical. +-define(FRAME_SIZE_FUDGE, ?EMPTY_FRAME_SIZE). + +handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32>>, + State = #v1{connection = #connection{frame_max = FrameMax}}) + when FrameMax /= 0 andalso + PayloadSize > FrameMax - ?EMPTY_FRAME_SIZE + ?FRAME_SIZE_FUDGE -> + fatal_frame_error( + {frame_too_large, PayloadSize, FrameMax - ?EMPTY_FRAME_SIZE}, + Type, Channel, <<>>, State); handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32>>, State) -> ensure_stats_timer( switch_callback(State, {frame_payload, Type, Channel, PayloadSize}, PayloadSize + 1)); -handle_input({frame_payload, Type, Channel, PayloadSize}, - PayloadAndMarker, State) -> - case PayloadAndMarker of - <<Payload:PayloadSize/binary, ?FRAME_END>> -> - switch_callback(handle_frame(Type, Channel, Payload, State), - frame_header, 7); - _ -> - throw({bad_payload, Type, Channel, PayloadSize, PayloadAndMarker}) +handle_input({frame_payload, Type, Channel, PayloadSize}, Data, State) -> + <<Payload:PayloadSize/binary, EndMarker>> = Data, + case EndMarker of + ?FRAME_END -> State1 = handle_frame(Type, Channel, Payload, State), + switch_callback(State1, frame_header, 7); + _ -> fatal_frame_error({invalid_frame_end_marker, EndMarker}, + Type, Channel, Payload, State) end; %% The two rules pertaining to version negotiation: @@ -582,8 +758,12 @@ handle_input(handshake, <<"AMQP", 1, 1, 8, 0>>, State) -> handle_input(handshake, <<"AMQP", 1, 1, 9, 1>>, State) -> start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State); +%% ... and finally, the 1.0 spec is crystal clear! Note that the +handle_input(handshake, <<"AMQP", Id, 1, 0, 0>>, State) -> + become_1_0(Id, State); + handle_input(handshake, <<"AMQP", A, B, C, D>>, #v1{sock = Sock}) -> - refuse_connection(Sock, {bad_version, A, B, C, D}); + refuse_connection(Sock, {bad_version, {A, B, C, D}}); handle_input(handshake, Other, #v1{sock = Sock}) -> refuse_connection(Sock, {bad_header, Other}); @@ -597,6 +777,7 @@ handle_input(Callback, Data, _State) -> start_connection({ProtocolMajor, ProtocolMinor, _ProtocolRevision}, Protocol, State = #v1{sock = Sock, connection = Connection}) -> + rabbit_networking:register_connection(self()), Start = #'connection.start'{ version_major = ProtocolMajor, version_minor = ProtocolMinor, @@ -610,10 +791,16 @@ start_connection({ProtocolMajor, ProtocolMinor, _ProtocolRevision}, connection_state = starting}, frame_header, 7). -refuse_connection(Sock, Exception) -> - ok = inet_op(fun () -> rabbit_net:send(Sock, <<"AMQP",0,0,9,1>>) end), +refuse_connection(Sock, Exception, {A, B, C, D}) -> + ok = inet_op(fun () -> rabbit_net:send(Sock, <<"AMQP",A,B,C,D>>) end), throw(Exception). +-ifdef(use_specs). +-spec(refuse_connection/2 :: (rabbit_net:socket(), any()) -> no_return()). +-endif. +refuse_connection(Sock, Exception) -> + refuse_connection(Sock, Exception, {0, 0, 9, 1}). + ensure_stats_timer(State = #v1{connection_state = running}) -> rabbit_event:ensure_stats_timer(State, #v1.stats_timer, emit_stats); ensure_stats_timer(State) -> @@ -623,24 +810,14 @@ ensure_stats_timer(State) -> handle_method0(MethodName, FieldsBin, State = #v1{connection = #connection{protocol = Protocol}}) -> - HandleException = - fun(R) -> - case ?IS_RUNNING(State) of - true -> send_exception(State, 0, R); - %% We don't trust the client at this point - force - %% them to wait for a bit so they can't DOS us with - %% repeated failed logins etc. - false -> timer:sleep(?SILENT_CLOSE_DELAY * 1000), - throw({channel0_error, State#v1.connection_state, R}) - end - end, try handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin), State) catch exit:#amqp_error{method = none} = Reason -> - HandleException(Reason#amqp_error{method = MethodName}); + handle_exception(State, 0, Reason#amqp_error{method = MethodName}); Type:Reason -> - HandleException({Type, Reason, MethodName, erlang:get_stacktrace()}) + Stack = erlang:get_stacktrace(), + handle_exception(State, 0, {Type, Reason, MethodName, Stack}) end. handle_method0(#'connection.start_ok'{mechanism = Mechanism, @@ -655,13 +832,13 @@ handle_method0(#'connection.start_ok'{mechanism = Mechanism, {table, Capabilities1} -> Capabilities1; _ -> [] end, - State = State0#v1{auth_mechanism = AuthMechanism, - auth_state = AuthMechanism:init(Sock), - connection_state = securing, + State = State0#v1{connection_state = securing, connection = Connection#connection{ client_properties = ClientProperties, - capabilities = Capabilities}}, + capabilities = Capabilities, + auth_mechanism = {Mechanism, AuthMechanism}, + auth_state = AuthMechanism:init(Sock)}}, auth_phase(Response, State); handle_method0(#'connection.secure_ok'{response = Response}, @@ -687,7 +864,7 @@ handle_method0(#'connection.tune_ok'{frame_max = FrameMax, Frame = rabbit_binary_generator:build_heartbeat_frame(), SendFun = fun() -> catch rabbit_net:send(Sock, Frame) end, Parent = self(), - ReceiveFun = fun() -> Parent ! timeout end, + ReceiveFun = fun() -> Parent ! heartbeat_timeout end, Heartbeater = SHF(Sock, ClientHeartbeat, SendFun, ClientHeartbeat, ReceiveFun), State#v1{connection_state = opening, @@ -699,32 +876,39 @@ handle_method0(#'connection.tune_ok'{frame_max = FrameMax, handle_method0(#'connection.open'{virtual_host = VHostPath}, State = #v1{connection_state = opening, - connection = Connection = #connection{ - user = User, - protocol = Protocol}, - sock = Sock}) -> + connection = Connection = #connection{ + user = User, + protocol = Protocol}, + ch_sup3_pid = ChSup3Pid, + sock = Sock, + throttle = Throttle}) -> ok = rabbit_access_control:check_vhost_access(User, VHostPath), NewConnection = Connection#connection{vhost = VHostPath}, ok = send_on_channel0(Sock, #'connection.open_ok'{}, Protocol), Conserve = rabbit_alarm:register(self(), {?MODULE, conserve_resources, []}), + Throttle1 = Throttle#throttle{alarmed_by = Conserve}, + {ok, ChannelSupSupPid} = + supervisor2:start_child( + ChSup3Pid, + {channel_sup_sup, {rabbit_channel_sup_sup, start_link, []}, + intrinsic, infinity, supervisor, [rabbit_channel_sup_sup]}), State1 = control_throttle( - State#v1{connection_state = running, - connection = NewConnection, - conserve_resources = Conserve}), + State#v1{connection_state = running, + connection = NewConnection, + channel_sup_sup_pid = ChannelSupSupPid, + throttle = Throttle1}), rabbit_event:notify(connection_created, [{type, network} | infos(?CREATION_EVENT_KEYS, State1)]), - rabbit_event:if_enabled(State1, #v1.stats_timer, - fun() -> emit_stats(State1) end), + maybe_emit_stats(State1), State1; handle_method0(#'connection.close'{}, State) when ?IS_RUNNING(State) -> lists:foreach(fun rabbit_channel:shutdown/1, all_channels()), maybe_close(State#v1{connection_state = closing}); handle_method0(#'connection.close'{}, - State = #v1{connection_state = CS, - connection = #connection{protocol = Protocol}, + State = #v1{connection = #connection{protocol = Protocol}, sock = Sock}) - when CS =:= closing; CS =:= closed -> + when ?IS_STOPPING(State) -> %% We're already closed or closing, so we don't need to cleanup %% anything. ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol), @@ -733,19 +917,20 @@ handle_method0(#'connection.close_ok'{}, State = #v1{connection_state = closed}) -> self() ! terminate_connection, State; -handle_method0(_Method, State = #v1{connection_state = CS}) - when CS =:= closing; CS =:= closed -> +handle_method0(_Method, State) when ?IS_STOPPING(State) -> State; handle_method0(_Method, #v1{connection_state = S}) -> rabbit_misc:protocol_error( channel_error, "unexpected method in connection state ~w", [S]). -%% Compute frame_max for this instance. Could simply use 0, but breaks -%% QPid Java client. server_frame_max() -> {ok, FrameMax} = application:get_env(rabbit, frame_max), FrameMax. +server_heartbeat() -> + {ok, Heartbeat} = application:get_env(rabbit, heartbeat), + Heartbeat. + send_on_channel0(Sock, Method, Protocol) -> ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol). @@ -777,115 +962,86 @@ auth_mechanisms_binary(Sock) -> string:join([atom_to_list(A) || A <- auth_mechanisms(Sock)], " ")). auth_phase(Response, - State = #v1{auth_mechanism = AuthMechanism, - auth_state = AuthState, - connection = Connection = - #connection{protocol = Protocol}, + State = #v1{connection = Connection = + #connection{protocol = Protocol, + auth_mechanism = {Name, AuthMechanism}, + auth_state = AuthState}, sock = Sock}) -> case AuthMechanism:handle_response(Response, AuthState) of {refused, Msg, Args} -> rabbit_misc:protocol_error( access_refused, "~s login refused: ~s", - [proplists:get_value(name, AuthMechanism:description()), - io_lib:format(Msg, Args)]); + [Name, io_lib:format(Msg, Args)]); {protocol_error, Msg, Args} -> rabbit_misc:protocol_error(syntax_error, Msg, Args); {challenge, Challenge, AuthState1} -> Secure = #'connection.secure'{challenge = Challenge}, ok = send_on_channel0(Sock, Secure, Protocol), - State#v1{auth_state = AuthState1}; + State#v1{connection = Connection#connection{ + auth_state = AuthState1}}; {ok, User} -> Tune = #'connection.tune'{channel_max = 0, frame_max = server_frame_max(), - heartbeat = 0}, + heartbeat = server_heartbeat()}, ok = send_on_channel0(Sock, Tune, Protocol), State#v1{connection_state = tuning, - connection = Connection#connection{user = User}} + connection = Connection#connection{user = User, + auth_state = none}} end. %%-------------------------------------------------------------------------- infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. -i(pid, #v1{}) -> - self(); -i(name, #v1{sock = Sock}) -> - list_to_binary(name(Sock)); -i(address, #v1{sock = Sock}) -> - socket_info(fun rabbit_net:sockname/1, fun ({A, _}) -> A end, Sock); -i(port, #v1{sock = Sock}) -> - socket_info(fun rabbit_net:sockname/1, fun ({_, P}) -> P end, Sock); -i(peer_address, #v1{sock = Sock}) -> - socket_info(fun rabbit_net:peername/1, fun ({A, _}) -> A end, Sock); -i(peer_port, #v1{sock = Sock}) -> - socket_info(fun rabbit_net:peername/1, fun ({_, P}) -> P end, Sock); -i(ssl, #v1{sock = Sock}) -> - rabbit_net:is_ssl(Sock); -i(ssl_protocol, #v1{sock = Sock}) -> - ssl_info(fun ({P, _}) -> P end, Sock); -i(ssl_key_exchange, #v1{sock = Sock}) -> - ssl_info(fun ({_, {K, _, _}}) -> K end, Sock); -i(ssl_cipher, #v1{sock = Sock}) -> - ssl_info(fun ({_, {_, C, _}}) -> C end, Sock); -i(ssl_hash, #v1{sock = Sock}) -> - ssl_info(fun ({_, {_, _, H}}) -> H end, Sock); -i(peer_cert_issuer, #v1{sock = Sock}) -> - cert_info(fun rabbit_ssl:peer_cert_issuer/1, Sock); -i(peer_cert_subject, #v1{sock = Sock}) -> - cert_info(fun rabbit_ssl:peer_cert_subject/1, Sock); -i(peer_cert_validity, #v1{sock = Sock}) -> - cert_info(fun rabbit_ssl:peer_cert_validity/1, Sock); -i(SockStat, #v1{sock = Sock}) when SockStat =:= recv_oct; - SockStat =:= recv_cnt; - SockStat =:= send_oct; - SockStat =:= send_cnt; - SockStat =:= send_pend -> - socket_info(fun () -> rabbit_net:getstat(Sock, [SockStat]) end, - fun ([{_, I}]) -> I end); -i(state, #v1{connection_state = S}) -> - S; -i(last_blocked_by, #v1{last_blocked_by = By}) -> - By; -i(last_blocked_age, #v1{last_blocked_at = never}) -> +i(pid, #v1{}) -> self(); +i(SockStat, S) when SockStat =:= recv_oct; + SockStat =:= recv_cnt; + SockStat =:= send_oct; + SockStat =:= send_cnt; + SockStat =:= send_pend -> + socket_info(fun (Sock) -> rabbit_net:getstat(Sock, [SockStat]) end, + fun ([{_, I}]) -> I end, S); +i(ssl, #v1{sock = Sock}) -> rabbit_net:is_ssl(Sock); +i(ssl_protocol, S) -> ssl_info(fun ({P, _}) -> P end, S); +i(ssl_key_exchange, S) -> ssl_info(fun ({_, {K, _, _}}) -> K end, S); +i(ssl_cipher, S) -> ssl_info(fun ({_, {_, C, _}}) -> C end, S); +i(ssl_hash, S) -> ssl_info(fun ({_, {_, _, H}}) -> H end, S); +i(peer_cert_issuer, S) -> cert_info(fun rabbit_ssl:peer_cert_issuer/1, S); +i(peer_cert_subject, S) -> cert_info(fun rabbit_ssl:peer_cert_subject/1, S); +i(peer_cert_validity, S) -> cert_info(fun rabbit_ssl:peer_cert_validity/1, S); +i(state, #v1{connection_state = CS}) -> CS; +i(last_blocked_by, #v1{throttle = #throttle{last_blocked_by = By}}) -> By; +i(last_blocked_age, #v1{throttle = #throttle{last_blocked_at = never}}) -> infinity; -i(last_blocked_age, #v1{last_blocked_at = T}) -> +i(last_blocked_age, #v1{throttle = #throttle{last_blocked_at = T}}) -> timer:now_diff(erlang:now(), T) / 1000000; -i(channels, #v1{}) -> - length(all_channels()); -i(protocol, #v1{connection = #connection{protocol = none}}) -> - none; -i(protocol, #v1{connection = #connection{protocol = Protocol}}) -> - Protocol:version(); -i(auth_mechanism, #v1{auth_mechanism = none}) -> - none; -i(auth_mechanism, #v1{auth_mechanism = Mechanism}) -> - proplists:get_value(name, Mechanism:description()); -i(user, #v1{connection = #connection{user = #user{username = Username}}}) -> - Username; -i(user, #v1{connection = #connection{user = none}}) -> - ''; -i(vhost, #v1{connection = #connection{vhost = VHost}}) -> - VHost; -i(timeout, #v1{connection = #connection{timeout_sec = Timeout}}) -> - Timeout; -i(frame_max, #v1{connection = #connection{frame_max = FrameMax}}) -> - FrameMax; -i(client_properties, #v1{connection = #connection{ - client_properties = ClientProperties}}) -> - ClientProperties; -i(Item, #v1{}) -> - throw({bad_argument, Item}). - -socket_info(Get, Select, Sock) -> - socket_info(fun() -> Get(Sock) end, Select). - -socket_info(Get, Select) -> - case Get() of +i(channels, #v1{}) -> length(all_channels()); +i(Item, #v1{connection = Conn}) -> ic(Item, Conn). + +ic(name, #connection{name = Name}) -> Name; +ic(host, #connection{host = Host}) -> Host; +ic(peer_host, #connection{peer_host = PeerHost}) -> PeerHost; +ic(port, #connection{port = Port}) -> Port; +ic(peer_port, #connection{peer_port = PeerPort}) -> PeerPort; +ic(protocol, #connection{protocol = none}) -> none; +ic(protocol, #connection{protocol = P}) -> P:version(); +ic(user, #connection{user = none}) -> ''; +ic(user, #connection{user = U}) -> U#user.username; +ic(vhost, #connection{vhost = VHost}) -> VHost; +ic(timeout, #connection{timeout_sec = Timeout}) -> Timeout; +ic(frame_max, #connection{frame_max = FrameMax}) -> FrameMax; +ic(client_properties, #connection{client_properties = CP}) -> CP; +ic(auth_mechanism, #connection{auth_mechanism = none}) -> none; +ic(auth_mechanism, #connection{auth_mechanism = {Name, _Mod}}) -> Name; +ic(Item, #connection{}) -> throw({bad_argument, Item}). + +socket_info(Get, Select, #v1{sock = Sock}) -> + case Get(Sock) of {ok, T} -> Select(T); {error, _} -> '' end. -ssl_info(F, Sock) -> +ssl_info(F, #v1{sock = Sock}) -> %% The first ok form is R14 %% The second is R13 - the extra term is exportability (by inspection, %% the docs are wrong) @@ -896,58 +1052,47 @@ ssl_info(F, Sock) -> {ok, {P, {K, C, H, _}}} -> F({P, {K, C, H}}) end. -cert_info(F, Sock) -> +cert_info(F, #v1{sock = Sock}) -> case rabbit_net:peercert(Sock) of nossl -> ''; {error, no_peercert} -> ''; {ok, Cert} -> list_to_binary(F(Cert)) end. -%%-------------------------------------------------------------------------- - -create_channel(Channel, State) -> - #v1{sock = Sock, queue_collector = Collector, - channel_sup_sup_pid = ChanSupSup, - connection = #connection{protocol = Protocol, - frame_max = FrameMax, - user = User, - vhost = VHost, - capabilities = Capabilities}} = State, - {ok, _ChSupPid, {ChPid, AState}} = - rabbit_channel_sup_sup:start_channel( - ChanSupSup, {tcp, Sock, Channel, FrameMax, self(), name(Sock), - Protocol, User, VHost, Capabilities, Collector}), - MRef = erlang:monitor(process, ChPid), - put({ch_pid, ChPid}, {Channel, MRef}), - put({channel, Channel}, {ChPid, AState}), - ok. - -process_channel_frame(Frame, ChPid, AState) -> - case rabbit_command_assembler:process(Frame, AState) of - {ok, NewAState} -> {ok, NewAState}; - {ok, Method, NewAState} -> rabbit_channel:do(ChPid, Method), - {ok, NewAState}; - {ok, Method, Content, NewAState} -> rabbit_channel:do_flow( - ChPid, Method, Content), - {ok, NewAState}; - {error, Reason} -> {error, Reason} - end. - -handle_exception(State = #v1{connection_state = closed}, _Channel, _Reason) -> - State; -handle_exception(State, Channel, Reason) -> - send_exception(State, Channel, Reason). - -send_exception(State = #v1{connection = #connection{protocol = Protocol}}, - Channel, Reason) -> - {0, CloseMethod} = - rabbit_binary_generator:map_exception(Channel, Reason, Protocol), - terminate_channels(), - State1 = close_connection(State), - ok = rabbit_writer:internal_send_command( - State1#v1.sock, 0, CloseMethod, Protocol), - State1. +maybe_emit_stats(State) -> + rabbit_event:if_enabled(State, #v1.stats_timer, + fun() -> emit_stats(State) end). emit_stats(State) -> rabbit_event:notify(connection_stats, infos(?STATISTICS_KEYS, State)), rabbit_event:reset_stats_timer(State, #v1.stats_timer). + +%% 1.0 stub +-ifdef(use_specs). +-spec(become_1_0/2 :: (non_neg_integer(), #v1{}) -> no_return()). +-endif. +become_1_0(Id, State = #v1{sock = Sock}) -> + case code:is_loaded(rabbit_amqp1_0_reader) of + false -> refuse_connection(Sock, amqp1_0_plugin_not_enabled); + _ -> Mode = case Id of + 0 -> amqp; + 3 -> sasl; + _ -> refuse_connection( + Sock, {unsupported_amqp1_0_protocol_id, Id}, + {3, 1, 0, 0}) + end, + throw({become, {rabbit_amqp1_0_reader, init, + [Mode, pack_for_1_0(State)]}}) + end. + +pack_for_1_0(#v1{parent = Parent, + sock = Sock, + recv_len = RecvLen, + pending_recv = PendingRecv, + queue_collector = QueueCollector, + ch_sup3_pid = ChSup3Pid, + start_heartbeat_fun = SHF, + buf = Buf, + buf_len = BufLen}) -> + {Parent, Sock, RecvLen, PendingRecv, QueueCollector, ChSup3Pid, SHF, + Buf, BufLen}. diff --git a/src/rabbit_registry.erl b/src/rabbit_registry.erl index 637835c3..f933e4e9 100644 --- a/src/rabbit_registry.erl +++ b/src/rabbit_registry.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_registry). @@ -84,12 +84,34 @@ internal_binary_to_type(TypeBin) when is_binary(TypeBin) -> internal_register(Class, TypeName, ModuleName) when is_atom(Class), is_binary(TypeName), is_atom(ModuleName) -> ok = sanity_check_module(class_module(Class), ModuleName), - true = ets:insert(?ETS_NAME, - {{Class, internal_binary_to_type(TypeName)}, ModuleName}), + RegArg = {{Class, internal_binary_to_type(TypeName)}, ModuleName}, + true = ets:insert(?ETS_NAME, RegArg), + conditional_register(RegArg), ok. internal_unregister(Class, TypeName) -> - true = ets:delete(?ETS_NAME, {Class, internal_binary_to_type(TypeName)}), + UnregArg = {Class, internal_binary_to_type(TypeName)}, + conditional_unregister(UnregArg), + true = ets:delete(?ETS_NAME, UnregArg), + ok. + +%% register exchange decorator route callback only when implemented, +%% in order to avoid unnecessary decorator calls on the fast +%% publishing path +conditional_register({{exchange_decorator, Type}, ModuleName}) -> + case erlang:function_exported(ModuleName, route, 2) of + true -> true = ets:insert(?ETS_NAME, + {{exchange_decorator_route, Type}, + ModuleName}); + false -> ok + end; +conditional_register(_) -> + ok. + +conditional_unregister({exchange_decorator, Type}) -> + true = ets:delete(?ETS_NAME, {exchange_decorator_route, Type}), + ok; +conditional_unregister(_) -> ok. sanity_check_module(ClassModule, Module) -> @@ -104,9 +126,12 @@ sanity_check_module(ClassModule, Module) -> true -> ok end. -class_module(exchange) -> rabbit_exchange_type; -class_module(auth_mechanism) -> rabbit_auth_mechanism; -class_module(runtime_parameter) -> rabbit_runtime_parameter. +class_module(exchange) -> rabbit_exchange_type; +class_module(auth_mechanism) -> rabbit_auth_mechanism; +class_module(runtime_parameter) -> rabbit_runtime_parameter; +class_module(exchange_decorator) -> rabbit_exchange_decorator; +class_module(policy_validator) -> rabbit_policy_validator; +class_module(ha_mode) -> rabbit_mirror_queue_mode. %%--------------------------------------------------------------------------- diff --git a/src/rabbit_restartable_sup.erl b/src/rabbit_restartable_sup.erl index 237ab78c..65a2ca0a 100644 --- a/src/rabbit_restartable_sup.erl +++ b/src/rabbit_restartable_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_restartable_sup). diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl index f4bbda0f..00343570 100644 --- a/src/rabbit_router.erl +++ b/src/rabbit_router.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_router). diff --git a/src/rabbit_runtime_parameter.erl b/src/rabbit_runtime_parameter.erl index c7d30116..ee48165b 100644 --- a/src/rabbit_runtime_parameter.erl +++ b/src/rabbit_runtime_parameter.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_runtime_parameter). @@ -21,10 +21,10 @@ -type(validate_results() :: 'ok' | {error, string(), [term()]} | [validate_results()]). --callback validate(binary(), binary(), term()) -> validate_results(). --callback validate_clear(binary(), binary()) -> validate_results(). --callback notify(binary(), binary(), term()) -> 'ok'. --callback notify_clear(binary(), binary()) -> 'ok'. +-callback validate(rabbit_types:vhost(), binary(), binary(), + term()) -> validate_results(). +-callback notify(rabbit_types:vhost(), binary(), binary(), term()) -> 'ok'. +-callback notify_clear(rabbit_types:vhost(), binary(), binary()) -> 'ok'. -else. @@ -32,10 +32,9 @@ behaviour_info(callbacks) -> [ - {validate, 3}, - {validate_clear, 2}, - {notify, 3}, - {notify_clear, 2} + {validate, 4}, + {notify, 4}, + {notify_clear, 3} ]; behaviour_info(_Other) -> undefined. diff --git a/src/rabbit_runtime_parameters.erl b/src/rabbit_runtime_parameters.erl index 172cee92..c13c333e 100644 --- a/src/rabbit_runtime_parameters.erl +++ b/src/rabbit_runtime_parameters.erl @@ -10,16 +10,17 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_runtime_parameters). -include("rabbit.hrl"). --export([parse_set/3, set/3, clear/2, list/0, list/1, list_formatted/0, - lookup/2, value/2, value/3, info_keys/0]). +-export([parse_set/4, set/4, set_any/4, clear/3, clear_any/3, list/0, list/1, + list_component/1, list/2, list_formatted/1, lookup/3, + value/3, value/4, info_keys/0]). %%---------------------------------------------------------------------------- @@ -27,15 +28,26 @@ -type(ok_or_error_string() :: 'ok' | {'error_string', string()}). --spec(parse_set/3 :: (binary(), binary(), string()) -> ok_or_error_string()). --spec(set/3 :: (binary(), binary(), term()) -> ok_or_error_string()). --spec(clear/2 :: (binary(), binary()) -> ok_or_error_string()). +-spec(parse_set/4 :: (rabbit_types:vhost(), binary(), binary(), string()) + -> ok_or_error_string()). +-spec(set/4 :: (rabbit_types:vhost(), binary(), binary(), term()) + -> ok_or_error_string()). +-spec(set_any/4 :: (rabbit_types:vhost(), binary(), binary(), term()) + -> ok_or_error_string()). +-spec(clear/3 :: (rabbit_types:vhost(), binary(), binary()) + -> ok_or_error_string()). +-spec(clear_any/3 :: (rabbit_types:vhost(), binary(), binary()) + -> ok_or_error_string()). -spec(list/0 :: () -> [rabbit_types:infos()]). --spec(list/1 :: (binary()) -> [rabbit_types:infos()] | 'not_found'). --spec(list_formatted/0 :: () -> [rabbit_types:infos()]). --spec(lookup/2 :: (binary(), binary()) -> rabbit_types:infos()). --spec(value/2 :: (binary(), binary()) -> term()). --spec(value/3 :: (binary(), binary(), term()) -> term()). +-spec(list/1 :: (rabbit_types:vhost() | '_') -> [rabbit_types:infos()]). +-spec(list_component/1 :: (binary()) -> [rabbit_types:infos()]). +-spec(list/2 :: (rabbit_types:vhost() | '_', binary() | '_') + -> [rabbit_types:infos()]). +-spec(list_formatted/1 :: (rabbit_types:vhost()) -> [rabbit_types:infos()]). +-spec(lookup/3 :: (rabbit_types:vhost(), binary(), binary()) + -> rabbit_types:infos() | 'not_found'). +-spec(value/3 :: (rabbit_types:vhost(), binary(), binary()) -> term()). +-spec(value/4 :: (rabbit_types:vhost(), binary(), binary(), term()) -> term()). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). -endif. @@ -48,36 +60,39 @@ %%--------------------------------------------------------------------------- -parse_set(Component, Key, String) -> - case parse(String) of - {ok, Term} -> set(Component, Key, Term); - {errors, L} -> format_error(L) +parse_set(_, <<"policy">>, _, _) -> + {error_string, "policies may not be set using this method"}; +parse_set(VHost, Component, Name, String) -> + case rabbit_misc:json_decode(String) of + {ok, JSON} -> set(VHost, Component, Name, + rabbit_misc:json_to_term(JSON)); + error -> {error_string, "JSON decoding error"} end. -set(Component, Key, Term) -> - case set0(Component, Key, Term) of - ok -> ok; - {errors, L} -> format_error(L) - end. +set(_, <<"policy">>, _, _) -> + {error_string, "policies may not be set using this method"}; +set(VHost, Component, Name, Term) -> + set_any(VHost, Component, Name, Term). format_error(L) -> {error_string, rabbit_misc:format_many([{"Validation failed~n", []} | L])}. -set0(Component, Key, Term) -> +set_any(VHost, Component, Name, Term) -> + case set_any0(VHost, Component, Name, Term) of + ok -> ok; + {errors, L} -> format_error(L) + end. + +set_any0(VHost, Component, Name, Term) -> case lookup_component(Component) of {ok, Mod} -> - case flatten_errors(validate(Term)) of + case flatten_errors(Mod:validate(VHost, Component, Name, Term)) of ok -> - case flatten_errors(Mod:validate(Component, Key, Term)) of - ok -> - case mnesia_update(Component, Key, Term) of - {old, Term} -> ok; - _ -> Mod:notify(Component, Key, Term) - end, - ok; - E -> - E - end; + case mnesia_update(VHost, Component, Name, Term) of + {old, Term} -> ok; + _ -> Mod:notify(VHost, Component, Name, Term) + end, + ok; E -> E end; @@ -85,96 +100,105 @@ set0(Component, Key, Term) -> E end. -mnesia_update(Component, Key, Term) -> - rabbit_misc:execute_mnesia_transaction( - fun () -> - Res = case mnesia:read(?TABLE, {Component, Key}, read) of - [] -> new; - [Params] -> {old, Params#runtime_parameters.value} - end, - ok = mnesia:write(?TABLE, c(Component, Key, Term), write), - Res - end). - -clear(Component, Key) -> - case clear0(Component, Key) of - ok -> ok; - {errors, L} -> format_error(L) +mnesia_update(VHost, Comp, Name, Term) -> + F = fun () -> + Res = case mnesia:read(?TABLE, {VHost, Comp, Name}, read) of + [] -> new; + [Params] -> {old, Params#runtime_parameters.value} + end, + ok = mnesia:write(?TABLE, c(VHost, Comp, Name, Term), write), + Res + end, + rabbit_misc:execute_mnesia_transaction(rabbit_vhost:with(VHost, F)). + +clear(_, <<"policy">> , _) -> + {error_string, "policies may not be cleared using this method"}; +clear(VHost, Component, Name) -> + clear_any(VHost, Component, Name). + +clear_any(VHost, Component, Name) -> + case lookup(VHost, Component, Name) of + not_found -> {error_string, "Parameter does not exist"}; + _ -> mnesia_clear(VHost, Component, Name), + case lookup_component(Component) of + {ok, Mod} -> Mod:notify_clear(VHost, Component, Name); + _ -> ok + end end. -clear0(Component, Key) -> - case lookup_component(Component) of - {ok, Mod} -> case flatten_errors(Mod:validate_clear(Component, Key)) of - ok -> mnesia_clear(Component, Key), - Mod:notify_clear(Component, Key), - ok; - E -> E - end; - E -> E - end. - -mnesia_clear(Component, Key) -> - ok = rabbit_misc:execute_mnesia_transaction( - fun () -> - ok = mnesia:delete(?TABLE, {Component, Key}, write) - end). +mnesia_clear(VHost, Component, Name) -> + F = fun () -> + ok = mnesia:delete(?TABLE, {VHost, Component, Name}, write) + end, + ok = rabbit_misc:execute_mnesia_transaction(rabbit_vhost:with(VHost, F)). list() -> - [p(P) || P <- rabbit_misc:dirty_read_all(?TABLE)]. - -list(Component) -> - case lookup_component(Component) of - {ok, _} -> Match = #runtime_parameters{key = {Component, '_'}, _ = '_'}, - [p(P) || P <- mnesia:dirty_match_object(?TABLE, Match)]; - _ -> not_found - end. - -list_formatted() -> - [pset(value, format(pget(value, P)), P) || P <- list()]. - -lookup(Component, Key) -> - case lookup0(Component, Key, rabbit_misc:const(not_found)) of + [p(P) || #runtime_parameters{ key = {_VHost, Comp, _Name}} = P <- + rabbit_misc:dirty_read_all(?TABLE), Comp /= <<"policy">>]. + +list(VHost) -> list(VHost, '_'). +list_component(Component) -> list('_', Component). + +list(VHost, Component) -> + case VHost of + '_' -> ok; + _ -> rabbit_vhost:assert(VHost) + end, + Match = #runtime_parameters{key = {VHost, Component, '_'}, _ = '_'}, + [p(P) || #runtime_parameters{key = {_VHost, Comp, _Name}} = P <- + mnesia:dirty_match_object(?TABLE, Match), + Comp =/= <<"policy">> orelse Component =:= <<"policy">>]. + +list_formatted(VHost) -> + [pset(value, format(pget(value, P)), P) || P <- list(VHost)]. + +lookup(VHost, Component, Name) -> + case lookup0(VHost, Component, Name, rabbit_misc:const(not_found)) of not_found -> not_found; Params -> p(Params) end. -value(Component, Key) -> - case lookup0(Component, Key, rabbit_misc:const(not_found)) of +value(VHost, Component, Name) -> + case lookup0(VHost, Component, Name, rabbit_misc:const(not_found)) of not_found -> not_found; Params -> Params#runtime_parameters.value end. -value(Component, Key, Default) -> - Params = lookup0(Component, Key, - fun () -> lookup_missing(Component, Key, Default) end), +value(VHost, Component, Name, Default) -> + Params = lookup0(VHost, Component, Name, + fun () -> + lookup_missing(VHost, Component, Name, Default) + end), Params#runtime_parameters.value. -lookup0(Component, Key, DefaultFun) -> - case mnesia:dirty_read(?TABLE, {Component, Key}) of +lookup0(VHost, Component, Name, DefaultFun) -> + case mnesia:dirty_read(?TABLE, {VHost, Component, Name}) of [] -> DefaultFun(); [R] -> R end. -lookup_missing(Component, Key, Default) -> +lookup_missing(VHost, Component, Name, Default) -> rabbit_misc:execute_mnesia_transaction( fun () -> - case mnesia:read(?TABLE, {Component, Key}, read) of - [] -> Record = c(Component, Key, Default), + case mnesia:read(?TABLE, {VHost, Component, Name}, read) of + [] -> Record = c(VHost, Component, Name, Default), mnesia:write(?TABLE, Record, write), Record; [R] -> R end end). -c(Component, Key, Default) -> #runtime_parameters{key = {Component, Key}, - value = Default}. +c(VHost, Component, Name, Default) -> + #runtime_parameters{key = {VHost, Component, Name}, + value = Default}. -p(#runtime_parameters{key = {Component, Key}, value = Value}) -> - [{component, Component}, - {key, Key}, +p(#runtime_parameters{key = {VHost, Component, Name}, value = Value}) -> + [{vhost, VHost}, + {component, Component}, + {name, Name}, {value, Value}]. -info_keys() -> [component, key, value]. +info_keys() -> [component, name, value]. %%--------------------------------------------------------------------------- @@ -186,51 +210,9 @@ lookup_component(Component) -> {ok, Module} -> {ok, Module} end. -parse(Src0) -> - Src1 = string:strip(Src0), - Src = case lists:reverse(Src1) of - [$. |_] -> Src1; - _ -> Src1 ++ "." - end, - case erl_scan:string(Src) of - {ok, Scanned, _} -> - case erl_parse:parse_term(Scanned) of - {ok, Parsed} -> - {ok, Parsed}; - {error, E} -> - {errors, - [{"Could not parse value: ~s", [format_parse_error(E)]}]} - end; - {error, E, _} -> - {errors, [{"Could not scan value: ~s", [format_parse_error(E)]}]} - end. - -format_parse_error({_Line, Mod, Err}) -> - lists:flatten(Mod:format_error(Err)). - format(Term) -> - list_to_binary(rabbit_misc:format("~p", [Term])). - -%%--------------------------------------------------------------------------- - -%% We will want to be able to biject these to JSON. So we have some -%% generic restrictions on what we consider acceptable. -validate(Proplist = [T | _]) when is_tuple(T) -> validate_proplist(Proplist); -validate(L) when is_list(L) -> validate_list(L); -validate(T) when is_tuple(T) -> {error, "tuple: ~p", [T]}; -validate(B) when is_boolean(B) -> ok; -validate(null) -> ok; -validate(A) when is_atom(A) -> {error, "atom: ~p", [A]}; -validate(N) when is_number(N) -> ok; -validate(B) when is_binary(B) -> ok; -validate(B) when is_bitstring(B) -> {error, "bitstring: ~p", [B]}. - -validate_list(L) -> [validate(I) || I <- L]. -validate_proplist(L) -> [vp(I) || I <- L]. - -vp({K, V}) when is_binary(K) -> validate(V); -vp({K, _V}) -> {error, "bad key: ~p", [K]}; -vp(H) -> {error, "not two tuple: ~p", [H]}. + {ok, JSON} = rabbit_misc:json_encode(rabbit_misc:term_to_json(Term)), + list_to_binary(JSON). flatten_errors(L) -> case [{F, A} || I <- lists:flatten([L]), {error, F, A} <- [I]] of diff --git a/src/rabbit_runtime_parameters_test.erl b/src/rabbit_runtime_parameters_test.erl index f23b3227..05c85881 100644 --- a/src/rabbit_runtime_parameters_test.erl +++ b/src/rabbit_runtime_parameters_test.erl @@ -10,15 +10,20 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_runtime_parameters_test). -behaviour(rabbit_runtime_parameter). +-behaviour(rabbit_policy_validator). --export([validate/3, validate_clear/2, notify/3, notify_clear/2]). +-export([validate/4, notify/4, notify_clear/3]). -export([register/0, unregister/0]). +-export([validate_policy/1]). +-export([register_policy_validator/0, unregister_policy_validator/0]). + +%---------------------------------------------------------------------------- register() -> rabbit_registry:register(runtime_parameter, <<"test">>, ?MODULE). @@ -26,13 +31,34 @@ register() -> unregister() -> rabbit_registry:unregister(runtime_parameter, <<"test">>). -validate(<<"test">>, <<"good">>, _Term) -> ok; -validate(<<"test">>, <<"maybe">>, <<"good">>) -> ok; -validate(<<"test">>, _, _) -> {error, "meh", []}. +validate(_, <<"test">>, <<"good">>, _Term) -> ok; +validate(_, <<"test">>, <<"maybe">>, <<"good">>) -> ok; +validate(_, <<"test">>, _, _) -> {error, "meh", []}. + +notify(_, _, _, _) -> ok. +notify_clear(_, _, _) -> ok. + +%---------------------------------------------------------------------------- + +register_policy_validator() -> + rabbit_registry:register(policy_validator, <<"testeven">>, ?MODULE), + rabbit_registry:register(policy_validator, <<"testpos">>, ?MODULE). + +unregister_policy_validator() -> + rabbit_registry:unregister(policy_validator, <<"testeven">>), + rabbit_registry:unregister(policy_validator, <<"testpos">>). + +validate_policy([{<<"testeven">>, Terms}]) when is_list(Terms) -> + case length(Terms) rem 2 =:= 0 of + true -> ok; + false -> {error, "meh", []} + end; -validate_clear(<<"test">>, <<"good">>) -> ok; -validate_clear(<<"test">>, <<"maybe">>) -> ok; -validate_clear(<<"test">>, _) -> {error, "meh", []}. +validate_policy([{<<"testpos">>, Terms}]) when is_list(Terms) -> + case lists:all(fun (N) -> is_integer(N) andalso N > 0 end, Terms) of + true -> ok; + false -> {error, "meh", []} + end; -notify(_, _, _) -> ok. -notify_clear(_, _) -> ok. +validate_policy(_) -> + {error, "meh", []}. diff --git a/src/rabbit_sasl_report_file_h.erl b/src/rabbit_sasl_report_file_h.erl index e8beecfe..39a10ac3 100644 --- a/src/rabbit_sasl_report_file_h.erl +++ b/src/rabbit_sasl_report_file_h.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_sasl_report_file_h). diff --git a/src/rabbit_ssl.erl b/src/rabbit_ssl.erl index 22ff555f..109bff30 100644 --- a/src/rabbit_ssl.erl +++ b/src/rabbit_ssl.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_ssl). @@ -162,15 +162,16 @@ format_rdn(#'AttributeTypeAndValue'{type = T, value = V}) -> {?'id-at-pseudonym' , "PSEUDONYM"}, {?'id-domainComponent' , "DC"}, {?'id-emailAddress' , "EMAILADDRESS"}, - {?'street-address' , "STREET"}], + {?'street-address' , "STREET"}, + {{0,9,2342,19200300,100,1,1} , "UID"}], %% Not in public_key.hrl case proplists:lookup(T, Fmts) of {_, Fmt} -> - io_lib:format(Fmt ++ "=~s", [FV]); + rabbit_misc:format(Fmt ++ "=~s", [FV]); none when is_tuple(T) -> - TypeL = [io_lib:format("~w", [X]) || X <- tuple_to_list(T)], - io_lib:format("~s:~s", [string:join(TypeL, "."), FV]); + TypeL = [rabbit_misc:format("~w", [X]) || X <- tuple_to_list(T)], + rabbit_misc:format("~s=~s", [string:join(TypeL, "."), FV]); none -> - io_lib:format("~p:~s", [T, FV]) + rabbit_misc:format("~p=~s", [T, FV]) end. %% Escape a string as per RFC4514. @@ -204,14 +205,26 @@ format_asn1_value({ST, S}) when ST =:= teletexString; ST =:= printableString; format_directory_string(ST, S); format_asn1_value({utcTime, [Y1, Y2, M1, M2, D1, D2, H1, H2, Min1, Min2, S1, S2, $Z]}) -> - io_lib:format("20~c~c-~c~c-~c~cT~c~c:~c~c:~c~cZ", - [Y1, Y2, M1, M2, D1, D2, H1, H2, Min1, Min2, S1, S2]); + rabbit_misc:format("20~c~c-~c~c-~c~cT~c~c:~c~c:~c~cZ", + [Y1, Y2, M1, M2, D1, D2, H1, H2, Min1, Min2, S1, S2]); %% We appear to get an untagged value back for an ia5string %% (e.g. domainComponent). format_asn1_value(V) when is_list(V) -> V; +format_asn1_value(V) when is_binary(V) -> + %% OTP does not decode some values when combined with an unknown + %% type. That's probably wrong, so as a last ditch effort let's + %% try manually decoding. 'DirectoryString' is semi-arbitrary - + %% but it is the type which covers the various string types we + %% handle below. + try + {ST, S} = public_key:der_decode('DirectoryString', V), + format_directory_string(ST, S) + catch _:_ -> + rabbit_misc:format("~p", [V]) + end; format_asn1_value(V) -> - io_lib:format("~p", [V]). + rabbit_misc:format("~p", [V]). %% DirectoryString { INTEGER : maxSize } ::= CHOICE { %% teletexString TeletexString (SIZE (1..maxSize)), diff --git a/src/rabbit_sup.erl b/src/rabbit_sup.erl index f142d233..c1deb14b 100644 --- a/src/rabbit_sup.erl +++ b/src/rabbit_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_sup). diff --git a/src/rabbit_table.erl b/src/rabbit_table.erl new file mode 100644 index 00000000..a29c57d5 --- /dev/null +++ b/src/rabbit_table.erl @@ -0,0 +1,311 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_table). + +-export([create/0, create_local_copy/1, wait_for_replicated/0, wait/1, + force_load/0, is_present/0, is_empty/0, + check_schema_integrity/0, clear_ram_only_tables/0]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(create/0 :: () -> 'ok'). +-spec(create_local_copy/1 :: ('disc' | 'ram') -> 'ok'). +-spec(wait_for_replicated/0 :: () -> 'ok'). +-spec(wait/1 :: ([atom()]) -> 'ok'). +-spec(force_load/0 :: () -> 'ok'). +-spec(is_present/0 :: () -> boolean()). +-spec(is_empty/0 :: () -> boolean()). +-spec(check_schema_integrity/0 :: () -> rabbit_types:ok_or_error(any())). +-spec(clear_ram_only_tables/0 :: () -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- +%% Main interface +%%---------------------------------------------------------------------------- + +create() -> + lists:foreach(fun ({Tab, TabDef}) -> + TabDef1 = proplists:delete(match, TabDef), + case mnesia:create_table(Tab, TabDef1) of + {atomic, ok} -> ok; + {aborted, Reason} -> + throw({error, {table_creation_failed, + Tab, TabDef1, Reason}}) + end + end, definitions()), + ok. + +%% The sequence in which we delete the schema and then the other +%% tables is important: if we delete the schema first when moving to +%% RAM mnesia will loudly complain since it doesn't make much sense to +%% do that. But when moving to disc, we need to move the schema first. +create_local_copy(disc) -> + create_local_copy(schema, disc_copies), + create_local_copies(disc); +create_local_copy(ram) -> + create_local_copies(ram), + create_local_copy(schema, ram_copies). + +wait_for_replicated() -> + wait([Tab || {Tab, TabDef} <- definitions(), + not lists:member({local_content, true}, TabDef)]). + +wait(TableNames) -> + case mnesia:wait_for_tables(TableNames, 30000) of + ok -> + ok; + {timeout, BadTabs} -> + throw({error, {timeout_waiting_for_tables, BadTabs}}); + {error, Reason} -> + throw({error, {failed_waiting_for_tables, Reason}}) + end. + +force_load() -> [mnesia:force_load_table(T) || T <- names()], ok. + +is_present() -> names() -- mnesia:system_info(tables) =:= []. + +is_empty() -> + lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end, + names()). + +check_schema_integrity() -> + Tables = mnesia:system_info(tables), + case check(fun (Tab, TabDef) -> + case lists:member(Tab, Tables) of + false -> {error, {table_missing, Tab}}; + true -> check_attributes(Tab, TabDef) + end + end) of + ok -> ok = wait(names()), + check(fun check_content/2); + Other -> Other + end. + +clear_ram_only_tables() -> + Node = node(), + lists:foreach( + fun (TabName) -> + case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of + true -> {atomic, ok} = mnesia:clear_table(TabName); + false -> ok + end + end, names()), + ok. + +%%-------------------------------------------------------------------- +%% Internal helpers +%%-------------------------------------------------------------------- + +create_local_copies(Type) -> + lists:foreach( + fun ({Tab, TabDef}) -> + HasDiscCopies = has_copy_type(TabDef, disc_copies), + HasDiscOnlyCopies = has_copy_type(TabDef, disc_only_copies), + LocalTab = proplists:get_bool(local_content, TabDef), + StorageType = + if + Type =:= disc orelse LocalTab -> + if + HasDiscCopies -> disc_copies; + HasDiscOnlyCopies -> disc_only_copies; + true -> ram_copies + end; + Type =:= ram -> + ram_copies + end, + ok = create_local_copy(Tab, StorageType) + end, definitions(Type)), + ok. + +create_local_copy(Tab, Type) -> + StorageType = mnesia:table_info(Tab, storage_type), + {atomic, ok} = + if + StorageType == unknown -> + mnesia:add_table_copy(Tab, node(), Type); + StorageType /= Type -> + mnesia:change_table_copy_type(Tab, node(), Type); + true -> {atomic, ok} + end, + ok. + +has_copy_type(TabDef, DiscType) -> + lists:member(node(), proplists:get_value(DiscType, TabDef, [])). + +check_attributes(Tab, TabDef) -> + {_, ExpAttrs} = proplists:lookup(attributes, TabDef), + case mnesia:table_info(Tab, attributes) of + ExpAttrs -> ok; + Attrs -> {error, {table_attributes_mismatch, Tab, ExpAttrs, Attrs}} + end. + +check_content(Tab, TabDef) -> + {_, Match} = proplists:lookup(match, TabDef), + case mnesia:dirty_first(Tab) of + '$end_of_table' -> + ok; + Key -> + ObjList = mnesia:dirty_read(Tab, Key), + MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]), + case ets:match_spec_run(ObjList, MatchComp) of + ObjList -> ok; + _ -> {error, {table_content_invalid, Tab, Match, ObjList}} + end + end. + +check(Fun) -> + case [Error || {Tab, TabDef} <- definitions(), + case Fun(Tab, TabDef) of + ok -> Error = none, false; + {error, Error} -> true + end] of + [] -> ok; + Errors -> {error, Errors} + end. + +%%-------------------------------------------------------------------- +%% Table definitions +%%-------------------------------------------------------------------- + +names() -> [Tab || {Tab, _} <- definitions()]. + +%% The tables aren't supposed to be on disk on a ram node +definitions(disc) -> + definitions(); +definitions(ram) -> + [{Tab, [{disc_copies, []}, {ram_copies, [node()]} | + proplists:delete( + ram_copies, proplists:delete(disc_copies, TabDef))]} || + {Tab, TabDef} <- definitions()]. + +definitions() -> + [{rabbit_user, + [{record_name, internal_user}, + {attributes, record_info(fields, internal_user)}, + {disc_copies, [node()]}, + {match, #internal_user{_='_'}}]}, + {rabbit_user_permission, + [{record_name, user_permission}, + {attributes, record_info(fields, user_permission)}, + {disc_copies, [node()]}, + {match, #user_permission{user_vhost = #user_vhost{_='_'}, + permission = #permission{_='_'}, + _='_'}}]}, + {rabbit_vhost, + [{record_name, vhost}, + {attributes, record_info(fields, vhost)}, + {disc_copies, [node()]}, + {match, #vhost{_='_'}}]}, + {rabbit_listener, + [{record_name, listener}, + {attributes, record_info(fields, listener)}, + {type, bag}, + {match, #listener{_='_'}}]}, + {rabbit_durable_route, + [{record_name, route}, + {attributes, record_info(fields, route)}, + {disc_copies, [node()]}, + {match, #route{binding = binding_match(), _='_'}}]}, + {rabbit_semi_durable_route, + [{record_name, route}, + {attributes, record_info(fields, route)}, + {type, ordered_set}, + {match, #route{binding = binding_match(), _='_'}}]}, + {rabbit_route, + [{record_name, route}, + {attributes, record_info(fields, route)}, + {type, ordered_set}, + {match, #route{binding = binding_match(), _='_'}}]}, + {rabbit_reverse_route, + [{record_name, reverse_route}, + {attributes, record_info(fields, reverse_route)}, + {type, ordered_set}, + {match, #reverse_route{reverse_binding = reverse_binding_match(), + _='_'}}]}, + {rabbit_topic_trie_node, + [{record_name, topic_trie_node}, + {attributes, record_info(fields, topic_trie_node)}, + {type, ordered_set}, + {match, #topic_trie_node{trie_node = trie_node_match(), _='_'}}]}, + {rabbit_topic_trie_edge, + [{record_name, topic_trie_edge}, + {attributes, record_info(fields, topic_trie_edge)}, + {type, ordered_set}, + {match, #topic_trie_edge{trie_edge = trie_edge_match(), _='_'}}]}, + {rabbit_topic_trie_binding, + [{record_name, topic_trie_binding}, + {attributes, record_info(fields, topic_trie_binding)}, + {type, ordered_set}, + {match, #topic_trie_binding{trie_binding = trie_binding_match(), + _='_'}}]}, + {rabbit_durable_exchange, + [{record_name, exchange}, + {attributes, record_info(fields, exchange)}, + {disc_copies, [node()]}, + {match, #exchange{name = exchange_name_match(), _='_'}}]}, + {rabbit_exchange, + [{record_name, exchange}, + {attributes, record_info(fields, exchange)}, + {match, #exchange{name = exchange_name_match(), _='_'}}]}, + {rabbit_exchange_serial, + [{record_name, exchange_serial}, + {attributes, record_info(fields, exchange_serial)}, + {match, #exchange_serial{name = exchange_name_match(), _='_'}}]}, + {rabbit_runtime_parameters, + [{record_name, runtime_parameters}, + {attributes, record_info(fields, runtime_parameters)}, + {disc_copies, [node()]}, + {match, #runtime_parameters{_='_'}}]}, + {rabbit_durable_queue, + [{record_name, amqqueue}, + {attributes, record_info(fields, amqqueue)}, + {disc_copies, [node()]}, + {match, #amqqueue{name = queue_name_match(), _='_'}}]}, + {rabbit_queue, + [{record_name, amqqueue}, + {attributes, record_info(fields, amqqueue)}, + {match, #amqqueue{name = queue_name_match(), _='_'}}]}] + ++ gm:table_definitions() + ++ mirrored_supervisor:table_definitions(). + +binding_match() -> + #binding{source = exchange_name_match(), + destination = binding_destination_match(), + _='_'}. +reverse_binding_match() -> + #reverse_binding{destination = binding_destination_match(), + source = exchange_name_match(), + _='_'}. +binding_destination_match() -> + resource_match('_'). +trie_node_match() -> + #trie_node{ exchange_name = exchange_name_match(), _='_'}. +trie_edge_match() -> + #trie_edge{ exchange_name = exchange_name_match(), _='_'}. +trie_binding_match() -> + #trie_binding{exchange_name = exchange_name_match(), _='_'}. +exchange_name_match() -> + resource_match(exchange). +queue_name_match() -> + resource_match(queue). +resource_match(Kind) -> + #resource{kind = Kind, _='_'}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index bae4928d..5af4969a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -10,15 +10,15 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_tests). -compile([export_all]). --export([all_tests/0, test_parsing/0]). +-export([all_tests/0]). -import(rabbit_misc, [pget/2]). @@ -29,20 +29,25 @@ -define(PERSISTENT_MSG_STORE, msg_store_persistent). -define(TRANSIENT_MSG_STORE, msg_store_transient). -define(CLEANUP_QUEUE_NAME, <<"cleanup-queue">>). +-define(TIMEOUT, 5000). all_tests() -> + ok = setup_cluster(), + ok = supervisor2_tests:test_all(), passed = gm_tests:all_tests(), passed = mirrored_supervisor_tests:all_tests(), application:set_env(rabbit, file_handles_high_watermark, 10, infinity), ok = file_handle_cache:set_limit(10), + passed = test_version_equivalance(), passed = test_multi_call(), passed = test_file_handle_cache(), passed = test_backing_queue(), + passed = test_rabbit_basic_header_handling(), passed = test_priority_queue(), passed = test_pg_local(), passed = test_unfold(), passed = test_supervisor_delayed_restart(), - passed = test_parsing(), + passed = test_table_codec(), passed = test_content_framing(), passed = test_content_transcoding(), passed = test_topic_matching(), @@ -51,36 +56,68 @@ all_tests() -> passed = test_log_management_during_startup(), passed = test_statistics(), passed = test_arguments_parser(), - passed = test_cluster_management(), + passed = test_dynamic_mirroring(), passed = test_user_management(), passed = test_runtime_parameters(), + passed = test_policy_validation(), + passed = test_policy_opts_validation(), + passed = test_ha_policy_validation(), passed = test_server_status(), + passed = test_amqp_connection_refusal(), passed = test_confirms(), - passed = maybe_run_cluster_dependent_tests(), + passed = test_with_state(), + passed = + do_if_secondary_node( + fun run_cluster_dependent_tests/1, + fun (SecondaryNode) -> + io:format("Skipping cluster dependent tests with node ~p~n", + [SecondaryNode]), + passed + end), passed = test_configurable_server_properties(), passed. -maybe_run_cluster_dependent_tests() -> + +do_if_secondary_node(Up, Down) -> SecondaryNode = rabbit_nodes:make("hare"), case net_adm:ping(SecondaryNode) of - pong -> passed = run_cluster_dependent_tests(SecondaryNode); - pang -> io:format("Skipping cluster dependent tests with node ~p~n", - [SecondaryNode]) - end, - passed. + pong -> Up(SecondaryNode); + pang -> Down(SecondaryNode) + end. -run_cluster_dependent_tests(SecondaryNode) -> - SecondaryNodeS = atom_to_list(SecondaryNode), +setup_cluster() -> + do_if_secondary_node( + fun (SecondaryNode) -> + cover:stop(SecondaryNode), + ok = control_action(stop_app, []), + %% 'cover' does not cope at all well with nodes disconnecting, + %% which happens as part of reset. So we turn it off + %% temporarily. That is ok even if we're not in general using + %% cover, it just turns the engine on / off and doesn't log + %% anything. Note that this way cover won't be on when joining + %% the cluster, but this is OK since we're testing the clustering + %% interface elsewere anyway. + cover:stop(nodes()), + ok = control_action(join_cluster, + [atom_to_list(SecondaryNode)]), + cover:start(nodes()), + ok = control_action(start_app, []), + ok = control_action(start_app, SecondaryNode, [], []) + end, + fun (_) -> ok end). - cover:stop(SecondaryNode), - ok = control_action(stop_app, []), - ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS]), - ok = control_action(start_app, []), - cover:start(SecondaryNode), - ok = control_action(start_app, SecondaryNode, [], []), +maybe_run_cluster_dependent_tests() -> + do_if_secondary_node( + fun (SecondaryNode) -> + passed = run_cluster_dependent_tests(SecondaryNode) + end, + fun (SecondaryNode) -> + io:format("Skipping cluster dependent tests with node ~p~n", + [SecondaryNode]) + end). +run_cluster_dependent_tests(SecondaryNode) -> io:format("Running cluster dependent tests with node ~p~n", [SecondaryNode]), passed = test_delegates_async(SecondaryNode), passed = test_delegates_sync(SecondaryNode), @@ -109,6 +146,16 @@ run_cluster_dependent_tests(SecondaryNode) -> passed. +test_version_equivalance() -> + true = rabbit_misc:version_minor_equivalent("3.0.0", "3.0.0"), + true = rabbit_misc:version_minor_equivalent("3.0.0", "3.0.1"), + true = rabbit_misc:version_minor_equivalent("%%VSN%%", "%%VSN%%"), + false = rabbit_misc:version_minor_equivalent("3.0.0", "3.1.0"), + false = rabbit_misc:version_minor_equivalent("3.0.0", "3.0"), + false = rabbit_misc:version_minor_equivalent("3.0.0", "3.0.0.1"), + false = rabbit_misc:version_minor_equivalent("3.0.0", "3.0.foo"), + passed. + test_multi_call() -> Fun = fun() -> receive @@ -129,6 +176,78 @@ test_multi_call() -> exit(Pid3, bang), passed. +test_rabbit_basic_header_handling() -> + passed = write_table_with_invalid_existing_type_test(), + passed = invalid_existing_headers_test(), + passed = disparate_invalid_header_entries_accumulate_separately_test(), + passed = corrupt_or_invalid_headers_are_overwritten_test(), + passed = invalid_same_header_entry_accumulation_test(), + passed. + +-define(XDEATH_TABLE, + [{<<"reason">>, longstr, <<"blah">>}, + {<<"queue">>, longstr, <<"foo.bar.baz">>}, + {<<"exchange">>, longstr, <<"my-exchange">>}, + {<<"routing-keys">>, array, []}]). + +-define(ROUTE_TABLE, [{<<"redelivered">>, bool, <<"true">>}]). + +-define(BAD_HEADER(K), {<<K>>, longstr, <<"bad ", K>>}). +-define(BAD_HEADER2(K, Suf), {<<K>>, longstr, <<"bad ", K, Suf>>}). +-define(FOUND_BAD_HEADER(K), {<<K>>, array, [{longstr, <<"bad ", K>>}]}). + +write_table_with_invalid_existing_type_test() -> + prepend_check(<<"header1">>, ?XDEATH_TABLE, [?BAD_HEADER("header1")]), + passed. + +invalid_existing_headers_test() -> + Headers = + prepend_check(<<"header2">>, ?ROUTE_TABLE, [?BAD_HEADER("header2")]), + {array, [{table, ?ROUTE_TABLE}]} = + rabbit_misc:table_lookup(Headers, <<"header2">>), + passed. + +disparate_invalid_header_entries_accumulate_separately_test() -> + BadHeaders = [?BAD_HEADER("header2")], + Headers = prepend_check(<<"header2">>, ?ROUTE_TABLE, BadHeaders), + Headers2 = prepend_check(<<"header1">>, ?XDEATH_TABLE, + [?BAD_HEADER("header1") | Headers]), + {table, [?FOUND_BAD_HEADER("header1"), + ?FOUND_BAD_HEADER("header2")]} = + rabbit_misc:table_lookup(Headers2, ?INVALID_HEADERS_KEY), + passed. + +corrupt_or_invalid_headers_are_overwritten_test() -> + Headers0 = [?BAD_HEADER("header1"), + ?BAD_HEADER("x-invalid-headers")], + Headers1 = prepend_check(<<"header1">>, ?XDEATH_TABLE, Headers0), + {table,[?FOUND_BAD_HEADER("header1"), + ?FOUND_BAD_HEADER("x-invalid-headers")]} = + rabbit_misc:table_lookup(Headers1, ?INVALID_HEADERS_KEY), + passed. + +invalid_same_header_entry_accumulation_test() -> + BadHeader1 = ?BAD_HEADER2("header1", "a"), + Headers = prepend_check(<<"header1">>, ?ROUTE_TABLE, [BadHeader1]), + Headers2 = prepend_check(<<"header1">>, ?ROUTE_TABLE, + [?BAD_HEADER2("header1", "b") | Headers]), + {table, InvalidHeaders} = + rabbit_misc:table_lookup(Headers2, ?INVALID_HEADERS_KEY), + {array, [{longstr,<<"bad header1b">>}, + {longstr,<<"bad header1a">>}]} = + rabbit_misc:table_lookup(InvalidHeaders, <<"header1">>), + passed. + +prepend_check(HeaderKey, HeaderTable, Headers) -> + Headers1 = rabbit_basic:prepend_table_header( + HeaderKey, HeaderTable, Headers), + {table, Invalid} = + rabbit_misc:table_lookup(Headers1, ?INVALID_HEADERS_KEY), + {Type, Value} = rabbit_misc:table_lookup(Headers, HeaderKey), + {array, [{Type, Value} | _]} = + rabbit_misc:table_lookup(Invalid, HeaderKey), + Headers1. + test_priority_queue() -> false = priority_queue:is_queue(not_a_queue), @@ -320,113 +439,45 @@ test_unfold() -> end, 10), passed. -test_parsing() -> - passed = test_content_properties(), - passed = test_field_values(), - passed. - -test_content_prop_encoding(Datum, Binary) -> - Types = [element(1, E) || E <- Datum], - Values = [element(2, E) || E <- Datum], - Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion - -test_content_properties() -> - test_content_prop_encoding([], <<0, 0>>), - test_content_prop_encoding([{bit, true}, {bit, false}, {bit, true}, {bit, false}], - <<16#A0, 0>>), - test_content_prop_encoding([{bit, true}, {octet, 123}, {bit, true}, {octet, undefined}, - {bit, true}], - <<16#E8,0,123>>), - test_content_prop_encoding([{bit, true}, {octet, 123}, {octet, 123}, {bit, true}], - <<16#F0,0,123,123>>), - test_content_prop_encoding([{bit, true}, {shortstr, <<"hi">>}, {bit, true}, - {shortint, 54321}, {bit, true}], - <<16#F8,0,2,"hi",16#D4,16#31>>), - test_content_prop_encoding([{bit, true}, {shortstr, undefined}, {bit, true}, - {shortint, 54321}, {bit, true}], - <<16#B8,0,16#D4,16#31>>), - test_content_prop_encoding([{table, [{<<"a signedint">>, signedint, 12345678}, - {<<"a longstr">>, longstr, <<"yes please">>}, - {<<"a decimal">>, decimal, {123, 12345678}}, - {<<"a timestamp">>, timestamp, 123456789012345}, - {<<"a nested table">>, table, - [{<<"one">>, signedint, 1}, - {<<"two">>, signedint, 2}]}]}], - << - %% property-flags - 16#8000:16, - - %% property-list: - - %% table - 117:32, % table length in bytes - - 11,"a signedint", % name - "I",12345678:32, % type and value - - 9,"a longstr", - "S",10:32,"yes please", - - 9,"a decimal", - "D",123,12345678:32, - - 11,"a timestamp", - "T", 123456789012345:64, - - 14,"a nested table", - "F", - 18:32, - - 3,"one", - "I",1:32, - - 3,"two", - "I",2:32 >>), - passed. - -test_field_values() -> +test_table_codec() -> %% FIXME this does not test inexact numbers (double and float) yet, %% because they won't pass the equality assertions - test_content_prop_encoding( - [{table, [{<<"longstr">>, longstr, <<"Here is a long string">>}, - {<<"signedint">>, signedint, 12345}, - {<<"decimal">>, decimal, {3, 123456}}, - {<<"timestamp">>, timestamp, 109876543209876}, - {<<"table">>, table, [{<<"one">>, signedint, 54321}, - {<<"two">>, longstr, <<"A long string">>}]}, - {<<"byte">>, byte, 255}, - {<<"long">>, long, 1234567890}, - {<<"short">>, short, 655}, - {<<"bool">>, bool, true}, - {<<"binary">>, binary, <<"a binary string">>}, - {<<"void">>, void, undefined}, - {<<"array">>, array, [{signedint, 54321}, - {longstr, <<"A long string">>}]} - - ]}], - << - %% property-flags - 16#8000:16, - %% table length in bytes - 228:32, - - 7,"longstr", "S", 21:32, "Here is a long string", % = 34 - 9,"signedint", "I", 12345:32/signed, % + 15 = 49 - 7,"decimal", "D", 3, 123456:32, % + 14 = 63 - 9,"timestamp", "T", 109876543209876:64, % + 19 = 82 - 5,"table", "F", 31:32, % length of table % + 11 = 93 - 3,"one", "I", 54321:32, % + 9 = 102 - 3,"two", "S", 13:32, "A long string", % + 22 = 124 - 4,"byte", "b", 255:8, % + 7 = 131 - 4,"long", "l", 1234567890:64, % + 14 = 145 - 5,"short", "s", 655:16, % + 9 = 154 - 4,"bool", "t", 1, % + 7 = 161 - 6,"binary", "x", 15:32, "a binary string", % + 27 = 188 - 4,"void", "V", % + 6 = 194 - 5,"array", "A", 23:32, % + 11 = 205 - "I", 54321:32, % + 5 = 210 - "S", 13:32, "A long string" % + 18 = 228 - >>), + Table = [{<<"longstr">>, longstr, <<"Here is a long string">>}, + {<<"signedint">>, signedint, 12345}, + {<<"decimal">>, decimal, {3, 123456}}, + {<<"timestamp">>, timestamp, 109876543209876}, + {<<"table">>, table, [{<<"one">>, signedint, 54321}, + {<<"two">>, longstr, + <<"A long string">>}]}, + {<<"byte">>, byte, 255}, + {<<"long">>, long, 1234567890}, + {<<"short">>, short, 655}, + {<<"bool">>, bool, true}, + {<<"binary">>, binary, <<"a binary string">>}, + {<<"void">>, void, undefined}, + {<<"array">>, array, [{signedint, 54321}, + {longstr, <<"A long string">>}]} + ], + Binary = << + 7,"longstr", "S", 21:32, "Here is a long string", + 9,"signedint", "I", 12345:32/signed, + 7,"decimal", "D", 3, 123456:32, + 9,"timestamp", "T", 109876543209876:64, + 5,"table", "F", 31:32, % length of table + 3,"one", "I", 54321:32, + 3,"two", "S", 13:32, "A long string", + 4,"byte", "b", 255:8, + 4,"long", "l", 1234567890:64, + 5,"short", "s", 655:16, + 4,"bool", "t", 1, + 6,"binary", "x", 15:32, "a binary string", + 4,"void", "V", + 5,"array", "A", 23:32, + "I", 54321:32, + "S", 13:32, "A long string" + >>, + Binary = rabbit_binary_generator:generate_table(Table), + Table = rabbit_binary_parser:parse_table(Binary), passed. %% Test that content frames don't exceed frame-max @@ -515,8 +566,9 @@ test_topic_matching() -> XName = #resource{virtual_host = <<"/">>, kind = exchange, name = <<"test_exchange">>}, - X = #exchange{name = XName, type = topic, durable = false, - auto_delete = false, arguments = []}, + X0 = #exchange{name = XName, type = topic, durable = false, + auto_delete = false, arguments = []}, + X = rabbit_exchange_decorator:set(X0), %% create rabbit_exchange_type_topic:validate(X), exchange_op_callback(X, create, []), @@ -617,8 +669,8 @@ test_topic_matching() -> exchange_op_callback(X, Fun, Args) -> rabbit_misc:execute_mnesia_transaction( - fun () -> rabbit_exchange:callback(X, Fun, [transaction, X] ++ Args) end), - rabbit_exchange:callback(X, Fun, [none, X] ++ Args). + fun () -> rabbit_exchange:callback(X, Fun, transaction, [X] ++ Args) end), + rabbit_exchange:callback(X, Fun, none, [X] ++ Args). test_topic_expect_match(X, List) -> lists:foreach( @@ -628,7 +680,6 @@ test_topic_expect_match(X, List) -> #'P_basic'{}, <<>>), Res = rabbit_exchange_type_topic:route( X, #delivery{mandatory = false, - immediate = false, sender = self(), message = Message}), ExpectedRes = lists:map( @@ -746,7 +797,9 @@ test_log_management_during_startup() -> ok = case catch control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, log_rotation_tty_no_handlers_test}); - {error, {cannot_log_to_tty, _, _}} -> ok + {badrpc, {'EXIT', {rabbit,failure_during_boot, + {error,{cannot_log_to_tty, + _, not_installed}}}}} -> ok end, %% fix sasl logging @@ -770,7 +823,9 @@ test_log_management_during_startup() -> ok = case control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, log_rotation_no_write_permission_dir_test}); - {error, {cannot_log_to_file, _, _}} -> ok + {badrpc, {'EXIT', + {rabbit, failure_during_boot, + {error, {cannot_log_to_file, _, _}}}}} -> ok end, %% start application with logging to a subdirectory which @@ -781,8 +836,11 @@ test_log_management_during_startup() -> ok = case control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, log_rotatation_parent_dirs_test}); - {error, {cannot_log_to_file, _, - {error, {cannot_create_parent_dirs, _, eacces}}}} -> ok + {badrpc, + {'EXIT', {rabbit,failure_during_boot, + {error, {cannot_log_to_file, _, + {error, + {cannot_create_parent_dirs, _, eacces}}}}}}} -> ok end, ok = set_permissions(TmpDir, 8#00700), ok = set_permissions(TmpLog, 8#00600), @@ -855,199 +913,60 @@ test_arguments_parser() -> passed. -test_cluster_management() -> - %% 'cluster' and 'reset' should only work if the app is stopped - {error, _} = control_action(cluster, []), - {error, _} = control_action(reset, []), - {error, _} = control_action(force_reset, []), +test_dynamic_mirroring() -> + %% Just unit tests of the node selection logic, see multi node + %% tests for the rest... + Test = fun ({NewM, NewSs, ExtraSs}, Policy, Params, + {MNode, SNodes, SSNodes}, All) -> + {ok, M} = rabbit_mirror_queue_misc:module(Policy), + {NewM, NewSs0} = M:suggested_queue_nodes( + Params, MNode, SNodes, SSNodes, All), + NewSs1 = lists:sort(NewSs0), + case dm_list_match(NewSs, NewSs1, ExtraSs) of + ok -> ok; + error -> exit({no_match, NewSs, NewSs1, ExtraSs}) + end + end, + + Test({a,[b,c],0},<<"all">>,'_',{a,[], []}, [a,b,c]), + Test({a,[b,c],0},<<"all">>,'_',{a,[b,c],[b,c]},[a,b,c]), + Test({a,[b,c],0},<<"all">>,'_',{a,[d], [d]}, [a,b,c]), + + N = fun (Atoms) -> [list_to_binary(atom_to_list(A)) || A <- Atoms] end, + + %% Add a node + Test({a,[b,c],0},<<"nodes">>,N([a,b,c]),{a,[b],[b]},[a,b,c,d]), + Test({b,[a,c],0},<<"nodes">>,N([a,b,c]),{b,[a],[a]},[a,b,c,d]), + %% Add two nodes and drop one + Test({a,[b,c],0},<<"nodes">>,N([a,b,c]),{a,[d],[d]},[a,b,c,d]), + %% Don't try to include nodes that are not running + Test({a,[b], 0},<<"nodes">>,N([a,b,f]),{a,[b],[b]},[a,b,c,d]), + %% If we can't find any of the nodes listed then just keep the master + Test({a,[], 0},<<"nodes">>,N([f,g,h]),{a,[b],[b]},[a,b,c,d]), + %% And once that's happened, still keep the master even when not listed, + %% if nothing is synced + Test({a,[b,c],0},<<"nodes">>,N([b,c]), {a,[], []}, [a,b,c,d]), + Test({a,[b,c],0},<<"nodes">>,N([b,c]), {a,[b],[]}, [a,b,c,d]), + %% But if something is synced we can lose the master - but make + %% sure we pick the new master from the nodes which are synced! + Test({b,[c], 0},<<"nodes">>,N([b,c]), {a,[b],[b]},[a,b,c,d]), + Test({b,[c], 0},<<"nodes">>,N([c,b]), {a,[b],[b]},[a,b,c,d]), + + Test({a,[], 1},<<"exactly">>,2,{a,[], []}, [a,b,c,d]), + Test({a,[], 2},<<"exactly">>,3,{a,[], []}, [a,b,c,d]), + Test({a,[c], 0},<<"exactly">>,2,{a,[c], [c]}, [a,b,c,d]), + Test({a,[c], 1},<<"exactly">>,3,{a,[c], [c]}, [a,b,c,d]), + Test({a,[c], 0},<<"exactly">>,2,{a,[c,d],[c,d]},[a,b,c,d]), + Test({a,[c,d],0},<<"exactly">>,3,{a,[c,d],[c,d]},[a,b,c,d]), - ok = control_action(stop_app, []), - - %% various ways of creating a standalone node - NodeS = atom_to_list(node()), - ClusteringSequence = [[], - [NodeS], - ["invalid@invalid", NodeS], - [NodeS, "invalid@invalid"]], - - ok = control_action(reset, []), - lists:foreach(fun (Arg) -> - ok = control_action(force_cluster, Arg), - ok - end, - ClusteringSequence), - lists:foreach(fun (Arg) -> - ok = control_action(reset, []), - ok = control_action(force_cluster, Arg), - ok - end, - ClusteringSequence), - ok = control_action(reset, []), - lists:foreach(fun (Arg) -> - ok = control_action(force_cluster, Arg), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok - end, - ClusteringSequence), - lists:foreach(fun (Arg) -> - ok = control_action(reset, []), - ok = control_action(force_cluster, Arg), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok - end, - ClusteringSequence), - - %% convert a disk node into a ram node - ok = control_action(reset, []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - ok = assert_ram_node(), - - %% join a non-existing cluster as a ram node - ok = control_action(reset, []), - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - ok = assert_ram_node(), - - ok = control_action(reset, []), - - SecondaryNode = rabbit_nodes:make("hare"), - case net_adm:ping(SecondaryNode) of - pong -> passed = test_cluster_management2(SecondaryNode); - pang -> io:format("Skipping clustering tests with node ~p~n", - [SecondaryNode]) - end, - - ok = control_action(start_app, []), passed. -test_cluster_management2(SecondaryNode) -> - NodeS = atom_to_list(node()), - SecondaryNodeS = atom_to_list(SecondaryNode), - - %% make a disk node - ok = control_action(cluster, [NodeS]), - ok = assert_disc_node(), - %% make a ram node - ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS]), - ok = assert_ram_node(), - - %% join cluster as a ram node - ok = control_action(reset, []), - ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_ram_node(), - - %% ram node will not start by itself - ok = control_action(stop_app, []), - ok = control_action(stop_app, SecondaryNode, [], []), - {error, _} = control_action(start_app, []), - ok = control_action(start_app, SecondaryNode, [], []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - - %% change cluster config while remaining in same cluster - ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - - %% join non-existing cluster as a ram node - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - {error, _} = control_action(start_app, []), - ok = assert_ram_node(), - - %% join empty cluster as a ram node (converts to disc) - ok = control_action(cluster, []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - - %% make a new ram node - ok = control_action(reset, []), - ok = control_action(force_cluster, [SecondaryNodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_ram_node(), - - %% turn ram node into disk node - ok = control_action(cluster, [SecondaryNodeS, NodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - - %% convert a disk node into a ram node - ok = assert_disc_node(), - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - ok = assert_ram_node(), - - %% make a new disk node - ok = control_action(force_reset, []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - - %% turn a disk node into a ram node - ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_ram_node(), - - %% NB: this will log an inconsistent_database error, which is harmless - %% Turning cover on / off is OK even if we're not in general using cover, - %% it just turns the engine on / off, doesn't actually log anything. - cover:stop([SecondaryNode]), - true = disconnect_node(SecondaryNode), - pong = net_adm:ping(SecondaryNode), - cover:start([SecondaryNode]), - - %% leaving a cluster as a ram node - ok = control_action(reset, []), - %% ...and as a disk node - ok = control_action(cluster, [SecondaryNodeS, NodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - cover:stop(SecondaryNode), - ok = control_action(reset, []), - cover:start(SecondaryNode), - - %% attempt to leave cluster when no other node is alive - ok = control_action(cluster, [SecondaryNodeS, NodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, SecondaryNode, [], []), - ok = control_action(stop_app, []), - {error, {no_running_cluster_nodes, _, _}} = - control_action(reset, []), - - %% attempt to change type when no other node is alive - {error, {no_running_cluster_nodes, _, _}} = - control_action(cluster, [SecondaryNodeS]), - - %% leave system clustered, with the secondary node as a ram node - ok = control_action(force_reset, []), - ok = control_action(start_app, []), - %% Yes, this is rather ugly. But since we're a clustered Mnesia - %% node and we're telling another clustered node to reset itself, - %% we will get disconnected half way through causing a - %% badrpc. This never happens in real life since rabbitmqctl is - %% not a clustered Mnesia node. - cover:stop(SecondaryNode), - {badrpc, nodedown} = control_action(force_reset, SecondaryNode, [], []), - pong = net_adm:ping(SecondaryNode), - cover:start(SecondaryNode), - ok = control_action(cluster, SecondaryNode, [NodeS], []), - ok = control_action(start_app, SecondaryNode, [], []), - - passed. +%% Does the first list match the second where the second is required +%% to have exactly Extra superfluous items? +dm_list_match([], [], 0) -> ok; +dm_list_match(_, [], _Extra) -> error; +dm_list_match([H|T1], [H |T2], Extra) -> dm_list_match(T1, T2, Extra); +dm_list_match(L1, [_H|T2], Extra) -> dm_list_match(L1, T2, Extra - 1). test_user_management() -> @@ -1134,22 +1053,21 @@ test_runtime_parameters() -> Bad = fun(L) -> {error_string, _} = control_action(set_parameter, L) end, %% Acceptable for bijection - Good(["test", "good", "<<\"ignore\">>"]), + Good(["test", "good", "\"ignore\""]), Good(["test", "good", "123"]), Good(["test", "good", "true"]), Good(["test", "good", "false"]), Good(["test", "good", "null"]), - Good(["test", "good", "[{<<\"key\">>, <<\"value\">>}]"]), + Good(["test", "good", "{\"key\": \"value\"}"]), - %% Various forms of fail due to non-bijectability + %% Invalid json Bad(["test", "good", "atom"]), - Bad(["test", "good", "{tuple, foo}"]), - Bad(["test", "good", "[{<<\"key\">>, <<\"value\">>, 1}]"]), - Bad(["test", "good", "[{key, <<\"value\">>}]"]), + Bad(["test", "good", "{\"foo\": \"bar\""]), + Bad(["test", "good", "{foo: \"bar\"}"]), %% Test actual validation hook - Good(["test", "maybe", "<<\"good\">>"]), - Bad(["test", "maybe", "<<\"bad\">>"]), + Good(["test", "maybe", "\"good\""]), + Bad(["test", "maybe", "\"bad\""]), ok = control_action(list_parameters, []), @@ -1157,25 +1075,100 @@ test_runtime_parameters() -> ok = control_action(clear_parameter, ["test", "maybe"]), {error_string, _} = control_action(clear_parameter, ["test", "neverexisted"]), + + %% We can delete for a component that no longer exists + Good(["test", "good", "\"ignore\""]), rabbit_runtime_parameters_test:unregister(), + ok = control_action(clear_parameter, ["test", "good"]), + passed. + +test_policy_validation() -> + rabbit_runtime_parameters_test:register_policy_validator(), + SetPol = fun (Key, Val) -> + control_action_opts( + ["set_policy", "name", ".*", + rabbit_misc:format("{\"~s\":~p}", [Key, Val])]) + end, + + ok = SetPol("testeven", []), + ok = SetPol("testeven", [1, 2]), + ok = SetPol("testeven", [1, 2, 3, 4]), + ok = SetPol("testpos", [2, 5, 5678]), + + error = SetPol("testpos", [-1, 0, 1]), + error = SetPol("testeven", [ 1, 2, 3]), + + ok = control_action(clear_policy, ["name"]), + rabbit_runtime_parameters_test:unregister_policy_validator(), + passed. + +test_policy_opts_validation() -> + Set = fun (Extra) -> control_action_opts( + ["set_policy", "name", ".*", "{\"ha-mode\":\"all\"}" + | Extra]) end, + OK = fun (Extra) -> ok = Set(Extra) end, + Fail = fun (Extra) -> error = Set(Extra) end, + + OK ([]), + + OK (["--priority", "0"]), + OK (["--priority", "3"]), + Fail(["--priority", "banana"]), + Fail(["--priority"]), + + OK (["--apply-to", "all"]), + OK (["--apply-to", "queues"]), + Fail(["--apply-to", "bananas"]), + Fail(["--apply-to"]), + + OK (["--priority", "3", "--apply-to", "queues"]), + Fail(["--priority", "banana", "--apply-to", "queues"]), + Fail(["--priority", "3", "--apply-to", "bananas"]), + + Fail(["--offline"]), + + ok = control_action(clear_policy, ["name"]), + passed. + +test_ha_policy_validation() -> + Set = fun (JSON) -> control_action_opts( + ["set_policy", "name", ".*", JSON]) end, + OK = fun (JSON) -> ok = Set(JSON) end, + Fail = fun (JSON) -> error = Set(JSON) end, + + OK ("{\"ha-mode\":\"all\"}"), + Fail("{\"ha-mode\":\"made_up\"}"), + + Fail("{\"ha-mode\":\"nodes\"}"), + Fail("{\"ha-mode\":\"nodes\",\"ha-params\":2}"), + Fail("{\"ha-mode\":\"nodes\",\"ha-params\":[\"a\",2]}"), + OK ("{\"ha-mode\":\"nodes\",\"ha-params\":[\"a\",\"b\"]}"), + Fail("{\"ha-params\":[\"a\",\"b\"]}"), + + Fail("{\"ha-mode\":\"exactly\"}"), + Fail("{\"ha-mode\":\"exactly\",\"ha-params\":[\"a\",\"b\"]}"), + OK ("{\"ha-mode\":\"exactly\",\"ha-params\":2}"), + Fail("{\"ha-params\":2}"), + + OK ("{\"ha-mode\":\"all\",\"ha-sync-mode\":\"manual\"}"), + OK ("{\"ha-mode\":\"all\",\"ha-sync-mode\":\"automatic\"}"), + Fail("{\"ha-mode\":\"all\",\"ha-sync-mode\":\"made_up\"}"), + Fail("{\"ha-sync-mode\":\"manual\"}"), + Fail("{\"ha-sync-mode\":\"automatic\"}"), + + ok = control_action(clear_policy, ["name"]), passed. test_server_status() -> %% create a few things so there is some useful information to list - Writer = spawn(fun () -> receive shutdown -> ok end end), - {ok, Ch} = rabbit_channel:start_link( - 1, self(), Writer, self(), "", rabbit_framing_amqp_0_9_1, - user(<<"user">>), <<"/">>, [], self(), - rabbit_limiter:make_token(self())), + {_Writer, Limiter, Ch} = test_channel(), [Q, Q2] = [Queue || Name <- [<<"foo">>, <<"bar">>], {new, Queue = #amqqueue{}} <- [rabbit_amqqueue:declare( rabbit_misc:r(<<"/">>, queue, Name), false, false, [], none)]], - ok = rabbit_amqqueue:basic_consume( - Q, true, Ch, rabbit_limiter:make_token(), - <<"ctag">>, true, undefined), + Q, true, Ch, Limiter, false, <<"ctag">>, true, none, undefined), %% list queues ok = info_action(list_queues, rabbit_amqqueue:info_keys(), true), @@ -1195,11 +1188,9 @@ test_server_status() -> rabbit_misc:r(<<"/">>, queue, <<"foo">>)), %% list connections - [#listener{host = H, port = P} | _] = - [L || L = #listener{node = N} <- rabbit_networking:active_listeners(), - N =:= node()], - - {ok, _C} = gen_tcp:connect(H, P, []), + {H, P} = find_listener(), + {ok, C} = gen_tcp:connect(H, P, []), + gen_tcp:send(C, <<"AMQP", 0, 0, 9, 1>>), timer:sleep(100), ok = info_action(list_connections, rabbit_networking:connection_info_keys(), false), @@ -1215,7 +1206,18 @@ test_server_status() -> ok = control_action(list_consumers, []), %% set vm memory high watermark + HWM = vm_memory_monitor:get_vm_memory_high_watermark(), + ok = control_action(set_vm_memory_high_watermark, ["1"]), ok = control_action(set_vm_memory_high_watermark, ["1.0"]), + %% this will trigger an alarm + ok = control_action(set_vm_memory_high_watermark, ["0.0"]), + %% reset + ok = control_action(set_vm_memory_high_watermark, [float_to_list(HWM)]), + + %% eval + {error_string, _} = control_action(eval, ["\""]), + {error_string, _} = control_action(eval, ["a("]), + ok = control_action(eval, ["a."]), %% cleanup [{ok, _} = rabbit_amqqueue:delete(QR, false, false) || QR <- [Q, Q2]], @@ -1225,22 +1227,48 @@ test_server_status() -> passed. +test_amqp_connection_refusal() -> + [passed = test_amqp_connection_refusal(V) || + V <- [<<"AMQP",9,9,9,9>>, <<"AMQP",0,1,0,0>>, <<"XXXX",0,0,9,1>>]], + passed. + +test_amqp_connection_refusal(Header) -> + {H, P} = find_listener(), + {ok, C} = gen_tcp:connect(H, P, [binary, {active, false}]), + ok = gen_tcp:send(C, Header), + {ok, <<"AMQP",0,0,9,1>>} = gen_tcp:recv(C, 8, 100), + ok = gen_tcp:close(C), + passed. + +find_listener() -> + [#listener{host = H, port = P} | _] = + [L || L = #listener{node = N} <- rabbit_networking:active_listeners(), + N =:= node()], + {H, P}. + test_writer(Pid) -> receive - shutdown -> ok; - {send_command, Method} -> Pid ! Method, test_writer(Pid) + {'$gen_call', From, flush} -> gen_server:reply(From, ok), + test_writer(Pid); + {send_command, Method} -> Pid ! Method, + test_writer(Pid); + shutdown -> ok end. -test_spawn() -> +test_channel() -> Me = self(), Writer = spawn(fun () -> test_writer(Me) end), + {ok, Limiter} = rabbit_limiter:start_link(), {ok, Ch} = rabbit_channel:start_link( 1, Me, Writer, Me, "", rabbit_framing_amqp_0_9_1, - user(<<"guest">>), <<"/">>, [], Me, - rabbit_limiter:make_token(self())), + user(<<"guest">>), <<"/">>, [], Me, Limiter), + {Writer, Limiter, Ch}. + +test_spawn() -> + {Writer, _Limiter, Ch} = test_channel(), ok = rabbit_channel:do(Ch, #'channel.open'{}), receive #'channel.open_ok'{} -> ok - after 1000 -> throw(failed_to_receive_channel_open_ok) + after ?TIMEOUT -> throw(failed_to_receive_channel_open_ok) end, {Writer, Ch}. @@ -1261,7 +1289,7 @@ test_spawn_remote() -> end end), receive Res -> Res - after 1000 -> throw(failed_to_receive_result) + after ?TIMEOUT -> throw(failed_to_receive_result) end. user(Username) -> @@ -1281,13 +1309,10 @@ test_confirms() -> queue = Q0, exchange = <<"amq.direct">>, routing_key = "magic" }), - receive #'queue.bind_ok'{} -> - Q0 - after 1000 -> - throw(failed_to_bind_queue) + receive #'queue.bind_ok'{} -> Q0 + after ?TIMEOUT -> throw(failed_to_bind_queue) end - after 1000 -> - throw(failed_to_declare_queue) + after ?TIMEOUT -> throw(failed_to_declare_queue) end end, %% Declare and bind two queues @@ -1300,7 +1325,7 @@ test_confirms() -> rabbit_channel:do(Ch, #'confirm.select'{}), receive #'confirm.select_ok'{} -> ok - after 1000 -> throw(failed_to_enable_confirms) + after ?TIMEOUT -> throw(failed_to_enable_confirms) end, %% Publish a message rabbit_channel:do(Ch, #'basic.publish'{exchange = <<"amq.direct">>, @@ -1317,7 +1342,7 @@ test_confirms() -> receive #'basic.nack'{} -> ok; #'basic.ack'{} -> throw(received_ack_instead_of_nack) - after 2000 -> throw(did_not_receive_nack) + after ?TIMEOUT-> throw(did_not_receive_nack) end, receive #'basic.ack'{} -> throw(received_ack_when_none_expected) @@ -1327,13 +1352,18 @@ test_confirms() -> rabbit_channel:do(Ch, #'queue.delete'{queue = QName2}), receive #'queue.delete_ok'{} -> ok - after 1000 -> throw(failed_to_cleanup_queue) + after ?TIMEOUT -> throw(failed_to_cleanup_queue) end, unlink(Ch), ok = rabbit_channel:shutdown(Ch), passed. +test_with_state() -> + fhc_state = gen_server2:with_state(file_handle_cache, + fun (S) -> element(1, S) end), + passed. + test_statistics_event_receiver(Pid) -> receive Foo -> Pid ! Foo, test_statistics_event_receiver(Pid) @@ -1350,7 +1380,7 @@ test_statistics_receive_event1(Ch, Matcher) -> true -> Props; _ -> test_statistics_receive_event1(Ch, Matcher) end - after 1000 -> throw(failed_to_receive_event) + after ?TIMEOUT -> throw(failed_to_receive_event) end. test_statistics() -> @@ -1362,12 +1392,10 @@ test_statistics() -> %% Set up a channel and queue {_Writer, Ch} = test_spawn(), rabbit_channel:do(Ch, #'queue.declare'{}), - QName = receive #'queue.declare_ok'{queue = Q0} -> - Q0 - after 1000 -> throw(failed_to_receive_queue_declare_ok) + QName = receive #'queue.declare_ok'{queue = Q0} -> Q0 + after ?TIMEOUT -> throw(failed_to_receive_queue_declare_ok) end, - {ok, Q} = rabbit_amqqueue:lookup(rabbit_misc:r(<<"/">>, queue, QName)), - QPid = Q#amqqueue.pid, + QRes = rabbit_misc:r(<<"/">>, queue, QName), X = rabbit_misc:r(<<"/">>, exchange, <<"">>), rabbit_tests_event_receiver:start(self(), [node()], [channel_stats]), @@ -1391,9 +1419,9 @@ test_statistics() -> length(proplists:get_value( channel_queue_exchange_stats, E)) > 0 end), - [{QPid,[{get,1}]}] = proplists:get_value(channel_queue_stats, Event2), + [{QRes, [{get,1}]}] = proplists:get_value(channel_queue_stats, Event2), [{X,[{publish,1}]}] = proplists:get_value(channel_exchange_stats, Event2), - [{{QPid,X},[{publish,1}]}] = + [{{QRes,X},[{publish,1}]}] = proplists:get_value(channel_queue_exchange_stats, Event2), %% Check the stats remove stuff on queue deletion @@ -1418,33 +1446,33 @@ test_refresh_events(SecondaryNode) -> [channel_created, queue_created]), {_Writer, Ch} = test_spawn(), - expect_events(Ch, channel_created), + expect_events(pid, Ch, channel_created), rabbit_channel:shutdown(Ch), {_Writer2, Ch2} = test_spawn(SecondaryNode), - expect_events(Ch2, channel_created), + expect_events(pid, Ch2, channel_created), rabbit_channel:shutdown(Ch2), - {new, #amqqueue { pid = QPid } = Q} = + {new, #amqqueue{name = QName} = Q} = rabbit_amqqueue:declare(test_queue(), false, false, [], none), - expect_events(QPid, queue_created), + expect_events(name, QName, queue_created), rabbit_amqqueue:delete(Q, false, false), rabbit_tests_event_receiver:stop(), passed. -expect_events(Pid, Type) -> - expect_event(Pid, Type), +expect_events(Tag, Key, Type) -> + expect_event(Tag, Key, Type), rabbit:force_event_refresh(), - expect_event(Pid, Type). + expect_event(Tag, Key, Type). -expect_event(Pid, Type) -> +expect_event(Tag, Key, Type) -> receive #event{type = Type, props = Props} -> - case pget(pid, Props) of - Pid -> ok; - _ -> expect_event(Pid, Type) + case pget(Tag, Props) of + Key -> ok; + _ -> expect_event(Tag, Key, Type) end - after 1000 -> throw({failed_to_receive_event, Type}) + after ?TIMEOUT -> throw({failed_to_receive_event, Type}) end. test_delegates_async(SecondaryNode) -> @@ -1468,7 +1496,7 @@ make_responder(FMsg) -> make_responder(FMsg, timeout). make_responder(FMsg, Throw) -> fun () -> receive Msg -> FMsg(Msg) - after 1000 -> throw(Throw) + after ?TIMEOUT -> throw(Throw) end end. @@ -1481,9 +1509,7 @@ await_response(Count) -> receive response -> ok, await_response(Count - 1) - after 1000 -> - io:format("Async reply not received~n"), - throw(timeout) + after ?TIMEOUT -> throw(timeout) end. must_exit(Fun) -> @@ -1550,7 +1576,7 @@ test_queue_cleanup(_SecondaryNode) -> rabbit_channel:do(Ch, #'queue.declare'{ queue = ?CLEANUP_QUEUE_NAME }), receive #'queue.declare_ok'{queue = ?CLEANUP_QUEUE_NAME} -> ok - after 1000 -> throw(failed_to_receive_queue_declare_ok) + after ?TIMEOUT -> throw(failed_to_receive_queue_declare_ok) end, rabbit_channel:shutdown(Ch), rabbit:stop(), @@ -1561,8 +1587,7 @@ test_queue_cleanup(_SecondaryNode) -> receive #'channel.close'{reply_code = ?NOT_FOUND} -> ok - after 2000 -> - throw(failed_to_receive_channel_exit) + after ?TIMEOUT -> throw(failed_to_receive_channel_exit) end, rabbit_channel:shutdown(Ch2), passed. @@ -1589,8 +1614,7 @@ test_declare_on_dead_queue(SecondaryNode) -> true = rabbit_misc:is_process_alive(Q#amqqueue.pid), {ok, 0} = rabbit_amqqueue:delete(Q, false, false), passed - after 2000 -> - throw(failed_to_create_and_kill_queue) + after ?TIMEOUT -> throw(failed_to_create_and_kill_queue) end. %%--------------------------------------------------------------------- @@ -1603,7 +1627,7 @@ control_action(Command, Args, NewOpts) -> expand_options(default_options(), NewOpts)). control_action(Command, Node, Args, Opts) -> - case catch rabbit_control:action( + case catch rabbit_control_main:action( Command, Node, Args, Opts, fun (Format, Args1) -> io:format(Format ++ " ...~n", Args1) @@ -1616,9 +1640,21 @@ control_action(Command, Node, Args, Opts) -> Other end. +control_action_opts(Raw) -> + NodeStr = atom_to_list(node()), + case rabbit_control_main:parse_arguments(Raw, NodeStr) of + {ok, {Cmd, Opts, Args}} -> + case control_action(Cmd, node(), Args, Opts) of + ok -> ok; + _ -> error + end; + _ -> + error + end. + info_action(Command, Args, CheckVHost) -> ok = control_action(Command, []), - if CheckVHost -> ok = control_action(Command, []); + if CheckVHost -> ok = control_action(Command, [], ["-p", "/"]); true -> ok end, ok = control_action(Command, lists:map(fun atom_to_list/1, Args)), @@ -1679,15 +1715,15 @@ clean_logs(Files, Suffix) -> ok. assert_ram_node() -> - case rabbit_mnesia:is_disc_node() of - true -> exit('not_ram_node'); - false -> ok + case rabbit_mnesia:node_type() of + disc -> exit('not_ram_node'); + ram -> ok end. assert_disc_node() -> - case rabbit_mnesia:is_disc_node() of - true -> ok; - false -> exit('not_disc_node') + case rabbit_mnesia:node_type() of + disc -> ok; + ram -> exit('not_disc_node') end. delete_file(File) -> @@ -1821,7 +1857,7 @@ on_disk_capture(OnDisk, Awaiting, Pid) -> Pid); stop -> done - after (case Awaiting of [] -> 200; _ -> 1000 end) -> + after (case Awaiting of [] -> 200; _ -> ?TIMEOUT end) -> case Awaiting of [] -> Pid ! {self(), arrived}, on_disk_capture(); _ -> Pid ! {self(), timeout} @@ -2301,6 +2337,10 @@ variable_queue_publish(IsPersistent, Count, VQ) -> variable_queue_publish(IsPersistent, Count, fun (_N, P) -> P end, VQ). variable_queue_publish(IsPersistent, Count, PropFun, VQ) -> + variable_queue_publish(IsPersistent, 1, Count, PropFun, + fun (_N) -> <<>> end, VQ). + +variable_queue_publish(IsPersistent, Start, Count, PropFun, PayloadFun, VQ) -> lists:foldl( fun (N, VQN) -> rabbit_variable_queue:publish( @@ -2309,16 +2349,18 @@ variable_queue_publish(IsPersistent, Count, PropFun, VQ) -> <<>>, #'P_basic'{delivery_mode = case IsPersistent of true -> 2; false -> 1 - end}, <<>>), - PropFun(N, #message_properties{}), self(), VQN) - end, VQ, lists:seq(1, Count)). + end}, + PayloadFun(N)), + PropFun(N, #message_properties{}), false, self(), VQN) + end, VQ, lists:seq(Start, Start + Count - 1)). variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> lists:foldl(fun (N, {VQN, AckTagsAcc}) -> Rem = Len - N, {{#basic_message { is_persistent = IsPersistent }, - IsDelivered, AckTagN, Rem}, VQM} = + IsDelivered, AckTagN}, VQM} = rabbit_variable_queue:fetch(true, VQN), + Rem = rabbit_variable_queue:len(VQM), {VQM, [AckTagN | AckTagsAcc]} end, {VQ, []}, lists:seq(1, Count)). @@ -2361,8 +2403,8 @@ publish_and_confirm(Q, Payload, Count) -> Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>), <<>>, #'P_basic'{delivery_mode = 2}, Payload), - Delivery = #delivery{mandatory = false, immediate = false, - sender = self(), message = Msg, msg_seq_no = Seq}, + Delivery = #delivery{mandatory = false, sender = self(), + message = Msg, msg_seq_no = Seq}, {routed, _} = rabbit_amqqueue:deliver([Q], Delivery) end || Seq <- Seqs], wait_for_confirms(gb_sets:from_list(Seqs)). @@ -2374,7 +2416,7 @@ wait_for_confirms(Unconfirmed) -> wait_for_confirms( rabbit_misc:gb_sets_difference( Unconfirmed, gb_sets:from_list(Confirmed))) - after 5000 -> exit(timeout_waiting_for_confirm) + after ?TIMEOUT -> exit(timeout_waiting_for_confirm) end end. @@ -2384,37 +2426,141 @@ test_variable_queue() -> fun test_variable_queue_partial_segments_delta_thing/1, fun test_variable_queue_all_the_bits_not_covered_elsewhere1/1, fun test_variable_queue_all_the_bits_not_covered_elsewhere2/1, - fun test_dropwhile/1, + fun test_drop/1, + fun test_variable_queue_fold_msg_on_disk/1, + fun test_dropfetchwhile/1, fun test_dropwhile_varying_ram_duration/1, + fun test_fetchwhile_varying_ram_duration/1, fun test_variable_queue_ack_limiting/1, - fun test_variable_queue_requeue/1]], + fun test_variable_queue_purge/1, + fun test_variable_queue_requeue/1, + fun test_variable_queue_requeue_ram_beta/1, + fun test_variable_queue_fold/1]], passed. -test_variable_queue_requeue(VQ0) -> - Interval = 50, - Count = rabbit_queue_index:next_segment_boundary(0) + 2 * Interval, +test_variable_queue_fold(VQ0) -> + {PendingMsgs, RequeuedMsgs, FreshMsgs, VQ1} = + variable_queue_with_holes(VQ0), + Count = rabbit_variable_queue:depth(VQ1), + Msgs = lists:sort(PendingMsgs ++ RequeuedMsgs ++ FreshMsgs), + lists:foldl(fun (Cut, VQ2) -> + test_variable_queue_fold(Cut, Msgs, PendingMsgs, VQ2) + end, VQ1, [0, 1, 2, Count div 2, + Count - 1, Count, Count + 1, Count * 2]). + +test_variable_queue_fold(Cut, Msgs, PendingMsgs, VQ0) -> + {Acc, VQ1} = rabbit_variable_queue:fold( + fun (M, _, Pending, A) -> + MInt = msg2int(M), + Pending = lists:member(MInt, PendingMsgs), %% assert + case MInt =< Cut of + true -> {cont, [MInt | A]}; + false -> {stop, A} + end + end, [], VQ0), + Expected = lists:takewhile(fun (I) -> I =< Cut end, Msgs), + Expected = lists:reverse(Acc), %% assertion + VQ1. + +msg2int(#basic_message{content = #content{ payload_fragments_rev = P}}) -> + binary_to_term(list_to_binary(lists:reverse(P))). + +ack_subset(AckSeqs, Interval, Rem) -> + lists:filter(fun ({_Ack, N}) -> (N + Rem) rem Interval == 0 end, AckSeqs). + +requeue_one_by_one(Acks, VQ) -> + lists:foldl(fun (AckTag, VQN) -> + {_MsgId, VQM} = rabbit_variable_queue:requeue( + [AckTag], VQN), + VQM + end, VQ, Acks). + +%% Create a vq with messages in q1, delta, and q3, and holes (in the +%% form of pending acks) in the latter two. +variable_queue_with_holes(VQ0) -> + Interval = 64, + Count = rabbit_queue_index:next_segment_boundary(0)*2 + 2 * Interval, Seq = lists:seq(1, Count), VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0), - VQ2 = variable_queue_publish(false, Count, VQ1), - {VQ3, Acks} = variable_queue_fetch(Count, false, false, Count, VQ2), - Subset = lists:foldl(fun ({Ack, N}, Acc) when N rem Interval == 0 -> - [Ack | Acc]; - (_, Acc) -> - Acc - end, [], lists:zip(Acks, Seq)), - {_MsgIds, VQ4} = rabbit_variable_queue:requeue(Acks -- Subset, VQ3), - VQ5 = lists:foldl(fun (AckTag, VQN) -> - {_MsgId, VQM} = rabbit_variable_queue:requeue( - [AckTag], VQN), - VQM - end, VQ4, Subset), - VQ6 = lists:foldl(fun (AckTag, VQa) -> - {{#basic_message{}, true, AckTag, _}, VQb} = + VQ2 = variable_queue_publish( + false, 1, Count, + fun (_, P) -> P end, fun erlang:term_to_binary/1, VQ1), + {VQ3, AcksR} = variable_queue_fetch(Count, false, false, Count, VQ2), + Acks = lists:reverse(AcksR), + AckSeqs = lists:zip(Acks, Seq), + [{Subset1, _Seq1}, {Subset2, _Seq2}, {Subset3, Seq3}] = + [lists:unzip(ack_subset(AckSeqs, Interval, I)) || I <- [0, 1, 2]], + %% we requeue in three phases in order to exercise requeuing logic + %% in various vq states + {_MsgIds, VQ4} = rabbit_variable_queue:requeue( + Acks -- (Subset1 ++ Subset2 ++ Subset3), VQ3), + VQ5 = requeue_one_by_one(Subset1, VQ4), + %% by now we have some messages (and holes) in delt + VQ6 = requeue_one_by_one(Subset2, VQ5), + VQ7 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ6), + %% add the q1 tail + VQ8 = variable_queue_publish( + true, Count + 1, 64, + fun (_, P) -> P end, fun erlang:term_to_binary/1, VQ7), + %% assertions + [false = case V of + {delta, _, 0, _} -> true; + 0 -> true; + _ -> false + end || {K, V} <- rabbit_variable_queue:status(VQ8), + lists:member(K, [q1, delta, q3])], + Depth = Count + 64, + Depth = rabbit_variable_queue:depth(VQ8), + Len = Depth - length(Subset3), + Len = rabbit_variable_queue:len(VQ8), + {Seq3, Seq -- Seq3, lists:seq(Count + 1, Count + 64), VQ8}. + +test_variable_queue_requeue(VQ0) -> + {_PendingMsgs, RequeuedMsgs, FreshMsgs, VQ1} = + variable_queue_with_holes(VQ0), + Msgs = + lists:zip(RequeuedMsgs, + lists:duplicate(length(RequeuedMsgs), true)) ++ + lists:zip(FreshMsgs, + lists:duplicate(length(FreshMsgs), false)), + VQ2 = lists:foldl(fun ({I, Requeued}, VQa) -> + {{M, MRequeued, _}, VQb} = rabbit_variable_queue:fetch(true, VQa), + Requeued = MRequeued, %% assertion + I = msg2int(M), %% assertion VQb - end, VQ5, lists:reverse(Acks)), - {empty, VQ7} = rabbit_variable_queue:fetch(true, VQ6), - VQ7. + end, VQ1, Msgs), + {empty, VQ3} = rabbit_variable_queue:fetch(true, VQ2), + VQ3. + +%% requeue from ram_pending_ack into q3, move to delta and then empty queue +test_variable_queue_requeue_ram_beta(VQ0) -> + Count = rabbit_queue_index:next_segment_boundary(0)*2 + 2, + VQ1 = rabbit_tests:variable_queue_publish(false, Count, VQ0), + {VQ2, AcksR} = variable_queue_fetch(Count, false, false, Count, VQ1), + {Back, Front} = lists:split(Count div 2, AcksR), + {_, VQ3} = rabbit_variable_queue:requeue(erlang:tl(Back), VQ2), + VQ4 = rabbit_variable_queue:set_ram_duration_target(0, VQ3), + {_, VQ5} = rabbit_variable_queue:requeue([erlang:hd(Back)], VQ4), + VQ6 = requeue_one_by_one(Front, VQ5), + {VQ7, AcksAll} = variable_queue_fetch(Count, false, true, Count, VQ6), + {_, VQ8} = rabbit_variable_queue:ack(AcksAll, VQ7), + VQ8. + +test_variable_queue_purge(VQ0) -> + LenDepth = fun (VQ) -> + {rabbit_variable_queue:len(VQ), + rabbit_variable_queue:depth(VQ)} + end, + VQ1 = variable_queue_publish(false, 10, VQ0), + {VQ2, Acks} = variable_queue_fetch(6, false, false, 10, VQ1), + {4, VQ3} = rabbit_variable_queue:purge(VQ2), + {0, 6} = LenDepth(VQ3), + {_, VQ4} = rabbit_variable_queue:requeue(lists:sublist(Acks, 2), VQ3), + {2, 6} = LenDepth(VQ4), + VQ5 = rabbit_variable_queue:purge_acks(VQ4), + {2, 2} = LenDepth(VQ5), + VQ5. test_variable_queue_ack_limiting(VQ0) -> %% start by sending in a bunch of messages @@ -2445,41 +2591,86 @@ test_variable_queue_ack_limiting(VQ0) -> VQ6. -test_dropwhile(VQ0) -> +test_drop(VQ0) -> + %% start by sending a messages + VQ1 = variable_queue_publish(false, 1, VQ0), + %% drop message with AckRequired = true + {{MsgId, AckTag}, VQ2} = rabbit_variable_queue:drop(true, VQ1), + true = rabbit_variable_queue:is_empty(VQ2), + true = AckTag =/= undefinded, + %% drop again -> empty + {empty, VQ3} = rabbit_variable_queue:drop(false, VQ2), + %% requeue + {[MsgId], VQ4} = rabbit_variable_queue:requeue([AckTag], VQ3), + %% drop message with AckRequired = false + {{MsgId, undefined}, VQ5} = rabbit_variable_queue:drop(false, VQ4), + true = rabbit_variable_queue:is_empty(VQ5), + VQ5. + +test_dropfetchwhile(VQ0) -> Count = 10, %% add messages with sequential expiry VQ1 = variable_queue_publish( - false, Count, - fun (N, Props) -> Props#message_properties{expiry = N} end, VQ0), + false, 1, Count, + fun (N, Props) -> Props#message_properties{expiry = N} end, + fun erlang:term_to_binary/1, VQ0), + + %% fetch the first 5 messages + {#message_properties{expiry = 6}, {Msgs, AckTags}, VQ2} = + rabbit_variable_queue:fetchwhile( + fun (#message_properties{expiry = Expiry}) -> Expiry =< 5 end, + fun (Msg, AckTag, {MsgAcc, AckAcc}) -> + {[Msg | MsgAcc], [AckTag | AckAcc]} + end, {[], []}, VQ1), + true = lists:seq(1, 5) == [msg2int(M) || M <- lists:reverse(Msgs)], + + %% requeue them + {_MsgIds, VQ3} = rabbit_variable_queue:requeue(AckTags, VQ2), %% drop the first 5 messages - {undefined, VQ2} = rabbit_variable_queue:dropwhile( - fun(#message_properties { expiry = Expiry }) -> - Expiry =< 5 - end, false, VQ1), - - %% fetch five now - VQ3 = lists:foldl(fun (_N, VQN) -> - {{#basic_message{}, _, _, _}, VQM} = + {#message_properties{expiry = 6}, VQ4} = + rabbit_variable_queue:dropwhile( + fun (#message_properties {expiry = Expiry}) -> Expiry =< 5 end, VQ3), + + %% fetch 5 + VQ5 = lists:foldl(fun (N, VQN) -> + {{Msg, _, _}, VQM} = rabbit_variable_queue:fetch(false, VQN), + true = msg2int(Msg) == N, VQM - end, VQ2, lists:seq(6, Count)), + end, VQ4, lists:seq(6, Count)), %% should be empty now - {empty, VQ4} = rabbit_variable_queue:fetch(false, VQ3), + true = rabbit_variable_queue:is_empty(VQ5), - VQ4. + VQ5. test_dropwhile_varying_ram_duration(VQ0) -> + test_dropfetchwhile_varying_ram_duration( + fun (VQ1) -> + {_, VQ2} = rabbit_variable_queue:dropwhile( + fun (_) -> false end, VQ1), + VQ2 + end, VQ0). + +test_fetchwhile_varying_ram_duration(VQ0) -> + test_dropfetchwhile_varying_ram_duration( + fun (VQ1) -> + {_, ok, VQ2} = rabbit_variable_queue:fetchwhile( + fun (_) -> false end, + fun (_, _, A) -> A end, + ok, VQ1), + VQ2 + end, VQ0). + +test_dropfetchwhile_varying_ram_duration(Fun, VQ0) -> VQ1 = variable_queue_publish(false, 1, VQ0), VQ2 = rabbit_variable_queue:set_ram_duration_target(0, VQ1), - {undefined, VQ3} = rabbit_variable_queue:dropwhile( - fun(_) -> false end, false, VQ2), + VQ3 = Fun(VQ2), VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), VQ5 = variable_queue_publish(false, 1, VQ4), - {undefined, VQ6} = - rabbit_variable_queue:dropwhile(fun(_) -> false end, false, VQ5), + VQ6 = Fun(VQ5), VQ6. test_variable_queue_dynamic_duration_change(VQ0) -> @@ -2514,7 +2705,8 @@ publish_fetch_and_ack(0, _Len, VQ0) -> VQ0; publish_fetch_and_ack(N, Len, VQ0) -> VQ1 = variable_queue_publish(false, 1, VQ0), - {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), + {{_Msg, false, AckTag}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), + Len = rabbit_variable_queue:len(VQ2), {_Guids, VQ3} = rabbit_variable_queue:ack([AckTag], VQ2), publish_fetch_and_ack(N-1, Len, VQ3). @@ -2579,8 +2771,8 @@ test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> Count, VQ4), _VQ6 = rabbit_variable_queue:terminate(shutdown, VQ5), VQ7 = variable_queue_init(test_amqqueue(true), true), - {{_Msg1, true, _AckTag1, Count1}, VQ8} = - rabbit_variable_queue:fetch(true, VQ7), + {{_Msg1, true, _AckTag1}, VQ8} = rabbit_variable_queue:fetch(true, VQ7), + Count1 = rabbit_variable_queue:len(VQ8), VQ9 = variable_queue_publish(false, 1, VQ8), VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9), {VQ11, _AckTags2} = variable_queue_fetch(Count1, true, true, Count, VQ10), @@ -2599,6 +2791,13 @@ test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), VQ8. +test_variable_queue_fold_msg_on_disk(VQ0) -> + VQ1 = variable_queue_publish(true, 1, VQ0), + {VQ2, AckTags} = variable_queue_fetch(1, true, false, 1, VQ1), + {ok, VQ3} = rabbit_variable_queue:ackfold(fun (_M, _A, ok) -> ok end, + ok, VQ2, AckTags), + VQ3. + test_queue_recover() -> Count = 2 * rabbit_queue_index:next_segment_boundary(0), {new, #amqqueue { pid = QPid, name = QName } = Q} = @@ -2611,19 +2810,21 @@ test_queue_recover() -> after 10000 -> exit(timeout_waiting_for_queue_death) end, rabbit_amqqueue:stop(), - rabbit_amqqueue:start(), + rabbit_amqqueue:start(rabbit_amqqueue:recover()), + {ok, Limiter} = rabbit_limiter:start_link(), rabbit_amqqueue:with_or_die( QName, fun (Q1 = #amqqueue { pid = QPid1 }) -> CountMinusOne = Count - 1, {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} = - rabbit_amqqueue:basic_get(Q1, self(), false), + rabbit_amqqueue:basic_get(Q1, self(), false, Limiter), exit(QPid1, shutdown), VQ1 = variable_queue_init(Q, true), - {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} = + {{_Msg1, true, _AckTag1}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), + CountMinusOne = rabbit_variable_queue:len(VQ2), _VQ3 = rabbit_variable_queue:delete_and_terminate(shutdown, VQ2), - rabbit_amqqueue:internal_delete(QName, QPid1) + rabbit_amqqueue:internal_delete(QName) end), passed. @@ -2637,9 +2838,11 @@ test_variable_queue_delete_msg_store_files_callback() -> rabbit_amqqueue:set_ram_duration_target(QPid, 0), + {ok, Limiter} = rabbit_limiter:start_link(), + CountMinusOne = Count - 1, {ok, CountMinusOne, {QName, QPid, _AckTag, false, _Msg}} = - rabbit_amqqueue:basic_get(Q, self(), true), + rabbit_amqqueue:basic_get(Q, self(), true, Limiter), {ok, CountMinusOne} = rabbit_amqqueue:purge(Q), %% give the queue a second to receive the close_fds callback msg diff --git a/src/rabbit_tests_event_receiver.erl b/src/rabbit_tests_event_receiver.erl index 72c07b51..7b756cbc 100644 --- a/src/rabbit_tests_event_receiver.erl +++ b/src/rabbit_tests_event_receiver.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_tests_event_receiver). diff --git a/src/rabbit_trace.erl b/src/rabbit_trace.erl index 3a5b96de..d0dcaa71 100644 --- a/src/rabbit_trace.erl +++ b/src/rabbit_trace.erl @@ -10,13 +10,13 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_trace). --export([init/1, tracing/1, tap_trace_in/2, tap_trace_out/2, start/1, stop/1]). +-export([init/1, enabled/1, tap_in/2, tap_out/2, start/1, stop/1]). -include("rabbit.hrl"). -include("rabbit_framing.hrl"). @@ -31,9 +31,9 @@ -type(state() :: rabbit_types:exchange() | 'none'). -spec(init/1 :: (rabbit_types:vhost()) -> state()). --spec(tracing/1 :: (rabbit_types:vhost()) -> boolean()). --spec(tap_trace_in/2 :: (rabbit_types:basic_message(), state()) -> 'ok'). --spec(tap_trace_out/2 :: (rabbit_amqqueue:qmsg(), state()) -> 'ok'). +-spec(enabled/1 :: (rabbit_types:vhost()) -> boolean()). +-spec(tap_in/2 :: (rabbit_types:basic_message(), state()) -> 'ok'). +-spec(tap_out/2 :: (rabbit_amqqueue:qmsg(), state()) -> 'ok'). -spec(start/1 :: (rabbit_types:vhost()) -> 'ok'). -spec(stop/1 :: (rabbit_types:vhost()) -> 'ok'). @@ -43,26 +43,26 @@ %%---------------------------------------------------------------------------- init(VHost) -> - case tracing(VHost) of + case enabled(VHost) of false -> none; true -> {ok, X} = rabbit_exchange:lookup( rabbit_misc:r(VHost, exchange, ?XNAME)), X end. -tracing(VHost) -> +enabled(VHost) -> {ok, VHosts} = application:get_env(rabbit, ?TRACE_VHOSTS), lists:member(VHost, VHosts). -tap_trace_in(Msg = #basic_message{exchange_name = #resource{name = XName}}, - TraceX) -> - maybe_trace(TraceX, Msg, <<"publish">>, XName, []). +tap_in(_Msg, none) -> ok; +tap_in(Msg = #basic_message{exchange_name = #resource{name = XName}}, TraceX) -> + trace(TraceX, Msg, <<"publish">>, XName, []). -tap_trace_out({#resource{name = QName}, _QPid, _QMsgId, Redelivered, Msg}, - TraceX) -> +tap_out(_Msg, none) -> ok; +tap_out({#resource{name = QName}, _QPid, _QMsgId, Redelivered, Msg}, TraceX) -> RedeliveredNum = case Redelivered of true -> 1; false -> 0 end, - maybe_trace(TraceX, Msg, <<"deliver">>, QName, - [{<<"redelivered">>, signedint, RedeliveredNum}]). + trace(TraceX, Msg, <<"deliver">>, QName, + [{<<"redelivered">>, signedint, RedeliveredNum}]). %%---------------------------------------------------------------------------- @@ -83,14 +83,11 @@ update_config(Fun) -> %%---------------------------------------------------------------------------- -maybe_trace(none, _Msg, _RKPrefix, _RKSuffix, _Extra) -> +trace(#exchange{name = Name}, #basic_message{exchange_name = Name}, + _RKPrefix, _RKSuffix, _Extra) -> ok; -maybe_trace(#exchange{name = Name}, #basic_message{exchange_name = Name}, - _RKPrefix, _RKSuffix, _Extra) -> - ok; -maybe_trace(X, Msg = #basic_message{content = #content{ - payload_fragments_rev = PFR}}, - RKPrefix, RKSuffix, Extra) -> +trace(X, Msg = #basic_message{content = #content{payload_fragments_rev = PFR}}, + RKPrefix, RKSuffix, Extra) -> {ok, _, _} = rabbit_basic:publish( X, <<RKPrefix/binary, ".", RKSuffix/binary>>, #'P_basic'{headers = msg_to_table(Msg) ++ Extra}, PFR), diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl index 732c29b6..a36613db 100644 --- a/src/rabbit_types.erl +++ b/src/rabbit_types.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_types). @@ -64,12 +64,11 @@ #basic_message{exchange_name :: rabbit_exchange:name(), routing_keys :: [rabbit_router:routing_key()], content :: content(), - id :: msg_id(), + id :: msg_id(), is_persistent :: boolean()}). -type(message() :: basic_message()). -type(delivery() :: #delivery{mandatory :: boolean(), - immediate :: boolean(), sender :: pid(), message :: message()}). -type(message_properties() :: @@ -118,8 +117,7 @@ exclusive_owner :: rabbit_types:maybe(pid()), arguments :: rabbit_framing:amqp_table(), pid :: rabbit_types:maybe(pid()), - slave_pids :: [pid()], - mirror_nodes :: [node()] | 'undefined' | 'all'}). + slave_pids :: [pid()]}). -type(exchange() :: #exchange{name :: rabbit_exchange:name(), diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index e1a7bcae..1047b823 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_upgrade). @@ -66,11 +66,11 @@ %% into the boot process by prelaunch before the mnesia application is %% started. By the time Mnesia is started the upgrades have happened %% (on the primary), or Mnesia has been reset (on the secondary) and -%% rabbit_mnesia:init_db/3 can then make the node rejoin the cluster +%% rabbit_mnesia:init_db_unchecked/2 can then make the node rejoin the cluster %% in the normal way. %% %% The non-mnesia upgrades are then triggered by -%% rabbit_mnesia:init_db/3. Of course, it's possible for a given +%% rabbit_mnesia:init_db_unchecked/2. Of course, it's possible for a given %% upgrade process to only require Mnesia upgrades, or only require %% non-Mnesia upgrades. In the latter case no Mnesia resets and %% reclusterings occur. @@ -121,19 +121,16 @@ remove_backup() -> info("upgrades: Mnesia backup removed~n", []). maybe_upgrade_mnesia() -> - %% rabbit_mnesia:all_clustered_nodes/0 will return [] at this point - %% if we are a RAM node since Mnesia has not started yet. - AllNodes = lists:usort(rabbit_mnesia:all_clustered_nodes() ++ - rabbit_mnesia:read_cluster_nodes_config()), + AllNodes = rabbit_mnesia:cluster_nodes(all), case rabbit_version:upgrades_required(mnesia) of {error, starting_from_scratch} -> ok; {error, version_not_available} -> case AllNodes of - [_] -> ok; - _ -> die("Cluster upgrade needed but upgrading from " - "< 2.1.1.~nUnfortunately you will need to " - "rebuild the cluster.", []) + [] -> die("Cluster upgrade needed but upgrading from " + "< 2.1.1.~nUnfortunately you will need to " + "rebuild the cluster.", []); + _ -> ok end; {error, _} = Err -> throw(Err); @@ -150,12 +147,12 @@ maybe_upgrade_mnesia() -> upgrade_mode(AllNodes) -> case nodes_running(AllNodes) of [] -> - AfterUs = rabbit_mnesia:read_previously_running_nodes(), - case {is_disc_node_legacy(), AfterUs} of - {true, []} -> + AfterUs = rabbit_mnesia:cluster_nodes(running) -- [node()], + case {node_type_legacy(), AfterUs} of + {disc, []} -> primary; - {true, _} -> - Filename = rabbit_mnesia:running_nodes_filename(), + {disc, _} -> + Filename = rabbit_node_monitor:running_nodes_filename(), die("Cluster upgrade needed but other disc nodes shut " "down after this one.~nPlease first start the last " "disc node to shut down.~n~nNote: if several disc " @@ -163,7 +160,7 @@ upgrade_mode(AllNodes) -> "all~nshow this message. In which case, remove " "the lock file on one of them and~nstart that node. " "The lock file on this node is:~n~n ~s ", [Filename]); - {false, _} -> + {ram, _} -> die("Cluster upgrade needed but this is a ram node.~n" "Please first start the last disc node to shut down.", []) @@ -204,7 +201,7 @@ primary_upgrade(Upgrades, Nodes) -> mnesia, Upgrades, fun () -> - force_tables(), + rabbit_table:force_load(), case Others of [] -> ok; _ -> info("mnesia upgrades: Breaking cluster~n", []), @@ -214,23 +211,13 @@ primary_upgrade(Upgrades, Nodes) -> end), ok. -force_tables() -> - [mnesia:force_load_table(T) || T <- rabbit_mnesia:table_names()]. - secondary_upgrade(AllNodes) -> %% must do this before we wipe out schema - IsDiscNode = is_disc_node_legacy(), + NodeType = node_type_legacy(), rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), cannot_delete_schema), - %% Note that we cluster with all nodes, rather than all disc nodes - %% (as we can't know all disc nodes at this point). This is safe as - %% we're not writing the cluster config, just setting up Mnesia. - ClusterNodes = case IsDiscNode of - true -> AllNodes; - false -> AllNodes -- [node()] - end, rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), - ok = rabbit_mnesia:init_db(ClusterNodes, true, fun () -> ok end), + ok = rabbit_mnesia:init_db_unchecked(AllNodes, NodeType), ok = rabbit_version:record_desired_for_scope(mnesia), ok. @@ -278,13 +265,16 @@ lock_filename() -> lock_filename(dir()). lock_filename(Dir) -> filename:join(Dir, ?LOCK_FILENAME). backup_dir() -> dir() ++ "-upgrade-backup". -is_disc_node_legacy() -> +node_type_legacy() -> %% This is pretty ugly but we can't start Mnesia and ask it (will %% hang), we can't look at the config file (may not include us %% even if we're a disc node). We also can't use - %% rabbit_mnesia:is_disc_node/0 because that will give false + %% rabbit_mnesia:node_type/0 because that will give false %% postivies on Rabbit up to 2.5.1. - filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")). + case filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")) of + true -> disc; + false -> ram + end. %% NB: we cannot use rabbit_log here since it may not have been %% started yet diff --git a/src/rabbit_upgrade_functions.erl b/src/rabbit_upgrade_functions.erl index 485ccc5f..d50cb282 100644 --- a/src/rabbit_upgrade_functions.erl +++ b/src/rabbit_upgrade_functions.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_upgrade_functions). @@ -37,6 +37,14 @@ -rabbit_upgrade({mirrored_supervisor, mnesia, []}). -rabbit_upgrade({topic_trie_node, mnesia, []}). -rabbit_upgrade({runtime_parameters, mnesia, []}). +-rabbit_upgrade({exchange_scratches, mnesia, [exchange_scratch]}). +-rabbit_upgrade({policy, mnesia, + [exchange_scratches, ha_mirrors]}). +-rabbit_upgrade({sync_slave_pids, mnesia, [policy]}). +-rabbit_upgrade({no_mirror_nodes, mnesia, [sync_slave_pids]}). +-rabbit_upgrade({gm_pids, mnesia, [no_mirror_nodes]}). +-rabbit_upgrade({exchange_decorators, mnesia, [policy]}). +-rabbit_upgrade({policy_apply_to, mnesia, [runtime_parameters]}). %% ------------------------------------------------------------------- @@ -58,6 +66,12 @@ -spec(mirrored_supervisor/0 :: () -> 'ok'). -spec(topic_trie_node/0 :: () -> 'ok'). -spec(runtime_parameters/0 :: () -> 'ok'). +-spec(policy/0 :: () -> 'ok'). +-spec(sync_slave_pids/0 :: () -> 'ok'). +-spec(no_mirror_nodes/0 :: () -> 'ok'). +-spec(gm_pids/0 :: () -> 'ok'). +-spec(exchange_decorators/0 :: () -> 'ok'). +-spec(policy_apply_to/0 :: () -> 'ok'). -endif. @@ -193,15 +207,131 @@ runtime_parameters() -> {attributes, [key, value]}, {disc_copies, [node()]}]). +exchange_scratches() -> + ok = exchange_scratches(rabbit_exchange), + ok = exchange_scratches(rabbit_durable_exchange). + +exchange_scratches(Table) -> + transform( + Table, + fun ({exchange, Name, Type = <<"x-federation">>, Dur, AutoDel, Int, Args, + Scratch}) -> + Scratches = orddict:store(federation, Scratch, orddict:new()), + {exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches}; + %% We assert here that nothing else uses the scratch mechanism ATM + ({exchange, Name, Type, Dur, AutoDel, Int, Args, undefined}) -> + {exchange, Name, Type, Dur, AutoDel, Int, Args, undefined} + end, + [name, type, durable, auto_delete, internal, arguments, scratches]). + +policy() -> + ok = exchange_policy(rabbit_exchange), + ok = exchange_policy(rabbit_durable_exchange), + ok = queue_policy(rabbit_queue), + ok = queue_policy(rabbit_durable_queue). + +exchange_policy(Table) -> + transform( + Table, + fun ({exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches}) -> + {exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches, + undefined} + end, + [name, type, durable, auto_delete, internal, arguments, scratches, + policy]). + +queue_policy(Table) -> + transform( + Table, + fun ({amqqueue, Name, Dur, AutoDel, Excl, Args, Pid, SPids, MNodes}) -> + {amqqueue, Name, Dur, AutoDel, Excl, Args, Pid, SPids, MNodes, + undefined} + end, + [name, durable, auto_delete, exclusive_owner, arguments, pid, + slave_pids, mirror_nodes, policy]). + +sync_slave_pids() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddSyncSlavesFun = + fun ({amqqueue, N, D, AD, Excl, Args, Pid, SPids, MNodes, Pol}) -> + {amqqueue, N, D, AD, Excl, Args, Pid, SPids, [], MNodes, Pol} + end, + [ok = transform(T, AddSyncSlavesFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, mirror_nodes, policy]) + || T <- Tables], + ok. + +no_mirror_nodes() -> + Tables = [rabbit_queue, rabbit_durable_queue], + RemoveMirrorNodesFun = + fun ({amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, _MNodes, Pol}) -> + {amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol} + end, + [ok = transform(T, RemoveMirrorNodesFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, policy]) + || T <- Tables], + ok. + +gm_pids() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddGMPidsFun = + fun ({amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol}) -> + {amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol, []} + end, + [ok = transform(T, AddGMPidsFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, policy, gm_pids]) + || T <- Tables], + ok. + +exchange_decorators() -> + ok = exchange_decorators(rabbit_exchange), + ok = exchange_decorators(rabbit_durable_exchange). + +exchange_decorators(Table) -> + transform( + Table, + fun ({exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches, + Policy}) -> + {exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches, Policy, + {[], []}} + end, + [name, type, durable, auto_delete, internal, arguments, scratches, policy, + decorators]). + +policy_apply_to() -> + transform( + rabbit_runtime_parameters, + fun ({runtime_parameters, Key = {_VHost, <<"policy">>, _Name}, Value}) -> + ApplyTo = apply_to(proplists:get_value(<<"definition">>, Value)), + {runtime_parameters, Key, [{<<"apply-to">>, ApplyTo} | Value]}; + ({runtime_parameters, Key, Value}) -> + {runtime_parameters, Key, Value} + end, + [key, value]), + rabbit_policy:invalidate(), + ok. + +apply_to(Def) -> + case [proplists:get_value(K, Def) || + K <- [<<"federation-upstream-set">>, <<"ha-mode">>]] of + [undefined, undefined] -> <<"all">>; + [_, undefined] -> <<"exchanges">>; + [undefined, _] -> <<"queues">>; + [_, _] -> <<"all">> + end. + %%-------------------------------------------------------------------- transform(TableName, Fun, FieldList) -> - rabbit_mnesia:wait_for_tables([TableName]), + rabbit_table:wait([TableName]), {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList), ok. transform(TableName, Fun, FieldList, NewRecordName) -> - rabbit_mnesia:wait_for_tables([TableName]), + rabbit_table:wait([TableName]), {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList, NewRecordName), ok. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 49213c95..ac2b9f52 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -10,18 +10,19 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_variable_queue). --export([init/3, terminate/2, delete_and_terminate/2, purge/1, - publish/4, publish_delivered/5, drain_confirmed/1, - dropwhile/3, fetch/2, ack/2, requeue/2, len/1, is_empty/1, - set_ram_duration_target/2, ram_duration/1, needs_timeout/1, - timeout/1, handle_pre_hibernate/1, status/1, invoke/3, - is_duplicate/2, discard/3, multiple_routing_keys/0, fold/3]). +-export([init/3, terminate/2, delete_and_terminate/2, purge/1, purge_acks/1, + publish/5, publish_delivered/4, discard/3, drain_confirmed/1, + dropwhile/2, fetchwhile/4, + fetch/2, drop/2, ack/2, requeue/2, ackfold/4, fold/3, len/1, + is_empty/1, depth/1, set_ram_duration_target/2, ram_duration/1, + needs_timeout/1, timeout/1, handle_pre_hibernate/1, status/1, invoke/3, + is_duplicate/2, multiple_routing_keys/0]). -export([start/1, stop/0]). @@ -254,16 +255,13 @@ q3, q4, next_seq_id, - pending_ack, - pending_ack_index, - ram_ack_index, + ram_pending_ack, + disk_pending_ack, index_state, msg_store_clients, durable, transient_threshold, - async_callback, - len, persistent_count, @@ -348,16 +346,14 @@ q3 :: ?QUEUE:?QUEUE(), q4 :: ?QUEUE:?QUEUE(), next_seq_id :: seq_id(), - pending_ack :: gb_tree(), - ram_ack_index :: gb_tree(), + ram_pending_ack :: gb_tree(), + disk_pending_ack :: gb_tree(), index_state :: any(), msg_store_clients :: 'undefined' | {{any(), binary()}, {any(), binary()}}, durable :: boolean(), transient_threshold :: non_neg_integer(), - async_callback :: rabbit_backing_queue:async_callback(), - len :: non_neg_integer(), persistent_count :: non_neg_integer(), @@ -426,7 +422,7 @@ init(Queue, Recover, AsyncCallback) -> init(#amqqueue { name = QueueName, durable = IsDurable }, false, AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun) -> IndexState = rabbit_queue_index:init(QueueName, MsgIdxOnDiskFun), - init(IsDurable, IndexState, 0, [], AsyncCallback, + init(IsDurable, IndexState, 0, [], case IsDurable of true -> msg_store_client_init(?PERSISTENT_MSG_STORE, MsgOnDiskFun, AsyncCallback); @@ -454,7 +450,7 @@ init(#amqqueue { name = QueueName, durable = true }, true, rabbit_msg_store:contains(MsgId, PersistentClient) end, MsgIdxOnDiskFun), - init(true, IndexState, DeltaCount, Terms1, AsyncCallback, + init(true, IndexState, DeltaCount, Terms1, PersistentClient, TransientClient). terminate(_Reason, State) -> @@ -519,18 +515,19 @@ purge(State = #vqstate { q4 = Q4, ram_msg_count = 0, persistent_count = PCount1 })}. +purge_acks(State) -> a(purge_pending_ack(false, State)). + publish(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId }, MsgProps = #message_properties { needs_confirming = NeedsConfirming }, - _ChPid, State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4, - next_seq_id = SeqId, - len = Len, - in_counter = InCount, - persistent_count = PCount, - durable = IsDurable, - ram_msg_count = RamMsgCount, - unconfirmed = UC }) -> + IsDelivered, _ChPid, State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4, + next_seq_id = SeqId, + len = Len, + in_counter = InCount, + persistent_count = PCount, + durable = IsDurable, + unconfirmed = UC }) -> IsPersistent1 = IsDurable andalso IsPersistent, - MsgStatus = msg_status(IsPersistent1, SeqId, Msg, MsgProps), + MsgStatus = msg_status(IsPersistent1, IsDelivered, SeqId, Msg, MsgProps), {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), State2 = case ?QUEUE:is_empty(Q3) of false -> State1 #vqstate { q1 = ?QUEUE:in(m(MsgStatus1), Q1) }; @@ -538,36 +535,25 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId }, end, PCount1 = PCount + one_if(IsPersistent1), UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC), - a(reduce_memory_use(State2 #vqstate { next_seq_id = SeqId + 1, - len = Len + 1, - in_counter = InCount + 1, - persistent_count = PCount1, - ram_msg_count = RamMsgCount + 1, - unconfirmed = UC1 })). - -publish_delivered(false, #basic_message { id = MsgId }, - #message_properties { needs_confirming = NeedsConfirming }, - _ChPid, State = #vqstate { async_callback = Callback, - len = 0 }) -> - case NeedsConfirming of - true -> blind_confirm(Callback, gb_sets:singleton(MsgId)); - false -> ok - end, - {undefined, a(State)}; -publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent, - id = MsgId }, + a(reduce_memory_use( + inc_ram_msg_count(State2 #vqstate { next_seq_id = SeqId + 1, + len = Len + 1, + in_counter = InCount + 1, + persistent_count = PCount1, + unconfirmed = UC1 }))). + +publish_delivered(Msg = #basic_message { is_persistent = IsPersistent, + id = MsgId }, MsgProps = #message_properties { needs_confirming = NeedsConfirming }, - _ChPid, State = #vqstate { len = 0, - next_seq_id = SeqId, + _ChPid, State = #vqstate { next_seq_id = SeqId, out_counter = OutCount, in_counter = InCount, persistent_count = PCount, durable = IsDurable, unconfirmed = UC }) -> IsPersistent1 = IsDurable andalso IsPersistent, - MsgStatus = (msg_status(IsPersistent1, SeqId, Msg, MsgProps)) - #msg_status { is_delivered = true }, + MsgStatus = msg_status(IsPersistent1, true, SeqId, Msg, MsgProps), {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), State2 = record_pending_ack(m(MsgStatus1), State1), PCount1 = PCount + one_if(IsPersistent1), @@ -579,6 +565,8 @@ publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent, persistent_count = PCount1, unconfirmed = UC1 }))}. +discard(_MsgId, _ChPid, State) -> State. + drain_confirmed(State = #vqstate { confirmed = C }) -> case gb_sets:is_empty(C) of true -> {[], State}; %% common case @@ -586,27 +574,28 @@ drain_confirmed(State = #vqstate { confirmed = C }) -> confirmed = gb_sets:new() }} end. -dropwhile(Pred, AckRequired, State) -> dropwhile(Pred, AckRequired, State, []). +dropwhile(Pred, State) -> + case queue_out(State) of + {empty, State1} -> + {undefined, a(State1)}; + {{value, MsgStatus = #msg_status { msg_props = MsgProps }}, State1} -> + case Pred(MsgProps) of + true -> {_, State2} = remove(false, MsgStatus, State1), + dropwhile(Pred, State2); + false -> {MsgProps, a(in_r(MsgStatus, State1))} + end + end. -dropwhile(Pred, AckRequired, State, Msgs) -> - End = fun(S) when AckRequired -> {lists:reverse(Msgs), S}; - (S) -> {undefined, S} - end, +fetchwhile(Pred, Fun, Acc, State) -> case queue_out(State) of {empty, State1} -> - End(a(State1)); + {undefined, Acc, a(State1)}; {{value, MsgStatus = #msg_status { msg_props = MsgProps }}, State1} -> - case {Pred(MsgProps), AckRequired} of - {true, true} -> - {MsgStatus1, State2} = read_msg(MsgStatus, State1), - {{Msg, _, AckTag, _}, State3} = - internal_fetch(true, MsgStatus1, State2), - dropwhile(Pred, AckRequired, State3, [{Msg, AckTag} | Msgs]); - {true, false} -> - {_, State2} = internal_fetch(false, MsgStatus, State1), - dropwhile(Pred, AckRequired, State2, undefined); - {false, _} -> - End(a(in_r(MsgStatus, State1))) + case Pred(MsgProps) of + true -> {Msg, State2} = read_msg(MsgStatus, State1), + {AckTag, State3} = remove(true, MsgStatus, State2), + fetchwhile(Pred, Fun, Fun(Msg, AckTag, Acc), State3); + false -> {MsgProps, Acc, a(in_r(MsgStatus, State1))} end end. @@ -617,9 +606,18 @@ fetch(AckRequired, State) -> {{value, MsgStatus}, State1} -> %% it is possible that the message wasn't read from disk %% at this point, so read it in. - {MsgStatus1, State2} = read_msg(MsgStatus, State1), - {Res, State3} = internal_fetch(AckRequired, MsgStatus1, State2), - {Res, a(State3)} + {Msg, State2} = read_msg(MsgStatus, State1), + {AckTag, State3} = remove(AckRequired, MsgStatus, State2), + {{Msg, MsgStatus#msg_status.is_delivered, AckTag}, a(State3)} + end. + +drop(AckRequired, State) -> + case queue_out(State) of + {empty, State1} -> + {empty, a(State1)}; + {{value, MsgStatus}, State1} -> + {AckTag, State2} = remove(AckRequired, MsgStatus, State1), + {{MsgStatus#msg_status.msg_id, AckTag}, a(State2)} end. ack([], State) -> @@ -645,17 +643,6 @@ ack(AckTags, State) -> persistent_count = PCount1, ack_out_counter = AckOutCount + length(AckTags) })}. -fold(undefined, State, _AckTags) -> - State; -fold(MsgFun, State = #vqstate{pending_ack = PA}, AckTags) -> - lists:foldl( - fun(SeqId, State1) -> - {MsgStatus, State2} = - read_msg(gb_trees:get(SeqId, PA), State1), - MsgFun(MsgStatus#msg_status.msg, SeqId), - State2 - end, State, AckTags). - requeue(AckTags, #vqstate { delta = Delta, q3 = Q3, q4 = Q4, @@ -677,10 +664,29 @@ requeue(AckTags, #vqstate { delta = Delta, in_counter = InCounter + MsgCount, len = Len + MsgCount }))}. +ackfold(MsgFun, Acc, State, AckTags) -> + {AccN, StateN} = + lists:foldl(fun(SeqId, {Acc0, State0}) -> + MsgStatus = lookup_pending_ack(SeqId, State0), + {Msg, State1} = read_msg(MsgStatus, State0), + {MsgFun(Msg, SeqId, Acc0), State1} + end, {Acc, State}, AckTags), + {AccN, a(StateN)}. + +fold(Fun, Acc, State = #vqstate{index_state = IndexState}) -> + {Its, IndexState1} = lists:foldl(fun inext/2, {[], IndexState}, + [msg_iterator(State), + disk_ack_iterator(State), + ram_ack_iterator(State)]), + ifold(Fun, Acc, Its, State#vqstate{index_state = IndexState1}). + len(#vqstate { len = Len }) -> Len. is_empty(State) -> 0 == len(State). +depth(State = #vqstate { ram_pending_ack = RPA, disk_pending_ack = DPA }) -> + len(State) + gb_trees:size(RPA) + gb_trees:size(DPA). + set_ram_duration_target( DurationTarget, State = #vqstate { rates = #rates { avg_egress = AvgEgressRate, @@ -716,7 +722,7 @@ ram_duration(State = #vqstate { ack_out_counter = AckOutCount, ram_msg_count = RamMsgCount, ram_msg_count_prev = RamMsgCountPrev, - ram_ack_index = RamAckIndex, + ram_pending_ack = RPA, ram_ack_count_prev = RamAckCountPrev }) -> Now = now(), {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), @@ -727,7 +733,7 @@ ram_duration(State = #vqstate { {AvgAckIngressRate, AckIngress1} = update_rate(Now, AckTimestamp, AckInCount, AckIngress), - RamAckCount = gb_trees:size(RamAckIndex), + RamAckCount = gb_trees:size(RPA), Duration = %% msgs+acks / (msgs+acks/sec) == sec case (AvgEgressRate == 0 andalso AvgIngressRate == 0 andalso @@ -759,21 +765,20 @@ ram_duration(State = #vqstate { ram_msg_count_prev = RamMsgCount, ram_ack_count_prev = RamAckCount }}. -needs_timeout(State = #vqstate { index_state = IndexState }) -> - case must_sync_index(State) of - true -> timed; - false -> - case rabbit_queue_index:needs_sync(IndexState) of - true -> idle; - false -> case reduce_memory_use( - fun (_Quota, State1) -> {0, State1} end, - fun (_Quota, State1) -> State1 end, - fun (_Quota, State1) -> {0, State1} end, - State) of - {true, _State} -> idle; - {false, _State} -> false - end - end +needs_timeout(State = #vqstate { index_state = IndexState, + target_ram_count = TargetRamCount }) -> + case rabbit_queue_index:needs_sync(IndexState) of + confirms -> timed; + other -> idle; + false when TargetRamCount == infinity -> false; + false -> case reduce_memory_use( + fun (_Quota, State1) -> {0, State1} end, + fun (_Quota, State1) -> State1 end, + fun (_Quota, State1) -> {0, State1} end, + State) of + {true, _State} -> idle; + {false, _State} -> false + end end. timeout(State = #vqstate { index_state = IndexState }) -> @@ -787,8 +792,8 @@ handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, len = Len, - pending_ack = PA, - ram_ack_index = RAI, + ram_pending_ack = RPA, + disk_pending_ack = DPA, target_ram_count = TargetRamCount, ram_msg_count = RamMsgCount, next_seq_id = NextSeqId, @@ -803,10 +808,10 @@ status(#vqstate { {q3 , ?QUEUE:len(Q3)}, {q4 , ?QUEUE:len(Q4)}, {len , Len}, - {pending_acks , gb_trees:size(PA)}, + {pending_acks , gb_trees:size(RPA) + gb_trees:size(DPA)}, {target_ram_count , TargetRamCount}, {ram_msg_count , RamMsgCount}, - {ram_ack_count , gb_trees:size(RAI)}, + {ram_ack_count , gb_trees:size(RPA)}, {next_seq_id , NextSeqId}, {persistent_count , PersistentCount}, {avg_ingress_rate , AvgIngressRate}, @@ -814,12 +819,11 @@ status(#vqstate { {avg_ack_ingress_rate, AvgAckIngressRate}, {avg_ack_egress_rate , AvgAckEgressRate} ]. -invoke(?MODULE, Fun, State) -> Fun(?MODULE, State). +invoke(?MODULE, Fun, State) -> Fun(?MODULE, State); +invoke( _, _, State) -> State. is_duplicate(_Msg, State) -> {false, State}. -discard(_Msg, _ChPid, State) -> State. - %%---------------------------------------------------------------------------- %% Minor helpers %%---------------------------------------------------------------------------- @@ -843,6 +847,7 @@ a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, true = Len >= 0, true = PersistentCount >= 0, true = RamMsgCount >= 0, + true = RamMsgCount =< Len, State. @@ -867,15 +872,28 @@ cons_if(true, E, L) -> [E | L]; cons_if(false, _E, L) -> L. gb_sets_maybe_insert(false, _Val, Set) -> Set; -%% when requeueing, we re-add a msg_id to the unconfirmed set -gb_sets_maybe_insert(true, Val, Set) -> gb_sets:add(Val, Set). - -msg_status(IsPersistent, SeqId, Msg = #basic_message { id = MsgId }, - MsgProps) -> - #msg_status { seq_id = SeqId, msg_id = MsgId, msg = Msg, - is_persistent = IsPersistent, is_delivered = false, - msg_on_disk = false, index_on_disk = false, - msg_props = MsgProps }. +gb_sets_maybe_insert(true, Val, Set) -> gb_sets:add(Val, Set). + +msg_status(IsPersistent, IsDelivered, SeqId, + Msg = #basic_message {id = MsgId}, MsgProps) -> + #msg_status{seq_id = SeqId, + msg_id = MsgId, + msg = Msg, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = false, + index_on_disk = false, + msg_props = MsgProps}. + +beta_msg_status({MsgId, SeqId, MsgProps, IsPersistent, IsDelivered}) -> + #msg_status{seq_id = SeqId, + msg_id = MsgId, + msg = undefined, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = true, + index_on_disk = true, + msg_props = MsgProps}. trim_msg_status(MsgStatus) -> MsgStatus #msg_status { msg = undefined }. @@ -939,31 +957,21 @@ maybe_write_delivered(false, _SeqId, IndexState) -> maybe_write_delivered(true, SeqId, IndexState) -> rabbit_queue_index:deliver([SeqId], IndexState). -betas_from_index_entries(List, TransientThreshold, PA, IndexState) -> +betas_from_index_entries(List, TransientThreshold, RPA, DPA, IndexState) -> {Filtered, Delivers, Acks} = lists:foldr( - fun ({MsgId, SeqId, MsgProps, IsPersistent, IsDelivered}, + fun ({_MsgId, SeqId, _MsgProps, IsPersistent, IsDelivered} = M, {Filtered1, Delivers1, Acks1} = Acc) -> case SeqId < TransientThreshold andalso not IsPersistent of true -> {Filtered1, cons_if(not IsDelivered, SeqId, Delivers1), [SeqId | Acks1]}; - false -> case gb_trees:is_defined(SeqId, PA) of - false -> - {?QUEUE:in_r( - m(#msg_status { - seq_id = SeqId, - msg_id = MsgId, - msg = undefined, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - msg_on_disk = true, - index_on_disk = true, - msg_props = MsgProps - }), Filtered1), - Delivers1, Acks1}; - true -> - Acc + false -> case (gb_trees:is_defined(SeqId, RPA) orelse + gb_trees:is_defined(SeqId, DPA)) of + false -> {?QUEUE:in_r(m(beta_msg_status(M)), + Filtered1), + Delivers1, Acks1}; + true -> Acc end end end, {?QUEUE:new(), [], []}, List), @@ -991,7 +999,7 @@ update_rate(Now, Then, Count, {OThen, OCount}) -> %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -init(IsDurable, IndexState, DeltaCount, Terms, AsyncCallback, +init(IsDurable, IndexState, DeltaCount, Terms, PersistentClient, TransientClient) -> {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState), @@ -1010,15 +1018,13 @@ init(IsDurable, IndexState, DeltaCount, Terms, AsyncCallback, q3 = ?QUEUE:new(), q4 = ?QUEUE:new(), next_seq_id = NextSeqId, - pending_ack = gb_trees:empty(), - ram_ack_index = gb_trees:empty(), + ram_pending_ack = gb_trees:empty(), + disk_pending_ack = gb_trees:empty(), index_state = IndexState1, msg_store_clients = {PersistentClient, TransientClient}, durable = IsDurable, transient_threshold = NextSeqId, - async_callback = AsyncCallback, - len = DeltaCount1, persistent_count = DeltaCount1, @@ -1049,9 +1055,11 @@ in_r(MsgStatus = #msg_status { msg = undefined }, State = #vqstate { q3 = Q3, q4 = Q4 }) -> case ?QUEUE:is_empty(Q4) of true -> State #vqstate { q3 = ?QUEUE:in_r(MsgStatus, Q3) }; - false -> {MsgStatus1, State1 = #vqstate { q4 = Q4a }} = + false -> {Msg, State1 = #vqstate { q4 = Q4a }} = read_msg(MsgStatus, State), - State1 #vqstate { q4 = ?QUEUE:in_r(MsgStatus1, Q4a) } + inc_ram_msg_count( + State1 #vqstate { q4 = ?QUEUE:in_r(MsgStatus#msg_status { + msg = Msg }, Q4a) }) end; in_r(MsgStatus, State = #vqstate { q4 = Q4 }) -> State #vqstate { q4 = ?QUEUE:in_r(MsgStatus, Q4) }. @@ -1067,33 +1075,35 @@ queue_out(State = #vqstate { q4 = Q4 }) -> {{value, MsgStatus}, State #vqstate { q4 = Q4a }} end. -read_msg(MsgStatus = #msg_status { msg = undefined, - msg_id = MsgId, - is_persistent = IsPersistent }, - State = #vqstate { ram_msg_count = RamMsgCount, - msg_store_clients = MSCState}) -> +read_msg(#msg_status{msg = undefined, + msg_id = MsgId, + is_persistent = IsPersistent}, State) -> + read_msg(MsgId, IsPersistent, State); +read_msg(#msg_status{msg = Msg}, State) -> + {Msg, State}. + +read_msg(MsgId, IsPersistent, State = #vqstate{msg_store_clients = MSCState}) -> {{ok, Msg = #basic_message {}}, MSCState1} = msg_store_read(MSCState, IsPersistent, MsgId), - {MsgStatus #msg_status { msg = Msg }, - State #vqstate { ram_msg_count = RamMsgCount + 1, - msg_store_clients = MSCState1 }}; -read_msg(MsgStatus, State) -> - {MsgStatus, State}. - -internal_fetch(AckRequired, MsgStatus = #msg_status { - seq_id = SeqId, - msg_id = MsgId, - msg = Msg, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - msg_on_disk = MsgOnDisk, - index_on_disk = IndexOnDisk }, - State = #vqstate {ram_msg_count = RamMsgCount, - out_counter = OutCount, - index_state = IndexState, - msg_store_clients = MSCState, - len = Len, - persistent_count = PCount }) -> + {Msg, State #vqstate {msg_store_clients = MSCState1}}. + +inc_ram_msg_count(State = #vqstate{ram_msg_count = RamMsgCount}) -> + State#vqstate{ram_msg_count = RamMsgCount + 1}. + +remove(AckRequired, MsgStatus = #msg_status { + seq_id = SeqId, + msg_id = MsgId, + msg = Msg, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }, + State = #vqstate {ram_msg_count = RamMsgCount, + out_counter = OutCount, + index_state = IndexState, + msg_store_clients = MSCState, + len = Len, + persistent_count = PCount}) -> %% 1. Mark it delivered if necessary IndexState1 = maybe_write_delivered( IndexOnDisk andalso not IsDelivered, @@ -1104,12 +1114,11 @@ internal_fetch(AckRequired, MsgStatus = #msg_status { ok = msg_store_remove(MSCState, IsPersistent, [MsgId]) end, Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end, - IndexState2 = - case {AckRequired, MsgOnDisk, IndexOnDisk} of - {false, true, false} -> Rem(), IndexState1; - {false, true, true} -> Rem(), Ack(); - _ -> IndexState1 - end, + IndexState2 = case {AckRequired, MsgOnDisk, IndexOnDisk} of + {false, true, false} -> Rem(), IndexState1; + {false, true, true} -> Rem(), Ack(); + _ -> IndexState1 + end, %% 3. If an ack is required, add something sensible to PA {AckTag, State1} = case AckRequired of @@ -1120,16 +1129,14 @@ internal_fetch(AckRequired, MsgStatus = #msg_status { false -> {undefined, State} end, - PCount1 = PCount - one_if(IsPersistent andalso not AckRequired), - Len1 = Len - 1, + PCount1 = PCount - one_if(IsPersistent andalso not AckRequired), RamMsgCount1 = RamMsgCount - one_if(Msg =/= undefined), - {{Msg, IsDelivered, AckTag, Len1}, - State1 #vqstate { ram_msg_count = RamMsgCount1, - out_counter = OutCount + 1, - index_state = IndexState2, - len = Len1, - persistent_count = PCount1 }}. + {AckTag, State1 #vqstate {ram_msg_count = RamMsgCount1, + out_counter = OutCount + 1, + index_state = IndexState2, + len = Len - 1, + persistent_count = PCount1}}. purge_betas_and_deltas(LensByStore, State = #vqstate { q3 = Q3, @@ -1226,37 +1233,48 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, %% Internal gubbins for acks %%---------------------------------------------------------------------------- -record_pending_ack(#msg_status { seq_id = SeqId, - msg_id = MsgId, - msg_on_disk = MsgOnDisk } = MsgStatus, - State = #vqstate { pending_ack = PA, - ram_ack_index = RAI, - ack_in_counter = AckInCount}) -> - {AckEntry, RAI1} = - case MsgOnDisk of - true -> {m(trim_msg_status(MsgStatus)), RAI}; - false -> {MsgStatus, gb_trees:insert(SeqId, MsgId, RAI)} +record_pending_ack(#msg_status { seq_id = SeqId, msg = Msg } = MsgStatus, + State = #vqstate { ram_pending_ack = RPA, + disk_pending_ack = DPA, + ack_in_counter = AckInCount}) -> + {RPA1, DPA1} = + case Msg of + undefined -> {RPA, gb_trees:insert(SeqId, MsgStatus, DPA)}; + _ -> {gb_trees:insert(SeqId, MsgStatus, RPA), DPA} end, - State #vqstate { pending_ack = gb_trees:insert(SeqId, AckEntry, PA), - ram_ack_index = RAI1, - ack_in_counter = AckInCount + 1}. + State #vqstate { ram_pending_ack = RPA1, + disk_pending_ack = DPA1, + ack_in_counter = AckInCount + 1}. + +lookup_pending_ack(SeqId, #vqstate { ram_pending_ack = RPA, + disk_pending_ack = DPA }) -> + case gb_trees:lookup(SeqId, RPA) of + {value, V} -> V; + none -> gb_trees:get(SeqId, DPA) + end. -remove_pending_ack(SeqId, State = #vqstate { pending_ack = PA, - ram_ack_index = RAI }) -> - {gb_trees:get(SeqId, PA), - State #vqstate { pending_ack = gb_trees:delete(SeqId, PA), - ram_ack_index = gb_trees:delete_any(SeqId, RAI) }}. +remove_pending_ack(SeqId, State = #vqstate { ram_pending_ack = RPA, + disk_pending_ack = DPA }) -> + case gb_trees:lookup(SeqId, RPA) of + {value, V} -> RPA1 = gb_trees:delete(SeqId, RPA), + {V, State #vqstate { ram_pending_ack = RPA1 }}; + none -> DPA1 = gb_trees:delete(SeqId, DPA), + {gb_trees:get(SeqId, DPA), + State #vqstate { disk_pending_ack = DPA1 }} + end. purge_pending_ack(KeepPersistent, - State = #vqstate { pending_ack = PA, + State = #vqstate { ram_pending_ack = RPA, + disk_pending_ack = DPA, index_state = IndexState, msg_store_clients = MSCState }) -> + F = fun (_SeqId, MsgStatus, Acc) -> accumulate_ack(MsgStatus, Acc) end, {IndexOnDiskSeqIds, MsgIdsByStore, _AllMsgIds} = - rabbit_misc:gb_trees_fold(fun (_SeqId, MsgStatus, Acc) -> - accumulate_ack(MsgStatus, Acc) - end, accumulate_ack_init(), PA), - State1 = State #vqstate { pending_ack = gb_trees:empty(), - ram_ack_index = gb_trees:empty() }, + rabbit_misc:gb_trees_fold( + F, rabbit_misc:gb_trees_fold(F, accumulate_ack_init(), RPA), DPA), + State1 = State #vqstate { ram_pending_ack = gb_trees:empty(), + disk_pending_ack = gb_trees:empty() }, + case KeepPersistent of true -> case orddict:find(false, MsgIdsByStore) of error -> State1; @@ -1306,27 +1324,9 @@ record_confirms(MsgIdSet, State = #vqstate { msgs_on_disk = MOD, unconfirmed = rabbit_misc:gb_sets_difference(UC, MsgIdSet), confirmed = gb_sets:union(C, MsgIdSet) }. -must_sync_index(#vqstate { msg_indices_on_disk = MIOD, - unconfirmed = UC }) -> - %% If UC is empty then by definition, MIOD and MOD are also empty - %% and there's nothing that can be pending a sync. - - %% If UC is not empty, then we want to find is_empty(UC - MIOD), - %% but the subtraction can be expensive. Thus instead, we test to - %% see if UC is a subset of MIOD. This can only be the case if - %% MIOD == UC, which would indicate that every message in UC is - %% also in MIOD and is thus _all_ pending on a msg_store sync, not - %% on a qi sync. Thus the negation of this is sufficient. Because - %% is_subset is short circuiting, this is more efficient than the - %% subtraction. - not (gb_sets:is_empty(UC) orelse gb_sets:is_subset(UC, MIOD)). - -blind_confirm(Callback, MsgIdSet) -> - Callback(?MODULE, - fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end). - msgs_written_to_disk(Callback, MsgIdSet, ignored) -> - blind_confirm(Callback, MsgIdSet); + Callback(?MODULE, + fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end); msgs_written_to_disk(Callback, MsgIdSet, written) -> Callback(?MODULE, fun (?MODULE, State = #vqstate { msgs_on_disk = MOD, @@ -1356,16 +1356,14 @@ msg_indices_written_to_disk(Callback, MsgIdSet) -> %%---------------------------------------------------------------------------- publish_alpha(#msg_status { msg = undefined } = MsgStatus, State) -> - read_msg(MsgStatus, State); -publish_alpha(MsgStatus, #vqstate {ram_msg_count = RamMsgCount } = State) -> - {MsgStatus, State #vqstate { ram_msg_count = RamMsgCount + 1 }}. + {Msg, State1} = read_msg(MsgStatus, State), + {MsgStatus#msg_status { msg = Msg }, inc_ram_msg_count(State1)}; +publish_alpha(MsgStatus, State) -> + {MsgStatus, inc_ram_msg_count(State)}. publish_beta(MsgStatus, State) -> - {#msg_status { msg = Msg} = MsgStatus1, - #vqstate { ram_msg_count = RamMsgCount } = State1} = - maybe_write_to_disk(true, false, MsgStatus, State), - {MsgStatus1, State1 #vqstate { - ram_msg_count = RamMsgCount + one_if(Msg =/= undefined) }}. + {MsgStatus1, State1} = maybe_write_to_disk(true, false, MsgStatus, State), + {m(trim_msg_status(MsgStatus1)), State1}. %% Rebuild queue, inserting sequence ids to maintain ordering queue_merge(SeqIds, Q, MsgIds, Limit, PubFun, State) -> @@ -1422,6 +1420,82 @@ delta_limit(?BLANK_DELTA_PATTERN(_X)) -> undefined; delta_limit(#delta { start_seq_id = StartSeqId }) -> StartSeqId. %%---------------------------------------------------------------------------- +%% Iterator +%%---------------------------------------------------------------------------- + +ram_ack_iterator(State) -> + {ack, gb_trees:iterator(State#vqstate.ram_pending_ack)}. + +disk_ack_iterator(State) -> + {ack, gb_trees:iterator(State#vqstate.disk_pending_ack)}. + +msg_iterator(State) -> istate(start, State). + +istate(start, State) -> {q4, State#vqstate.q4, State}; +istate(q4, State) -> {q3, State#vqstate.q3, State}; +istate(q3, State) -> {delta, State#vqstate.delta, State}; +istate(delta, State) -> {q2, State#vqstate.q2, State}; +istate(q2, State) -> {q1, State#vqstate.q1, State}; +istate(q1, _State) -> done. + +next({ack, It}, IndexState) -> + case gb_trees:next(It) of + none -> {empty, IndexState}; + {_SeqId, MsgStatus, It1} -> Next = {ack, It1}, + {value, MsgStatus, true, Next, IndexState} + end; +next(done, IndexState) -> {empty, IndexState}; +next({delta, #delta{start_seq_id = SeqId, + end_seq_id = SeqId}, State}, IndexState) -> + next(istate(delta, State), IndexState); +next({delta, #delta{start_seq_id = SeqId, + end_seq_id = SeqIdEnd} = Delta, State}, IndexState) -> + SeqIdB = rabbit_queue_index:next_segment_boundary(SeqId), + SeqId1 = lists:min([SeqIdB, SeqIdEnd]), + {List, IndexState1} = rabbit_queue_index:read(SeqId, SeqId1, IndexState), + next({delta, Delta#delta{start_seq_id = SeqId1}, List, State}, IndexState1); +next({delta, Delta, [], State}, IndexState) -> + next({delta, Delta, State}, IndexState); +next({delta, Delta, [{_, SeqId, _, _, _} = M | Rest], State}, IndexState) -> + case (gb_trees:is_defined(SeqId, State#vqstate.ram_pending_ack) orelse + gb_trees:is_defined(SeqId, State#vqstate.disk_pending_ack)) of + false -> Next = {delta, Delta, Rest, State}, + {value, beta_msg_status(M), false, Next, IndexState}; + true -> next({delta, Delta, Rest, State}, IndexState) + end; +next({Key, Q, State}, IndexState) -> + case ?QUEUE:out(Q) of + {empty, _Q} -> next(istate(Key, State), IndexState); + {{value, MsgStatus}, QN} -> Next = {Key, QN, State}, + {value, MsgStatus, false, Next, IndexState} + end. + +inext(It, {Its, IndexState}) -> + case next(It, IndexState) of + {empty, IndexState1} -> + {Its, IndexState1}; + {value, MsgStatus1, Unacked, It1, IndexState1} -> + {[{MsgStatus1, Unacked, It1} | Its], IndexState1} + end. + +ifold(_Fun, Acc, [], State) -> + {Acc, State}; +ifold(Fun, Acc, Its, State) -> + [{MsgStatus, Unacked, It} | Rest] = + lists:sort(fun ({#msg_status{seq_id = SeqId1}, _, _}, + {#msg_status{seq_id = SeqId2}, _, _}) -> + SeqId1 =< SeqId2 + end, Its), + {Msg, State1} = read_msg(MsgStatus, State), + case Fun(Msg, MsgStatus#msg_status.msg_props, Unacked, Acc) of + {stop, Acc1} -> + {Acc1, State}; + {cont, Acc1} -> + {Its1, IndexState1} = inext(It, {Rest, State1#vqstate.index_state}), + ifold(Fun, Acc1, Its1, State1#vqstate{index_state = IndexState1}) + end. + +%%---------------------------------------------------------------------------- %% Phase changes %%---------------------------------------------------------------------------- @@ -1444,12 +1518,9 @@ delta_limit(#delta { start_seq_id = StartSeqId }) -> StartSeqId. %% one segment's worth of messages in q3 - and thus would risk %% perpetually reporting the need for a conversion when no such %% conversion is needed. That in turn could cause an infinite loop. -reduce_memory_use(_AlphaBetaFun, _BetaDeltaFun, _AckFun, - State = #vqstate {target_ram_count = infinity}) -> - {false, State}; reduce_memory_use(AlphaBetaFun, BetaDeltaFun, AckFun, State = #vqstate { - ram_ack_index = RamAckIndex, + ram_pending_ack = RPA, ram_msg_count = RamMsgCount, target_ram_count = TargetRamCount, rates = #rates { avg_ingress = AvgIngress, @@ -1459,8 +1530,7 @@ reduce_memory_use(AlphaBetaFun, BetaDeltaFun, AckFun, }) -> {Reduce, State1 = #vqstate { q2 = Q2, q3 = Q3 }} = - case chunk_size(RamMsgCount + gb_trees:size(RamAckIndex), - TargetRamCount) of + case chunk_size(RamMsgCount + gb_trees:size(RPA), TargetRamCount) of 0 -> {false, State}; %% Reduce memory of pending acks and alphas. The order is %% determined based on which is growing faster. Whichever @@ -1485,23 +1555,23 @@ reduce_memory_use(AlphaBetaFun, BetaDeltaFun, AckFun, limit_ram_acks(0, State) -> {0, State}; -limit_ram_acks(Quota, State = #vqstate { pending_ack = PA, - ram_ack_index = RAI }) -> - case gb_trees:is_empty(RAI) of +limit_ram_acks(Quota, State = #vqstate { ram_pending_ack = RPA, + disk_pending_ack = DPA }) -> + case gb_trees:is_empty(RPA) of true -> {Quota, State}; false -> - {SeqId, MsgId, RAI1} = gb_trees:take_largest(RAI), - MsgStatus = #msg_status { msg_id = MsgId, is_persistent = false} = - gb_trees:get(SeqId, PA), + {SeqId, MsgStatus, RPA1} = gb_trees:take_largest(RPA), {MsgStatus1, State1} = maybe_write_to_disk(true, false, MsgStatus, State), - PA1 = gb_trees:update(SeqId, m(trim_msg_status(MsgStatus1)), PA), + DPA1 = gb_trees:insert(SeqId, m(trim_msg_status(MsgStatus1)), DPA), limit_ram_acks(Quota - 1, - State1 #vqstate { pending_ack = PA1, - ram_ack_index = RAI1 }) + State1 #vqstate { ram_pending_ack = RPA1, + disk_pending_ack = DPA1 }) end. +reduce_memory_use(State = #vqstate { target_ram_count = infinity }) -> + State; reduce_memory_use(State) -> {_, State1} = reduce_memory_use(fun push_alphas_to_betas/2, fun push_betas_to_deltas/2, @@ -1567,7 +1637,8 @@ maybe_deltas_to_betas(State = #vqstate { delta = Delta, q3 = Q3, index_state = IndexState, - pending_ack = PA, + ram_pending_ack = RPA, + disk_pending_ack = DPA, transient_threshold = TransientThreshold }) -> #delta { start_seq_id = DeltaSeqId, count = DeltaCount, @@ -1575,10 +1646,10 @@ maybe_deltas_to_betas(State = #vqstate { DeltaSeqId1 = lists:min([rabbit_queue_index:next_segment_boundary(DeltaSeqId), DeltaSeqIdEnd]), - {List, IndexState1} = - rabbit_queue_index:read(DeltaSeqId, DeltaSeqId1, IndexState), - {Q3a, IndexState2} = - betas_from_index_entries(List, TransientThreshold, PA, IndexState1), + {List, IndexState1} = rabbit_queue_index:read(DeltaSeqId, DeltaSeqId1, + IndexState), + {Q3a, IndexState2} = betas_from_index_entries(List, TransientThreshold, + RPA, DPA, IndexState1), State1 = State #vqstate { index_state = IndexState2 }, case ?QUEUE:len(Q3a) of 0 -> diff --git a/src/rabbit_version.erl b/src/rabbit_version.erl index 1cc7d6c8..c629180e 100644 --- a/src/rabbit_version.erl +++ b/src/rabbit_version.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_version). diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index 5548ef6d..fcf77bf9 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_vhost). @@ -20,7 +20,7 @@ %%---------------------------------------------------------------------------- --export([add/1, delete/1, exists/1, list/0, with/2]). +-export([add/1, delete/1, exists/1, list/0, with/2, assert/1]). -export([info/1, info/2, info_all/0, info_all/1]). -ifdef(use_specs). @@ -30,6 +30,7 @@ -spec(exists/1 :: (rabbit_types:vhost()) -> boolean()). -spec(list/0 :: () -> [rabbit_types:vhost()]). -spec(with/2 :: (rabbit_types:vhost(), rabbit_misc:thunk(A)) -> A). +-spec(assert/1 :: (rabbit_types:vhost()) -> 'ok'). -spec(info/1 :: (rabbit_types:vhost()) -> rabbit_types:infos()). -spec(info/2 :: (rabbit_types:vhost(), rabbit_types:info_keys()) @@ -70,6 +71,7 @@ add(VHostPath) -> {<<"amq.rabbitmq.trace">>, topic}]], ok end), + rabbit_event:notify(vhost_created, info(VHostPath)), R. delete(VHostPath) -> @@ -87,15 +89,19 @@ delete(VHostPath) -> with(VHostPath, fun () -> ok = internal_delete(VHostPath) end)), + ok = rabbit_event:notify(vhost_deleted, [{name, VHostPath}]), R. internal_delete(VHostPath) -> - lists:foreach( - fun (Info) -> - ok = rabbit_auth_backend_internal:clear_permissions( - proplists:get_value(user, Info), VHostPath) - end, - rabbit_auth_backend_internal:list_vhost_permissions(VHostPath)), + [ok = rabbit_auth_backend_internal:clear_permissions( + proplists:get_value(user, Info), VHostPath) + || Info <- rabbit_auth_backend_internal:list_vhost_permissions(VHostPath)], + [ok = rabbit_runtime_parameters:clear(VHostPath, + proplists:get_value(component, Info), + proplists:get_value(name, Info)) + || Info <- rabbit_runtime_parameters:list(VHostPath)], + [ok = rabbit_policy:delete(VHostPath, proplists:get_value(name, Info)) + || Info <- rabbit_policy:list(VHostPath)], ok = mnesia:delete({rabbit_vhost, VHostPath}), ok. @@ -115,12 +121,18 @@ with(VHostPath, Thunk) -> end end. +%% Like with/2 but outside an Mnesia tx +assert(VHostPath) -> case rabbit_vhost:exists(VHostPath) of + true -> ok; + false -> throw({error, {no_such_vhost, VHostPath}}) + end. + %%---------------------------------------------------------------------------- infos(Items, X) -> [{Item, i(Item, X)} || Item <- Items]. i(name, VHost) -> VHost; -i(tracing, VHost) -> rabbit_trace:tracing(VHost); +i(tracing, VHost) -> rabbit_trace:enabled(VHost); i(Item, _) -> throw({bad_argument, Item}). info(VHost) -> infos(?INFO_KEYS, VHost). diff --git a/src/rabbit_vm.erl b/src/rabbit_vm.erl new file mode 100644 index 00000000..597f9094 --- /dev/null +++ b/src/rabbit_vm.erl @@ -0,0 +1,228 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(rabbit_vm). + +-export([memory/0]). + +-define(MAGIC_PLUGINS, ["mochiweb", "webmachine", "cowboy", "sockjs", + "rfc4627_jsonrpc"]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(memory/0 :: () -> rabbit_types:infos()). + +-endif. + +%%---------------------------------------------------------------------------- + +%% Like erlang:memory(), but with awareness of rabbit-y things +memory() -> + ConnProcs = [rabbit_tcp_client_sup, ssl_connection_sup, amqp_sup], + QProcs = [rabbit_amqqueue_sup, rabbit_mirror_queue_slave_sup], + MsgIndexProcs = [msg_store_transient, msg_store_persistent], + MgmtDbProcs = [rabbit_mgmt_sup], + PluginProcs = plugin_sups(), + + All = [ConnProcs, QProcs, MsgIndexProcs, MgmtDbProcs, PluginProcs], + + {Sums, _Other} = sum_processes(lists:append(All), [memory]), + + [Conns, Qs, MsgIndexProc, MgmtDbProc, AllPlugins] = + [aggregate_memory(Names, Sums) || Names <- All], + + Mnesia = mnesia_memory(), + MsgIndexETS = ets_memory(rabbit_msg_store_ets_index), + MgmtDbETS = ets_memory(rabbit_mgmt_db), + Plugins = AllPlugins - MgmtDbProc, + + [{total, Total}, + {processes, Processes}, + {ets, ETS}, + {atom, Atom}, + {binary, Bin}, + {code, Code}, + {system, System}] = + erlang:memory([total, processes, ets, atom, binary, code, system]), + + OtherProc = Processes - Conns - Qs - MsgIndexProc - AllPlugins, + + [{total, Total}, + {connection_procs, Conns}, + {queue_procs, Qs}, + {plugins, Plugins}, + {other_proc, lists:max([0, OtherProc])}, %% [1] + {mnesia, Mnesia}, + {mgmt_db, MgmtDbETS + MgmtDbProc}, + {msg_index, MsgIndexETS + MsgIndexProc}, + {other_ets, ETS - Mnesia - MsgIndexETS - MgmtDbETS}, + {binary, Bin}, + {code, Code}, + {atom, Atom}, + {other_system, System - ETS - Atom - Bin - Code}]. + +%% [1] - erlang:memory(processes) can be less than the sum of its +%% parts. Rather than display something nonsensical, just silence any +%% claims about negative memory. See +%% http://erlang.org/pipermail/erlang-questions/2012-September/069320.html + +%%---------------------------------------------------------------------------- + +mnesia_memory() -> + case mnesia:system_info(is_running) of + yes -> lists:sum([bytes(mnesia:table_info(Tab, memory)) || + Tab <- mnesia:system_info(tables)]); + no -> 0 + end. + +ets_memory(Name) -> + lists:sum([bytes(ets:info(T, memory)) || T <- ets:all(), + N <- [ets:info(T, name)], + N =:= Name]). + +bytes(Words) -> Words * erlang:system_info(wordsize). + +plugin_sups() -> + lists:append([plugin_sup(App) || + {App, _, _} <- rabbit_misc:which_applications(), + is_plugin(atom_to_list(App))]). + +plugin_sup(App) -> + case application_controller:get_master(App) of + undefined -> []; + Master -> case application_master:get_child(Master) of + {Pid, _} when is_pid(Pid) -> [process_name(Pid)]; + Pid when is_pid(Pid) -> [process_name(Pid)]; + _ -> [] + end + end. + +process_name(Pid) -> + case process_info(Pid, registered_name) of + {registered_name, Name} -> Name; + _ -> Pid + end. + +is_plugin("rabbitmq_" ++ _) -> true; +is_plugin(App) -> lists:member(App, ?MAGIC_PLUGINS). + +aggregate_memory(Names, Sums) -> + lists:sum([extract_memory(Name, Sums) || Name <- Names]). + +extract_memory(Name, Sums) -> + {value, {_, Accs}} = lists:keysearch(Name, 1, Sums), + {value, {memory, V}} = lists:keysearch(memory, 1, Accs), + V. + +%%---------------------------------------------------------------------------- + +%% NB: this code is non-rabbit specific. + +-ifdef(use_specs). +-type(process() :: pid() | atom()). +-type(info_key() :: atom()). +-type(info_value() :: any()). +-type(info_item() :: {info_key(), info_value()}). +-type(accumulate() :: fun ((info_key(), info_value(), info_value()) -> + info_value())). +-spec(sum_processes/2 :: ([process()], [info_key()]) -> + {[{process(), [info_item()]}], [info_item()]}). +-spec(sum_processes/3 :: ([process()], accumulate(), [info_item()]) -> + {[{process(), [info_item()]}], [info_item()]}). +-endif. + +sum_processes(Names, Items) -> + sum_processes(Names, fun (_, X, Y) -> X + Y end, + [{Item, 0} || Item <- Items]). + +%% summarize the process_info of all processes based on their +%% '$ancestor' hierarchy, recorded in their process dictionary. +%% +%% The function takes +%% +%% 1) a list of names/pids of processes that are accumulation points +%% in the hierarchy. +%% +%% 2) a function that aggregates individual info items -taking the +%% info item key, value and accumulated value as the input and +%% producing a new accumulated value. +%% +%% 3) a list of info item key / initial accumulator value pairs. +%% +%% The process_info of a process is accumulated at the nearest of its +%% ancestors that is mentioned in the first argument, or, if no such +%% ancestor exists or the ancestor information is absent, in a special +%% 'other' bucket. +%% +%% The result is a pair consisting of +%% +%% 1) a k/v list, containing for each of the accumulation names/pids a +%% list of info items, containing the accumulated data, and +%% +%% 2) the 'other' bucket - a list of info items containing the +%% accumulated data of all processes with no matching ancestors +%% +%% Note that this function operates on names as well as pids, but +%% these must match whatever is contained in the '$ancestor' process +%% dictionary entry. Generally that means for all registered processes +%% the name should be used. +sum_processes(Names, Fun, Acc0) -> + Items = [Item || {Item, _Val0} <- Acc0], + Acc0Dict = orddict:from_list(Acc0), + NameAccs0 = orddict:from_list([{Name, Acc0Dict} || Name <- Names]), + {NameAccs, OtherAcc} = + lists:foldl( + fun (Pid, Acc) -> + InfoItems = [registered_name, dictionary | Items], + case process_info(Pid, InfoItems) of + undefined -> + Acc; + [{registered_name, RegName}, {dictionary, D} | Vals] -> + %% see docs for process_info/2 for the + %% special handling of 'registered_name' + %% info items + Extra = case RegName of + [] -> []; + N -> [N] + end, + accumulate(find_ancestor(Extra, D, Names), Fun, + orddict:from_list(Vals), Acc) + end + end, {NameAccs0, Acc0Dict}, processes()), + %% these conversions aren't strictly necessary; we do them simply + %% for the sake of encapsulating the representation. + {[{Name, orddict:to_list(Accs)} || + {Name, Accs} <- orddict:to_list(NameAccs)], + orddict:to_list(OtherAcc)}. + +find_ancestor(Extra, D, Names) -> + Ancestors = case lists:keysearch('$ancestors', 1, D) of + {value, {_, Ancs}} -> Ancs; + false -> [] + end, + case lists:splitwith(fun (A) -> not lists:member(A, Names) end, + Extra ++ Ancestors) of + {_, []} -> undefined; + {_, [Name | _]} -> Name + end. + +accumulate(undefined, Fun, ValsDict, {NameAccs, OtherAcc}) -> + {NameAccs, orddict:merge(Fun, ValsDict, OtherAcc)}; +accumulate(Name, Fun, ValsDict, {NameAccs, OtherAcc}) -> + F = fun (NameAcc) -> orddict:merge(Fun, ValsDict, NameAcc) end, + {orddict:update(Name, F, NameAccs), OtherAcc}. diff --git a/src/rabbit_writer.erl b/src/rabbit_writer.erl index f3a8cacf..bf6964d8 100644 --- a/src/rabbit_writer.erl +++ b/src/rabbit_writer.erl @@ -10,21 +10,29 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(rabbit_writer). -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([start/5, start_link/5, mainloop/2, mainloop1/2]). +-export([start/5, start_link/5, start/6, start_link/6]). + +-export([system_continue/3, system_terminate/4, system_code_change/4]). + -export([send_command/2, send_command/3, send_command_sync/2, send_command_sync/3, - send_command_and_notify/4, send_command_and_notify/5]). + send_command_and_notify/4, send_command_and_notify/5, + flush/1]). -export([internal_send_command/4, internal_send_command/6]). --record(wstate, {sock, channel, frame_max, protocol, pending}). +%% internal +-export([mainloop/2, mainloop1/2]). + +-record(wstate, {sock, channel, frame_max, protocol, reader, + stats_timer, pending}). -define(HIBERNATE_AFTER, 5000). @@ -40,6 +48,19 @@ (rabbit_net:socket(), rabbit_channel:channel_number(), non_neg_integer(), rabbit_types:protocol(), pid()) -> rabbit_types:ok(pid())). +-spec(start/6 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + non_neg_integer(), rabbit_types:protocol(), pid(), boolean()) + -> rabbit_types:ok(pid())). +-spec(start_link/6 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + non_neg_integer(), rabbit_types:protocol(), pid(), boolean()) + -> rabbit_types:ok(pid())). + +-spec(system_code_change/4 :: (_,_,_,_) -> {'ok',_}). +-spec(system_continue/3 :: (_,_,#wstate{}) -> any()). +-spec(system_terminate/4 :: (_,_,_,_) -> none()). + -spec(send_command/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). -spec(send_command/3 :: @@ -57,6 +78,7 @@ (pid(), pid(), pid(), rabbit_framing:amqp_method_record(), rabbit_types:content()) -> 'ok'). +-spec(flush/1 :: (pid()) -> 'ok'). -spec(internal_send_command/4 :: (rabbit_net:socket(), rabbit_channel:channel_number(), rabbit_framing:amqp_method_record(), rabbit_types:protocol()) @@ -67,63 +89,95 @@ non_neg_integer(), rabbit_types:protocol()) -> 'ok'). --spec(mainloop/2 :: (_,_) -> 'done'). --spec(mainloop1/2 :: (_,_) -> any()). - -endif. %%--------------------------------------------------------------------------- start(Sock, Channel, FrameMax, Protocol, ReaderPid) -> - {ok, - proc_lib:spawn(?MODULE, mainloop, [ReaderPid, - #wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax, - protocol = Protocol, - pending = []}])}. + start(Sock, Channel, FrameMax, Protocol, ReaderPid, false). start_link(Sock, Channel, FrameMax, Protocol, ReaderPid) -> - {ok, - proc_lib:spawn_link(?MODULE, mainloop, [ReaderPid, - #wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax, - protocol = Protocol, - pending = []}])}. - -mainloop(ReaderPid, State) -> + start_link(Sock, Channel, FrameMax, Protocol, ReaderPid, false). + +start(Sock, Channel, FrameMax, Protocol, ReaderPid, ReaderWantsStats) -> + State = initial_state(Sock, Channel, FrameMax, Protocol, ReaderPid, + ReaderWantsStats), + Deb = sys:debug_options([]), + {ok, proc_lib:spawn(?MODULE, mainloop, [Deb, State])}. + +start_link(Sock, Channel, FrameMax, Protocol, ReaderPid, ReaderWantsStats) -> + State = initial_state(Sock, Channel, FrameMax, Protocol, ReaderPid, + ReaderWantsStats), + Deb = sys:debug_options([]), + {ok, proc_lib:spawn_link(?MODULE, mainloop, [Deb, State])}. + +initial_state(Sock, Channel, FrameMax, Protocol, ReaderPid, ReaderWantsStats) -> + (case ReaderWantsStats of + true -> fun rabbit_event:init_stats_timer/2; + false -> fun rabbit_event:init_disabled_stats_timer/2 + end)(#wstate{sock = Sock, + channel = Channel, + frame_max = FrameMax, + protocol = Protocol, + reader = ReaderPid, + pending = []}, + #wstate.stats_timer). + +system_continue(Parent, Deb, State) -> + mainloop(Deb, State#wstate{reader = Parent}). + +system_terminate(Reason, _Parent, _Deb, _State) -> + exit(Reason). + +system_code_change(Misc, _Module, _OldVsn, _Extra) -> + {ok, Misc}. + +mainloop(Deb, State) -> try - mainloop1(ReaderPid, State) + mainloop1(Deb, State) catch - exit:Error -> ReaderPid ! {channel_exit, #wstate.channel, Error} + exit:Error -> #wstate{reader = ReaderPid, channel = Channel} = State, + ReaderPid ! {channel_exit, Channel, Error} end, done. -mainloop1(ReaderPid, State = #wstate{pending = []}) -> +mainloop1(Deb, State = #wstate{pending = []}) -> receive - Message -> ?MODULE:mainloop1(ReaderPid, handle_message(Message, State)) + Message -> {Deb1, State1} = handle_message(Deb, Message, State), + ?MODULE:mainloop1(Deb1, State1) after ?HIBERNATE_AFTER -> - erlang:hibernate(?MODULE, mainloop, [ReaderPid, State]) + erlang:hibernate(?MODULE, mainloop, [Deb, State]) end; -mainloop1(ReaderPid, State) -> +mainloop1(Deb, State) -> receive - Message -> ?MODULE:mainloop1(ReaderPid, handle_message(Message, State)) + Message -> {Deb1, State1} = handle_message(Deb, Message, State), + ?MODULE:mainloop1(Deb1, State1) after 0 -> - ?MODULE:mainloop1(ReaderPid, flush(State)) + ?MODULE:mainloop1(Deb, internal_flush(State)) end. +handle_message(Deb, {system, From, Req}, State = #wstate{reader = Parent}) -> + sys:handle_system_msg(Req, From, Parent, ?MODULE, Deb, State); +handle_message(Deb, Message, State) -> + {Deb, handle_message(Message, State)}. + handle_message({send_command, MethodRecord}, State) -> internal_send_command_async(MethodRecord, State); handle_message({send_command, MethodRecord, Content}, State) -> internal_send_command_async(MethodRecord, Content, State); handle_message({'$gen_call', From, {send_command_sync, MethodRecord}}, State) -> - State1 = flush(internal_send_command_async(MethodRecord, State)), + State1 = internal_flush( + internal_send_command_async(MethodRecord, State)), gen_server:reply(From, ok), State1; handle_message({'$gen_call', From, {send_command_sync, MethodRecord, Content}}, State) -> - State1 = flush(internal_send_command_async(MethodRecord, Content, State)), + State1 = internal_flush( + internal_send_command_async(MethodRecord, Content, State)), + gen_server:reply(From, ok), + State1; +handle_message({'$gen_call', From, flush}, State) -> + State1 = internal_flush(State), gen_server:reply(From, ok), State1; handle_message({send_command_and_notify, QPid, ChPid, MethodRecord}, State) -> @@ -139,9 +193,12 @@ handle_message({'DOWN', _MRef, process, QPid, _Reason}, State) -> rabbit_amqqueue:notify_sent_queue_down(QPid), State; handle_message({inet_reply, _, ok}, State) -> - State; + rabbit_event:ensure_stats_timer(State, #wstate.stats_timer, emit_stats); handle_message({inet_reply, _, Status}, _State) -> exit({writer, send_failed, Status}); +handle_message(emit_stats, State = #wstate{reader = ReaderPid}) -> + ReaderPid ! ensure_stats, + rabbit_event:reset_stats_timer(State, #wstate.stats_timer); handle_message(Message, _State) -> exit({writer, message_not_understood, Message}). @@ -169,6 +226,8 @@ send_command_and_notify(W, Q, ChPid, MethodRecord, Content) -> W ! {send_command_and_notify, Q, ChPid, MethodRecord, Content}, ok. +flush(W) -> call(W, flush). + %%--------------------------------------------------------------------------- call(Pid, Msg) -> @@ -228,13 +287,13 @@ internal_send_command_async(MethodRecord, Content, maybe_flush(State = #wstate{pending = Pending}) -> case iolist_size(Pending) >= ?FLUSH_THRESHOLD of - true -> flush(State); + true -> internal_flush(State); false -> State end. -flush(State = #wstate{pending = []}) -> +internal_flush(State = #wstate{pending = []}) -> State; -flush(State = #wstate{sock = Sock, pending = Pending}) -> +internal_flush(State = #wstate{sock = Sock, pending = Pending}) -> ok = port_cmd(Sock, lists:reverse(Pending)), State#wstate{pending = []}. diff --git a/src/supervised_lifecycle.erl b/src/supervised_lifecycle.erl new file mode 100644 index 00000000..8b306f6f --- /dev/null +++ b/src/supervised_lifecycle.erl @@ -0,0 +1,68 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. +%% + +%% Invoke callbacks on startup and termination. +%% +%% Simply hook this process into a supervision hierarchy, to have the +%% callbacks invoked at a precise point during the establishment and +%% teardown of that hierarchy, respectively. +%% +%% Or launch the process independently, and link to it, to have the +%% callbacks invoked on startup and when the linked process +%% terminates, respectively. + +-module(supervised_lifecycle). + +-behavior(gen_server). + +-export([start_link/3]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/3 :: (atom(), rabbit_types:mfargs(), rabbit_types:mfargs()) -> + rabbit_types:ok_pid_or_error()). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link(Name, StartMFA, StopMFA) -> + gen_server:start_link({local, Name}, ?MODULE, [StartMFA, StopMFA], []). + +%%---------------------------------------------------------------------------- + +init([{M, F, A}, StopMFA]) -> + process_flag(trap_exit, true), + apply(M, F, A), + {ok, StopMFA}. + +handle_call(_Request, _From, State) -> {noreply, State}. + +handle_cast(_Msg, State) -> {noreply, State}. + +handle_info(_Info, State) -> {noreply, State}. + +terminate(_Reason, {M, F, A}) -> + apply(M, F, A), + ok. + +code_change(_OldVsn, State, _Extra) -> {ok, State}. diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 3d3623d7..5a6dc887 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -1,14 +1,18 @@ -%% This file is a copy of supervisor.erl from the R13B-3 Erlang/OTP +%% This file is a copy of supervisor.erl from the R16B Erlang/OTP %% distribution, with the following modifications: %% %% 1) the module name is supervisor2 %% -%% 2) there is a new strategy called -%% simple_one_for_one_terminate. This is exactly the same as for -%% simple_one_for_one, except that children *are* explicitly -%% terminated as per the shutdown component of the child_spec. +%% 2) a find_child/2 utility function has been added %% -%% 3) child specifications can contain, as the restart type, a tuple +%% 3) Added an 'intrinsic' restart type. Like the transient type, this +%% type means the child should only be restarted if the child exits +%% abnormally. Unlike the transient type, if the child exits +%% normally, the supervisor itself also exits normally. If the +%% child is a supervisor and it exits normally (i.e. with reason of +%% 'shutdown') then the child's parent also exits normally. +%% +%% 4) child specifications can contain, as the restart type, a tuple %% {permanent, Delay} | {transient, Delay} | {intrinsic, Delay} %% where Delay >= 0 (see point (4) below for intrinsic). The delay, %% in seconds, indicates what should happen if a child, upon being @@ -41,21 +45,14 @@ %% perspective it's a normal exit, whilst from supervisor's %% perspective, it's an abnormal exit. %% -%% 4) Added an 'intrinsic' restart type. Like the transient type, this -%% type means the child should only be restarted if the child exits -%% abnormally. Unlike the transient type, if the child exits -%% normally, the supervisor itself also exits normally. If the -%% child is a supervisor and it exits normally (i.e. with reason of -%% 'shutdown') then the child's parent also exits normally. -%% %% 5) normal, and {shutdown, _} exit reasons are all treated the same %% (i.e. are regarded as normal exits) %% -%% All modifications are (C) 2010-2012 VMware, Inc. +%% All modifications are (C) 2010-2013 GoPivotal, Inc. %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% Copyright Ericsson AB 1996-2012. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -75,61 +72,34 @@ -behaviour(gen_server). %% External exports --export([start_link/2,start_link/3, +-export([start_link/2, start_link/3, start_child/2, restart_child/2, delete_child/2, terminate_child/2, - which_children/1, find_child/2, - check_childspecs/1]). + which_children/1, count_children/1, + find_child/2, check_childspecs/1]). %% Internal exports --export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). --export([handle_cast/2]). - --define(DICT, dict). - --record(state, {name, - strategy, - children = [], - dynamics = ?DICT:new(), - intensity, - period, - restarts = [], - module, - args}). - --record(child, {pid = undefined, % pid is undefined when child is not running - name, - mfa, - restart_type, - shutdown, - child_type, - modules = []}). - --define(is_simple(State), State#state.strategy =:= simple_one_for_one orelse - State#state.strategy =:= simple_one_for_one_terminate). --define(is_terminate_simple(State), - State#state.strategy =:= simple_one_for_one_terminate). - --ifdef(use_specs). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). +-export([try_again_restart/3]). %%-------------------------------------------------------------------------- -%% Types +-ifdef(use_specs). +-export_type([child_spec/0, startchild_ret/0, strategy/0]). +-endif. %%-------------------------------------------------------------------------- --export_type([child_spec/0, startchild_ret/0, strategy/0, sup_name/0]). - --type child() :: 'undefined' | pid(). +-ifdef(use_specs). +-type child() :: 'undefined' | pid(). -type child_id() :: term(). --type mfargs() :: {M :: module(), F :: atom(), A :: [term()] | undefined}. --type modules() :: [module()] | 'dynamic'. --type delay() :: non_neg_integer(). --type restart() :: 'permanent' | 'transient' | 'temporary' | 'intrinsic' - | {'permanent', delay()} | {'transient', delay()} - | {'intrinsic', delay()}. +-type mfargs() :: {M :: module(), F :: atom(), A :: [term()] | undefined}. +-type modules() :: [module()] | 'dynamic'. +-type delay() :: non_neg_integer(). +-type restart() :: 'permanent' | 'transient' | 'temporary' | 'intrinsic' | {'permanent', delay()} | {'transient', delay()} | {'intrinsic', delay()}. -type shutdown() :: 'brutal_kill' | timeout(). --type worker() :: 'worker' | 'supervisor'. +-type worker() :: 'worker' | 'supervisor'. -type sup_name() :: {'local', Name :: atom()} | {'global', Name :: atom()}. --type sup_ref() :: (Name :: atom()) +-type sup_ref() :: (Name :: atom()) | {Name :: atom(), Node :: node()} | {'global', Name :: atom()} | pid(). @@ -140,40 +110,110 @@ Type :: worker(), Modules :: modules()}. - -type strategy() :: 'one_for_all' | 'one_for_one' - | 'rest_for_one' | 'simple_one_for_one' - | 'simple_one_for_one_terminate'. - --type child_rec() :: #child{pid :: child() | {restarting,pid()} | [pid()], - name :: child_id(), - mfa :: mfargs(), - restart_type :: restart(), - shutdown :: shutdown(), - child_type :: worker(), - modules :: modules()}. - --type state() :: #state{strategy :: strategy(), - children :: [child_rec()], - dynamics :: ?DICT(), - intensity :: non_neg_integer(), - period :: pos_integer()}. + | 'rest_for_one' | 'simple_one_for_one'. +-endif. %%-------------------------------------------------------------------------- -%% Callback behaviour -%%-------------------------------------------------------------------------- +-ifdef(use_specs). +-record(child, {% pid is undefined when child is not running + pid = undefined :: child() | {restarting,pid()} | [pid()], + name :: child_id(), + mfargs :: mfargs(), + restart_type :: restart(), + shutdown :: shutdown(), + child_type :: worker(), + modules = [] :: modules()}). +-type child_rec() :: #child{}. +-else. +-record(child, { + pid = undefined, + name, + mfargs, + restart_type, + shutdown, + child_type, + modules = []}). +-endif. + +-define(DICT, dict). +-define(SETS, sets). +-define(SET, set). + +-ifdef(use_specs). +-record(state, {name, + strategy :: strategy(), + children = [] :: [child_rec()], + dynamics :: ?DICT() | ?SET(), + intensity :: non_neg_integer(), + period :: pos_integer(), + restarts = [], + module, + args}). +-type state() :: #state{}. +-else. +-record(state, {name, + strategy, + children = [], + dynamics, + intensity, + period, + restarts = [], + module, + args}). +-endif. + +-define(is_simple(State), State#state.strategy =:= simple_one_for_one). +-define(is_permanent(R), ((R =:= permanent) orelse + (is_tuple(R) andalso + tuple_size(R) == 2 andalso + element(1, R) =:= permanent))). +-define(is_explicit_restart(R), + R == {shutdown, restart}). + +-ifdef(use_specs). -callback init(Args :: term()) -> {ok, {{RestartStrategy :: strategy(), - MaxR :: non_neg_integer(), - MaxT :: non_neg_integer()}, + MaxR :: non_neg_integer(), + MaxT :: non_neg_integer()}, [ChildSpec :: child_spec()]}} | ignore. +-endif. +-define(restarting(_Pid_), {restarting,_Pid_}). -%%-------------------------------------------------------------------------- -%% Specs -%%-------------------------------------------------------------------------- +%%% --------------------------------------------------- +%%% This is a general process supervisor built upon gen_server.erl. +%%% Servers/processes should/could also be built using gen_server.erl. +%%% SupName = {local, atom()} | {global, atom()}. +%%% --------------------------------------------------- +-ifdef(use_specs). +-type startlink_err() :: {'already_started', pid()} + | {'shutdown', term()} + | term(). +-type startlink_ret() :: {'ok', pid()} | 'ignore' | {'error', startlink_err()}. + +-spec start_link(Module, Args) -> startlink_ret() when + Module :: module(), + Args :: term(). +-endif. +start_link(Mod, Args) -> + gen_server:start_link(?MODULE, {self, Mod, Args}, []). + +-ifdef(use_specs). +-spec start_link(SupName, Module, Args) -> startlink_ret() when + SupName :: sup_name(), + Module :: module(), + Args :: term(). +-endif. +start_link(SupName, Mod, Args) -> + gen_server:start_link(SupName, ?MODULE, {SupName, Mod, Args}, []). + +%%% --------------------------------------------------- +%%% Interface functions. +%%% --------------------------------------------------- +-ifdef(use_specs). -type startchild_err() :: 'already_present' | {'already_started', Child :: child()} | term(). -type startchild_ret() :: {'ok', Child :: child()} @@ -183,91 +223,30 @@ -spec start_child(SupRef, ChildSpec) -> startchild_ret() when SupRef :: sup_ref(), ChildSpec :: child_spec() | (List :: [term()]). +-endif. +start_child(Supervisor, ChildSpec) -> + call(Supervisor, {start_child, ChildSpec}). +-ifdef(use_specs). -spec restart_child(SupRef, Id) -> Result when SupRef :: sup_ref(), Id :: child_id(), Result :: {'ok', Child :: child()} | {'ok', Child :: child(), Info :: term()} | {'error', Error}, - Error :: 'running' | 'not_found' | 'simple_one_for_one' | term(). + Error :: 'running' | 'restarting' | 'not_found' | 'simple_one_for_one' | + term(). +-endif. +restart_child(Supervisor, Name) -> + call(Supervisor, {restart_child, Name}). +-ifdef(use_specs). -spec delete_child(SupRef, Id) -> Result when SupRef :: sup_ref(), Id :: child_id(), Result :: 'ok' | {'error', Error}, - Error :: 'running' | 'not_found' | 'simple_one_for_one'. - --spec terminate_child(SupRef, Id) -> Result when - SupRef :: sup_ref(), - Id :: pid() | child_id(), - Result :: 'ok' | {'error', Error}, - Error :: 'not_found' | 'simple_one_for_one'. - --spec which_children(SupRef) -> [{Id,Child,Type,Modules}] when - SupRef :: sup_ref(), - Id :: child_id() | 'undefined', - Child :: child(), - Type :: worker(), - Modules :: modules(). - --spec check_childspecs(ChildSpecs) -> Result when - ChildSpecs :: [child_spec()], - Result :: 'ok' | {'error', Error :: term()}. - --type init_sup_name() :: sup_name() | 'self'. - --type stop_rsn() :: 'shutdown' | {'bad_return', {module(),'init', term()}} - | {'bad_start_spec', term()} | {'start_spec', term()} - | {'supervisor_data', term()}. - --spec init({init_sup_name(), module(), [term()]}) -> - {'ok', state()} | 'ignore' | {'stop', stop_rsn()}. - --type call() :: 'which_children'. --spec handle_call(call(), term(), state()) -> {'reply', term(), state()}. - --spec handle_cast('null', state()) -> {'noreply', state()}. - --spec handle_info(term(), state()) -> - {'noreply', state()} | {'stop', 'shutdown', state()}. - --spec terminate(term(), state()) -> 'ok'. - --spec code_change(term(), state(), term()) -> - {'ok', state()} | {'error', term()}. - --else. - --export([behaviour_info/1]). - -behaviour_info(callbacks) -> - [{init,1}]; -behaviour_info(_Other) -> - undefined. - + Error :: 'running' | 'restarting' | 'not_found' | 'simple_one_for_one'. -endif. - -%%% --------------------------------------------------- -%%% This is a general process supervisor built upon gen_server.erl. -%%% Servers/processes should/could also be built using gen_server.erl. -%%% SupName = {local, atom()} | {global, atom()}. -%%% --------------------------------------------------- -start_link(Mod, Args) -> - gen_server:start_link(?MODULE, {self, Mod, Args}, []). - -start_link(SupName, Mod, Args) -> - gen_server:start_link(SupName, ?MODULE, {SupName, Mod, Args}, []). - -%%% --------------------------------------------------- -%%% Interface functions. -%%% --------------------------------------------------- -start_child(Supervisor, ChildSpec) -> - call(Supervisor, {start_child, ChildSpec}). - -restart_child(Supervisor, Name) -> - call(Supervisor, {restart_child, Name}). - delete_child(Supervisor, Name) -> call(Supervisor, {delete_child, Name}). @@ -277,12 +256,44 @@ delete_child(Supervisor, Name) -> %% Note that the child is *always* terminated in some %% way (maybe killed). %%----------------------------------------------------------------- +-ifdef(use_specs). +-spec terminate_child(SupRef, Id) -> Result when + SupRef :: sup_ref(), + Id :: pid() | child_id(), + Result :: 'ok' | {'error', Error}, + Error :: 'not_found' | 'simple_one_for_one'. +-endif. terminate_child(Supervisor, Name) -> call(Supervisor, {terminate_child, Name}). +-ifdef(use_specs). +-spec which_children(SupRef) -> [{Id,Child,Type,Modules}] when + SupRef :: sup_ref(), + Id :: child_id() | undefined, + Child :: child() | 'restarting', + Type :: worker(), + Modules :: modules(). +-endif. which_children(Supervisor) -> call(Supervisor, which_children). +-ifdef(use_specs). +-spec count_children(SupRef) -> PropListOfCounts when + SupRef :: sup_ref(), + PropListOfCounts :: [Count], + Count :: {specs, ChildSpecCount :: non_neg_integer()} + | {active, ActiveProcessCount :: non_neg_integer()} + | {supervisors, ChildSupervisorCount :: non_neg_integer()} + |{workers, ChildWorkerCount :: non_neg_integer()}. +-endif. +count_children(Supervisor) -> + call(Supervisor, count_children). + +-ifdef(use_specs). +-spec find_child(Supervisor, Name) -> [pid()] when + Supervisor :: sup_ref(), + Name :: child_id(). +-endif. find_child(Supervisor, Name) -> [Pid || {Name1, Pid, _Type, _Modules} <- which_children(Supervisor), Name1 =:= Name]. @@ -290,6 +301,11 @@ find_child(Supervisor, Name) -> call(Supervisor, Req) -> gen_server:call(Supervisor, Req, infinity). +-ifdef(use_specs). +-spec check_childspecs(ChildSpecs) -> Result when + ChildSpecs :: [child_spec()], + Result :: 'ok' | {'error', Error :: term()}. +-endif. check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> case check_startspec(ChildSpecs) of {ok, _} -> ok; @@ -297,11 +313,37 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> end; check_childspecs(X) -> {error, {badarg, X}}. +%%%----------------------------------------------------------------- +%%% Called by timer:apply_after from restart/2 +-ifdef(use_specs). +-spec try_again_restart(SupRef, Child, Reason) -> ok when + SupRef :: sup_ref(), + Child :: child_id() | pid(), + Reason :: term(). +-endif. +try_again_restart(Supervisor, Child, Reason) -> + cast(Supervisor, {try_again_restart, Child, Reason}). + +cast(Supervisor, Req) -> + gen_server:cast(Supervisor, Req). + %%% --------------------------------------------------- %%% %%% Initialize the supervisor. %%% %%% --------------------------------------------------- +-ifdef(use_specs). +-type init_sup_name() :: sup_name() | 'self'. + +-type stop_rsn() :: {'shutdown', term()} + | {'bad_return', {module(),'init', term()}} + | {'bad_start_spec', term()} + | {'start_spec', term()} + | {'supervisor_data', term()}. + +-spec init({init_sup_name(), module(), [term()]}) -> + {'ok', state()} | 'ignore' | {'stop', stop_rsn()}. +-endif. init({SupName, Mod, Args}) -> process_flag(trap_exit, true), case Mod:init(Args) of @@ -319,7 +361,7 @@ init({SupName, Mod, Args}) -> Error -> {stop, {bad_return, {Mod, init, Error}}} end. - + init_children(State, StartSpec) -> SupName = State#state.name, case check_startspec(StartSpec) of @@ -327,9 +369,9 @@ init_children(State, StartSpec) -> case start_children(Children, SupName) of {ok, NChildren} -> {ok, State#state{children = NChildren}}; - {error, NChildren} -> + {error, NChildren, Reason} -> terminate_children(NChildren, SupName), - {stop, shutdown} + {stop, {shutdown, Reason}} end; Error -> {stop, {start_spec, Error}} @@ -347,32 +389,35 @@ init_dynamic(_State, StartSpec) -> %%----------------------------------------------------------------- %% Func: start_children/2 -%% Args: Children = [#child] in start order -%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} -%% Purpose: Start all children. The new list contains #child's +%% Args: Children = [child_rec()] in start order +%% SupName = {local, atom()} | {global, atom()} | {pid(), Mod} +%% Purpose: Start all children. The new list contains #child's %% with pids. -%% Returns: {ok, NChildren} | {error, NChildren} -%% NChildren = [#child] in termination order (reversed +%% Returns: {ok, NChildren} | {error, NChildren, Reason} +%% NChildren = [child_rec()] in termination order (reversed %% start order) %%----------------------------------------------------------------- start_children(Children, SupName) -> start_children(Children, [], SupName). start_children([Child|Chs], NChildren, SupName) -> case do_start_child(SupName, Child) of + {ok, undefined} when Child#child.restart_type =:= temporary -> + start_children(Chs, NChildren, SupName); {ok, Pid} -> start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); {ok, Pid, _Extra} -> start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); {error, Reason} -> report_error(start_error, Reason, Child, SupName), - {error, lists:reverse(Chs) ++ [Child | NChildren]} + {error, lists:reverse(Chs) ++ [Child | NChildren], + {failed_to_start_child,Child#child.name,Reason}} end; start_children([], NChildren, _SupName) -> {ok, NChildren}. do_start_child(SupName, Child) -> - #child{mfa = {M, F, A}} = Child, - case catch apply(M, F, A) of + #child{mfargs = {M, F, Args}} = Child, + case catch apply(M, F, Args) of {ok, Pid} when is_pid(Pid) -> NChild = Child#child{pid = Pid}, report_progress(NChild, SupName), @@ -381,7 +426,7 @@ do_start_child(SupName, Child) -> NChild = Child#child{pid = Pid}, report_progress(NChild, SupName), {ok, Pid, Extra}; - ignore -> + ignore -> {ok, undefined}; {error, What} -> {error, What}; What -> {error, What} @@ -400,36 +445,55 @@ do_start_child_i(M, F, A) -> What -> {error, What} end. - %%% --------------------------------------------------- %%% %%% Callback functions. %%% %%% --------------------------------------------------- +-ifdef(use_specs). +-type call() :: 'which_children' | 'count_children' | {_, _}. % XXX: refine +-spec handle_call(call(), term(), state()) -> {'reply', term(), state()}. +-endif. handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> - #child{mfa = {M, F, A}} = hd(State#state.children), + Child = hd(State#state.children), + #child{mfargs = {M, F, A}} = Child, Args = A ++ EArgs, case do_start_child_i(M, F, Args) of - {ok, undefined} -> - {reply, {ok, undefined}, State}; + {ok, undefined} when Child#child.restart_type =:= temporary -> + {reply, {ok, undefined}, State}; {ok, Pid} -> - NState = State#state{dynamics = - ?DICT:store(Pid, Args, State#state.dynamics)}, + NState = save_dynamic_child(Child#child.restart_type, Pid, Args, State), {reply, {ok, Pid}, NState}; {ok, Pid, Extra} -> - NState = State#state{dynamics = - ?DICT:store(Pid, Args, State#state.dynamics)}, + NState = save_dynamic_child(Child#child.restart_type, Pid, Args, State), {reply, {ok, Pid, Extra}, NState}; What -> {reply, What, State} end; -%%% The requests terminate_child, delete_child and restart_child are -%%% invalid for simple_one_for_one and simple_one_for_one_terminate -%%% supervisors. +%% terminate_child for simple_one_for_one can only be done with pid +handle_call({terminate_child, Name}, _From, State) when not is_pid(Name), + ?is_simple(State) -> + {reply, {error, simple_one_for_one}, State}; + +handle_call({terminate_child, Name}, _From, State) -> + case get_child(Name, State, ?is_simple(State)) of + {value, Child} -> + case do_terminate(Child, State#state.name) of + #child{restart_type=RT} when RT=:=temporary; ?is_simple(State) -> + {reply, ok, state_del_child(Child, State)}; + NChild -> + {reply, ok, replace_child(NChild, State)} + end; + false -> + {reply, {error, not_found}, State} + end; + +%%% The requests delete_child and restart_child are invalid for +%%% simple_one_for_one supervisors. handle_call({_Req, _Data}, _From, State) when ?is_simple(State) -> - {reply, {error, State#state.strategy}, State}; + {reply, {error, simple_one_for_one}, State}; handle_call({start_child, ChildSpec}, _From, State) -> case check_childspec(ChildSpec) of @@ -453,6 +517,8 @@ handle_call({restart_child, Name}, _From, State) -> Error -> {reply, Error, State} end; + {value, #child{pid=?restarting(_)}} -> + {reply, {error, restarting}, State}; {value, _} -> {reply, {error, running}, State}; _ -> @@ -464,60 +530,146 @@ handle_call({delete_child, Name}, _From, State) -> {value, Child} when Child#child.pid =:= undefined -> NState = remove_child(Child, State), {reply, ok, NState}; + {value, #child{pid=?restarting(_)}} -> + {reply, {error, restarting}, State}; {value, _} -> {reply, {error, running}, State}; _ -> {reply, {error, not_found}, State} end; -handle_call({terminate_child, Name}, _From, State) -> - case get_child(Name, State) of - {value, Child} -> - NChild = do_terminate(Child, State#state.name), - {reply, ok, replace_child(NChild, State)}; - _ -> - {reply, {error, not_found}, State} - end; +handle_call(which_children, _From, #state{children = [#child{restart_type = temporary, + child_type = CT, + modules = Mods}]} = + State) when ?is_simple(State) -> + Reply = lists:map(fun(Pid) -> {undefined, Pid, CT, Mods} end, + ?SETS:to_list(dynamics_db(temporary, State#state.dynamics))), + {reply, Reply, State}; -handle_call(which_children, _From, State) when ?is_simple(State) -> - [#child{child_type = CT, modules = Mods}] = State#state.children, - Reply = lists:map(fun ({Pid, _}) -> {undefined, Pid, CT, Mods} end, - ?DICT:to_list(State#state.dynamics)), +handle_call(which_children, _From, #state{children = [#child{restart_type = RType, + child_type = CT, + modules = Mods}]} = + State) when ?is_simple(State) -> + Reply = lists:map(fun({?restarting(_),_}) -> {undefined,restarting,CT,Mods}; + ({Pid, _}) -> {undefined, Pid, CT, Mods} end, + ?DICT:to_list(dynamics_db(RType, State#state.dynamics))), {reply, Reply, State}; handle_call(which_children, _From, State) -> Resp = - lists:map(fun (#child{pid = Pid, name = Name, + lists:map(fun(#child{pid = ?restarting(_), name = Name, + child_type = ChildType, modules = Mods}) -> + {Name, restarting, ChildType, Mods}; + (#child{pid = Pid, name = Name, child_type = ChildType, modules = Mods}) -> - {Name, Pid, ChildType, Mods} + {Name, Pid, ChildType, Mods} end, State#state.children), - {reply, Resp, State}. + {reply, Resp, State}; -%%% Hopefully cause a function-clause as there is no API function -%%% that utilizes cast. -handle_cast(null, State) -> - error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", - []), - {noreply, State}. +handle_call(count_children, _From, #state{children = [#child{restart_type = temporary, + child_type = CT}]} = State) + when ?is_simple(State) -> + {Active, Count} = + ?SETS:fold(fun(Pid, {Alive, Tot}) -> + case is_pid(Pid) andalso is_process_alive(Pid) of + true ->{Alive+1, Tot +1}; + false -> + {Alive, Tot + 1} + end + end, {0, 0}, dynamics_db(temporary, State#state.dynamics)), + Reply = case CT of + supervisor -> [{specs, 1}, {active, Active}, + {supervisors, Count}, {workers, 0}]; + worker -> [{specs, 1}, {active, Active}, + {supervisors, 0}, {workers, Count}] + end, + {reply, Reply, State}; -handle_info({delayed_restart, {RestartType, Reason, Child}}, State) +handle_call(count_children, _From, #state{children = [#child{restart_type = RType, + child_type = CT}]} = State) when ?is_simple(State) -> - {ok, NState} = do_restart(RestartType, Reason, Child, State), - {noreply, NState}; -handle_info({delayed_restart, {RestartType, Reason, Child}}, State) -> - case get_child(Child#child.name, State) of - {value, Child1} -> - {ok, NState} = do_restart(RestartType, Reason, Child1, State), - {noreply, NState}; - _ -> + {Active, Count} = + ?DICT:fold(fun(Pid, _Val, {Alive, Tot}) -> + case is_pid(Pid) andalso is_process_alive(Pid) of + true -> + {Alive+1, Tot +1}; + false -> + {Alive, Tot + 1} + end + end, {0, 0}, dynamics_db(RType, State#state.dynamics)), + Reply = case CT of + supervisor -> [{specs, 1}, {active, Active}, + {supervisors, Count}, {workers, 0}]; + worker -> [{specs, 1}, {active, Active}, + {supervisors, 0}, {workers, Count}] + end, + {reply, Reply, State}; + +handle_call(count_children, _From, State) -> + %% Specs and children are together on the children list... + {Specs, Active, Supers, Workers} = + lists:foldl(fun(Child, Counts) -> + count_child(Child, Counts) + end, {0,0,0,0}, State#state.children), + + %% Reformat counts to a property list. + Reply = [{specs, Specs}, {active, Active}, + {supervisors, Supers}, {workers, Workers}], + {reply, Reply, State}. + + +count_child(#child{pid = Pid, child_type = worker}, + {Specs, Active, Supers, Workers}) -> + case is_pid(Pid) andalso is_process_alive(Pid) of + true -> {Specs+1, Active+1, Supers, Workers+1}; + false -> {Specs+1, Active, Supers, Workers+1} + end; +count_child(#child{pid = Pid, child_type = supervisor}, + {Specs, Active, Supers, Workers}) -> + case is_pid(Pid) andalso is_process_alive(Pid) of + true -> {Specs+1, Active+1, Supers+1, Workers}; + false -> {Specs+1, Active, Supers+1, Workers} + end. + + +%%% If a restart attempt failed, this message is sent via +%%% timer:apply_after(0,...) in order to give gen_server the chance to +%%% check it's inbox before trying again. +-ifdef(use_specs). +-spec handle_cast({try_again_restart, child_id() | pid(), term()}, state()) -> + {'noreply', state()} | {stop, shutdown, state()}. +-endif. +handle_cast({try_again_restart,Pid,Reason}, #state{children=[Child]}=State) + when ?is_simple(State) -> + RT = Child#child.restart_type, + RPid = restarting(Pid), + case dynamic_child_args(RPid, dynamics_db(RT, State#state.dynamics)) of + {ok, Args} -> + {M, F, _} = Child#child.mfargs, + NChild = Child#child{pid = RPid, mfargs = {M, F, Args}}, + try_restart(Child#child.restart_type, Reason, NChild, State); + error -> {noreply, State} end; +handle_cast({try_again_restart,Name,Reason}, State) -> + %% we still support >= R12-B3 in which lists:keyfind/3 doesn't exist + case lists:keysearch(Name,#child.name,State#state.children) of + {value, Child = #child{pid=?restarting(_), restart_type=RestartType}} -> + try_restart(RestartType, Reason, Child, State); + _ -> + {noreply,State} + end. + %% %% Take care of terminated children. %% +-ifdef(use_specs). +-spec handle_info(term(), state()) -> + {'noreply', state()} | {'stop', 'shutdown', state()}. +-endif. handle_info({'EXIT', Pid, Reason}, State) -> case restart_child(Pid, Reason, State) of {ok, State1} -> @@ -526,20 +678,34 @@ handle_info({'EXIT', Pid, Reason}, State) -> {stop, shutdown, State1} end; +handle_info({delayed_restart, {RestartType, Reason, Child}}, State) + when ?is_simple(State) -> + try_restart(RestartType, Reason, Child, State); +handle_info({delayed_restart, {RestartType, Reason, Child}}, State) -> + case get_child(Child#child.name, State) of + {value, Child1} -> + try_restart(RestartType, Reason, Child1, State); + _What -> + {noreply, State} + end; + handle_info(Msg, State) -> error_logger:error_msg("Supervisor received unexpected message: ~p~n", [Msg]), {noreply, State}. + %% %% Terminate this server. %% -terminate(_Reason, State) when ?is_terminate_simple(State) -> - terminate_simple_children( - hd(State#state.children), State#state.dynamics, State#state.name), - ok; +-ifdef(use_specs). +-spec terminate(term(), state()) -> 'ok'. +-endif. +terminate(_Reason, #state{children=[Child]} = State) when ?is_simple(State) -> + terminate_dynamic_children(Child, dynamics_db(Child#child.restart_type, + State#state.dynamics), + State#state.name); terminate(_Reason, State) -> - terminate_children(State#state.children, State#state.name), - ok. + terminate_children(State#state.children, State#state.name). %% %% Change code for the supervisor. @@ -550,6 +716,10 @@ terminate(_Reason, State) -> %% NOTE: This requires that the init function of the call-back module %% does not have any side effects. %% +-ifdef(use_specs). +-spec code_change(term(), state(), term()) -> + {'ok', state()} | {'error', term()}. +-endif. code_change(_, State, _) -> case (State#state.module):init(State#state.args) of {ok, {SupFlags, StartSpec}} -> @@ -577,14 +747,13 @@ check_flags({Strategy, MaxIntensity, Period}) -> check_flags(What) -> {bad_flags, What}. -update_childspec(State, StartSpec) when ?is_simple(State) -> - case check_startspec(StartSpec) of - {ok, [Child]} -> - {ok, State#state{children = [Child]}}; - Error -> - {error, Error} - end; - +update_childspec(State, StartSpec) when ?is_simple(State) -> + case check_startspec(StartSpec) of + {ok, [Child]} -> + {ok, State#state{children = [Child]}}; + Error -> + {error, Error} + end; update_childspec(State, StartSpec) -> case check_startspec(StartSpec) of {ok, Children} -> @@ -603,11 +772,11 @@ update_childspec1([Child|OldC], Children, KeepOld) -> update_childspec1(OldC, Children, [Child|KeepOld]) end; update_childspec1([], Children, KeepOld) -> - % Return them in (keeped) reverse start order. - lists:reverse(Children ++ KeepOld). + %% Return them in (kept) reverse start order. + lists:reverse(Children ++ KeepOld). update_chsp(OldCh, Children) -> - case lists:map(fun (Ch) when OldCh#child.name =:= Ch#child.name -> + case lists:map(fun(Ch) when OldCh#child.name =:= Ch#child.name -> Ch#child{pid = OldCh#child.pid}; (Ch) -> Ch @@ -627,20 +796,16 @@ handle_start_child(Child, State) -> case get_child(Child#child.name, State) of false -> case do_start_child(State#state.name, Child) of + {ok, undefined} when Child#child.restart_type =:= temporary -> + {{ok, undefined}, State}; {ok, Pid} -> - Children = State#state.children, - {{ok, Pid}, - State#state{children = - [Child#child{pid = Pid}|Children]}}; + {{ok, Pid}, save_child(Child#child{pid = Pid}, State)}; {ok, Pid, Extra} -> - Children = State#state.children, - {{ok, Pid, Extra}, - State#state{children = - [Child#child{pid = Pid}|Children]}}; + {{ok, Pid, Extra}, save_child(Child#child{pid = Pid}, State)}; {error, What} -> {{error, {What, Child}}, State} end; - {value, OldChild} when OldChild#child.pid =/= undefined -> + {value, OldChild} when is_pid(OldChild#child.pid) -> {{error, {already_started, OldChild#child.pid}}, State}; {value, _OldChild} -> {{error, already_present}, State} @@ -648,105 +813,145 @@ handle_start_child(Child, State) -> %%% --------------------------------------------------- %%% Restart. A process has terminated. -%%% Returns: {ok, #state} | {shutdown, #state} +%%% Returns: {ok, state()} | {shutdown, state()} %%% --------------------------------------------------- -restart_child(Pid, Reason, State) when ?is_simple(State) -> - case ?DICT:find(Pid, State#state.dynamics) of +restart_child(Pid, Reason, #state{children = [Child]} = State) when ?is_simple(State) -> + RestartType = Child#child.restart_type, + case dynamic_child_args(Pid, dynamics_db(RestartType, State#state.dynamics)) of {ok, Args} -> - [Child] = State#state.children, - RestartType = Child#child.restart_type, - {M, F, _} = Child#child.mfa, - NChild = Child#child{pid = Pid, mfa = {M, F, Args}}, + {M, F, _} = Child#child.mfargs, + NChild = Child#child{pid = Pid, mfargs = {M, F, Args}}, do_restart(RestartType, Reason, NChild, State); error -> - {ok, State} + {ok, State} end; + restart_child(Pid, Reason, State) -> Children = State#state.children, + %% we still support >= R12-B3 in which lists:keyfind/3 doesn't exist case lists:keysearch(Pid, #child.pid, Children) of - {value, Child} -> - RestartType = Child#child.restart_type, + {value, #child{restart_type = RestartType} = Child} -> do_restart(RestartType, Reason, Child, State); - _ -> + false -> {ok, State} end. -do_restart({permanent = RestartType, Delay}, Reason, Child, State) -> - do_restart_delay({RestartType, Delay}, Reason, Child, State); -do_restart(permanent, Reason, Child, State) -> - report_error(child_terminated, Reason, Child, State#state.name), - restart(Child, State); -do_restart(Type, normal, Child, State) -> - del_child_and_maybe_shutdown(Type, Child, State); -do_restart({RestartType, Delay}, {shutdown, restart} = Reason, Child, State) - when RestartType =:= transient orelse RestartType =:= intrinsic -> - do_restart_delay({RestartType, Delay}, Reason, Child, State); -do_restart(Type, {shutdown, _}, Child, State) -> - del_child_and_maybe_shutdown(Type, Child, State); -do_restart(Type, shutdown, Child = #child{child_type = supervisor}, State) -> - del_child_and_maybe_shutdown(Type, Child, State); -do_restart({RestartType, Delay}, Reason, Child, State) - when RestartType =:= transient orelse RestartType =:= intrinsic -> - do_restart_delay({RestartType, Delay}, Reason, Child, State); -do_restart(Type, Reason, Child, State) when Type =:= transient orelse - Type =:= intrinsic -> - report_error(child_terminated, Reason, Child, State#state.name), +try_restart(RestartType, Reason, Child, State) -> + case handle_restart(RestartType, Reason, Child, State) of + {ok, NState} -> {noreply, NState}; + {shutdown, State2} -> {stop, shutdown, State2} + end. + +do_restart(RestartType, Reason, Child, State) -> + maybe_report_error(RestartType, Reason, Child, State), + handle_restart(RestartType, Reason, Child, State). + +maybe_report_error(permanent, Reason, Child, State) -> + report_child_termination(Reason, Child, State); +maybe_report_error({permanent, _}, Reason, Child, State) -> + report_child_termination(Reason, Child, State); +maybe_report_error(_Type, Reason, Child, State) -> + case is_abnormal_termination(Reason) of + true -> report_child_termination(Reason, Child, State); + false -> ok + end. + +report_child_termination(Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name). + +handle_restart(permanent, _Reason, Child, State) -> restart(Child, State); -do_restart(temporary, Reason, Child, State) -> - report_error(child_terminated, Reason, Child, State#state.name), - NState = state_del_child(Child, State), - {ok, NState}. +handle_restart(transient, Reason, Child, State) -> + restart_if_explicit_or_abnormal(fun restart/2, + fun delete_child_and_continue/2, + Reason, Child, State); +handle_restart(intrinsic, Reason, Child, State) -> + restart_if_explicit_or_abnormal(fun restart/2, + fun delete_child_and_stop/2, + Reason, Child, State); +handle_restart(temporary, _Reason, Child, State) -> + delete_child_and_continue(Child, State); +handle_restart({permanent, _Delay}=Restart, Reason, Child, State) -> + do_restart_delay(Restart, Reason, Child, State); +handle_restart({transient, _Delay}=Restart, Reason, Child, State) -> + restart_if_explicit_or_abnormal(defer_to_restart_delay(Restart, Reason), + fun delete_child_and_continue/2, + Reason, Child, State); +handle_restart({intrinsic, _Delay}=Restart, Reason, Child, State) -> + restart_if_explicit_or_abnormal(defer_to_restart_delay(Restart, Reason), + fun delete_child_and_stop/2, + Reason, Child, State). + +restart_if_explicit_or_abnormal(RestartHow, Otherwise, Reason, Child, State) -> + case ?is_explicit_restart(Reason) orelse is_abnormal_termination(Reason) of + true -> RestartHow(Child, State); + false -> Otherwise(Child, State) + end. + +defer_to_restart_delay(Restart, Reason) -> + fun(Child, State) -> do_restart_delay(Restart, Reason, Child, State) end. + +delete_child_and_continue(Child, State) -> + {ok, state_del_child(Child, State)}. + +delete_child_and_stop(Child, State) -> + {shutdown, state_del_child(Child, State)}. + +is_abnormal_termination(normal) -> false; +is_abnormal_termination(shutdown) -> false; +is_abnormal_termination({shutdown, _}) -> false; +is_abnormal_termination(_Other) -> true. do_restart_delay({RestartType, Delay}, Reason, Child, State) -> - case restart1(Child, State) of + case add_restart(State) of {ok, NState} -> - {ok, NState}; - {terminate, NState} -> + maybe_restart(NState#state.strategy, Child, NState); + {terminate, _NState} -> + %% we've reached the max restart intensity, but the + %% add_restart will have added to the restarts + %% field. Given we don't want to die here, we need to go + %% back to the old restarts field otherwise we'll never + %% attempt to restart later, which is why we ignore + %% NState for this clause. _TRef = erlang:send_after(trunc(Delay*1000), self(), {delayed_restart, {{RestartType, Delay}, Reason, Child}}), - {ok, state_del_child(Child, NState)} + {ok, state_del_child(Child, State)} end. -del_child_and_maybe_shutdown(intrinsic, Child, State) -> - {shutdown, state_del_child(Child, State)}; -del_child_and_maybe_shutdown({intrinsic, _Delay}, Child, State) -> - {shutdown, state_del_child(Child, State)}; -del_child_and_maybe_shutdown(_, Child, State) -> - {ok, state_del_child(Child, State)}. - restart(Child, State) -> case add_restart(State) of {ok, NState} -> - restart(NState#state.strategy, Child, NState, fun restart/2); + maybe_restart(NState#state.strategy, Child, NState); {terminate, NState} -> report_error(shutdown, reached_max_restart_intensity, Child, State#state.name), - {shutdown, state_del_child(Child, NState)} + {shutdown, remove_child(Child, NState)} end. -restart1(Child, State) -> - case add_restart(State) of - {ok, NState} -> - restart(NState#state.strategy, Child, NState, fun restart1/2); - {terminate, _NState} -> - %% we've reached the max restart intensity, but the - %% add_restart will have added to the restarts - %% field. Given we don't want to die here, we need to go - %% back to the old restarts field otherwise we'll never - %% attempt to restart later. - {terminate, State} +maybe_restart(Strategy, Child, State) -> + case restart(Strategy, Child, State) of + {try_again, Reason, NState2} -> + %% Leaving control back to gen_server before + %% trying again. This way other incoming requsts + %% for the supervisor can be handled - e.g. a + %% shutdown request for the supervisor or the + %% child. + Id = if ?is_simple(State) -> Child#child.pid; + true -> Child#child.name + end, + timer:apply_after(0,?MODULE,try_again_restart,[self(),Id,Reason]), + {ok,NState2}; + Other -> + Other end. -restart(Strategy, Child, State, Restart) - when Strategy =:= simple_one_for_one orelse - Strategy =:= simple_one_for_one_terminate -> - #child{mfa = {M, F, A}} = Child, - Dynamics = ?DICT:erase(Child#child.pid, State#state.dynamics), +restart(simple_one_for_one, Child, State) -> + #child{pid = OldPid, mfargs = {M, F, A}} = Child, + Dynamics = ?DICT:erase(OldPid, dynamics_db(Child#child.restart_type, + State#state.dynamics)), case do_start_child_i(M, F, A) of - {ok, undefined} -> - {ok, State}; {ok, Pid} -> NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, {ok, NState}; @@ -754,10 +959,13 @@ restart(Strategy, Child, State, Restart) NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, {ok, NState}; {error, Error} -> + NState = State#state{dynamics = ?DICT:store(restarting(OldPid), A, + Dynamics)}, report_error(start_error, Error, Child, State#state.name), - Restart(Child, State) + {try_again, Error, NState} end; -restart(one_for_one, Child, State, Restart) -> +restart(one_for_one, Child, State) -> + OldPid = Child#child.pid, case do_start_child(State#state.name, Child) of {ok, Pid} -> NState = replace_child(Child#child{pid = Pid}, State), @@ -766,123 +974,71 @@ restart(one_for_one, Child, State, Restart) -> NState = replace_child(Child#child{pid = Pid}, State), {ok, NState}; {error, Reason} -> + NState = replace_child(Child#child{pid = restarting(OldPid)}, State), report_error(start_error, Reason, Child, State#state.name), - Restart(Child, State) + {try_again, Reason, NState} end; -restart(rest_for_one, Child, State, Restart) -> +restart(rest_for_one, Child, State) -> {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), ChAfter2 = terminate_children(ChAfter, State#state.name), case start_children(ChAfter2, State#state.name) of {ok, ChAfter3} -> {ok, State#state{children = ChAfter3 ++ ChBefore}}; - {error, ChAfter3} -> - Restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + {error, ChAfter3, Reason} -> + NChild = Child#child{pid=restarting(Child#child.pid)}, + NState = State#state{children = ChAfter3 ++ ChBefore}, + {try_again, Reason, replace_child(NChild,NState)} end; -restart(one_for_all, Child, State, Restart) -> +restart(one_for_all, Child, State) -> Children1 = del_child(Child#child.pid, State#state.children), Children2 = terminate_children(Children1, State#state.name), case start_children(Children2, State#state.name) of {ok, NChs} -> {ok, State#state{children = NChs}}; - {error, NChs} -> - Restart(Child, State#state{children = NChs}) + {error, NChs, Reason} -> + NChild = Child#child{pid=restarting(Child#child.pid)}, + NState = State#state{children = NChs}, + {try_again, Reason, replace_child(NChild,NState)} end. +restarting(Pid) when is_pid(Pid) -> ?restarting(Pid); +restarting(RPid) -> RPid. + %%----------------------------------------------------------------- %% Func: terminate_children/2 -%% Args: Children = [#child] in termination order +%% Args: Children = [child_rec()] in termination order %% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} -%% Returns: NChildren = [#child] in +%% Returns: NChildren = [child_rec()] in %% startup order (reversed termination order) %%----------------------------------------------------------------- terminate_children(Children, SupName) -> terminate_children(Children, SupName, []). +%% Temporary children should not be restarted and thus should +%% be skipped when building the list of terminated children, although +%% we do want them to be shut down as many functions from this module +%% use this function to just clear everything. +terminate_children([Child = #child{restart_type=temporary} | Children], SupName, Res) -> + do_terminate(Child, SupName), + terminate_children(Children, SupName, Res); terminate_children([Child | Children], SupName, Res) -> NChild = do_terminate(Child, SupName), terminate_children(Children, SupName, [NChild | Res]); terminate_children([], _SupName, Res) -> Res. -terminate_simple_children(Child, Dynamics, SupName) -> - Pids = dict:fold(fun (Pid, _Args, Pids) -> - erlang:monitor(process, Pid), - unlink(Pid), - exit(Pid, child_exit_reason(Child)), - [Pid | Pids] - end, [], Dynamics), - TimeoutMsg = {timeout, make_ref()}, - TRef = timeout_start(Child, TimeoutMsg), - {Replies, Timedout} = - lists:foldl( - fun (_Pid, {Replies, Timedout}) -> - {Reply, Timedout1} = - receive - TimeoutMsg -> - Remaining = Pids -- [P || {P, _} <- Replies], - [exit(P, kill) || P <- Remaining], - receive {'DOWN', _MRef, process, Pid, Reason} -> - {{error, Reason}, true} - end; - {'DOWN', _MRef, process, Pid, Reason} -> - {child_res(Child, Reason, Timedout), Timedout}; - {'EXIT', Pid, Reason} -> - receive {'DOWN', _MRef, process, Pid, _} -> - {{error, Reason}, Timedout} - end - end, - {[{Pid, Reply} | Replies], Timedout1} - end, {[], false}, Pids), - timeout_stop(Child, TRef, TimeoutMsg, Timedout), - ReportError = shutdown_error_reporter(SupName), - [case Reply of - {_Pid, ok} -> ok; - {Pid, {error, R}} -> ReportError(R, Child#child{pid = Pid}) - end || Reply <- Replies], - ok. - -child_exit_reason(#child{shutdown = brutal_kill}) -> kill; -child_exit_reason(#child{}) -> shutdown. - -child_res(#child{shutdown=brutal_kill}, killed, false) -> ok; -child_res(#child{}, shutdown, false) -> ok; -child_res(#child{restart_type=permanent}, normal, false) -> {error, normal}; -child_res(#child{restart_type={permanent,_}},normal, false) -> {error, normal}; -child_res(#child{}, normal, false) -> ok; -child_res(#child{}, R, _) -> {error, R}. - -timeout_start(#child{shutdown = Time}, Msg) when is_integer(Time) -> - erlang:send_after(Time, self(), Msg); -timeout_start(#child{}, _Msg) -> - ok. - -timeout_stop(#child{shutdown = Time}, TRef, Msg, false) when is_integer(Time) -> - erlang:cancel_timer(TRef), - receive - Msg -> ok - after - 0 -> ok - end; -timeout_stop(#child{}, ok, _Msg, _Timedout) -> - ok. - -do_terminate(Child, SupName) when Child#child.pid =/= undefined -> - ReportError = shutdown_error_reporter(SupName), +do_terminate(Child, SupName) when is_pid(Child#child.pid) -> case shutdown(Child#child.pid, Child#child.shutdown) of ok -> ok; - {error, normal} -> - case Child#child.restart_type of - permanent -> ReportError(normal, Child); - {permanent, _Delay} -> ReportError(normal, Child); - _ -> ok - end; + {error, normal} when not ?is_permanent(Child#child.restart_type) -> + ok; {error, OtherReason} -> - ReportError(OtherReason, Child) + report_error(shutdown_error, OtherReason, Child, SupName) end, Child#child{pid = undefined}; do_terminate(Child, _SupName) -> - Child. + Child#child{pid = undefined}. %%----------------------------------------------------------------- %% Shutdowns a child. We must check the EXIT value @@ -895,7 +1051,6 @@ do_terminate(Child, _SupName) -> %% Returns: ok | {error, OtherReason} (this should be reported) %%----------------------------------------------------------------- shutdown(Pid, brutal_kill) -> - case monitor_child(Pid) of ok -> exit(Pid, kill), @@ -908,9 +1063,7 @@ shutdown(Pid, brutal_kill) -> {error, Reason} -> {error, Reason} end; - shutdown(Pid, Time) -> - case monitor_child(Pid) of ok -> exit(Pid, shutdown), %% Try to shutdown gracefully @@ -957,20 +1110,163 @@ monitor_child(Pid) -> %% that will be handled in shutdown/2. ok end. - - + + +%%----------------------------------------------------------------- +%% Func: terminate_dynamic_children/3 +%% Args: Child = child_rec() +%% Dynamics = ?DICT() | ?SET() +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Returns: ok +%% +%% +%% Shutdown all dynamic children. This happens when the supervisor is +%% stopped. Because the supervisor can have millions of dynamic children, we +%% can have an significative overhead here. +%%----------------------------------------------------------------- +terminate_dynamic_children(Child, Dynamics, SupName) -> + {Pids, EStack0} = monitor_dynamic_children(Child, Dynamics), + Sz = ?SETS:size(Pids), + EStack = case Child#child.shutdown of + brutal_kill -> + ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), + wait_dynamic_children(Child, Pids, Sz, undefined, EStack0); + infinity -> + ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), + wait_dynamic_children(Child, Pids, Sz, undefined, EStack0); + Time -> + ?SETS:fold(fun(P, _) -> exit(P, shutdown) end, ok, Pids), + TRef = erlang:start_timer(Time, self(), kill), + wait_dynamic_children(Child, Pids, Sz, TRef, EStack0) + end, + %% Unroll stacked errors and report them + ?DICT:fold(fun(Reason, Ls, _) -> + report_error(shutdown_error, Reason, + Child#child{pid=Ls}, SupName) + end, ok, EStack). + + +monitor_dynamic_children(#child{restart_type=temporary}, Dynamics) -> + ?SETS:fold(fun(P, {Pids, EStack}) -> + case monitor_child(P) of + ok -> + {?SETS:add_element(P, Pids), EStack}; + {error, normal} -> + {Pids, EStack}; + {error, Reason} -> + {Pids, ?DICT:append(Reason, P, EStack)} + end + end, {?SETS:new(), ?DICT:new()}, Dynamics); +monitor_dynamic_children(#child{restart_type=RType}, Dynamics) -> + ?DICT:fold(fun(P, _, {Pids, EStack}) when is_pid(P) -> + case monitor_child(P) of + ok -> + {?SETS:add_element(P, Pids), EStack}; + {error, normal} when not ?is_permanent(RType) -> + {Pids, EStack}; + {error, Reason} -> + {Pids, ?DICT:append(Reason, P, EStack)} + end; + (?restarting(_), _, {Pids, EStack}) -> + {Pids, EStack} + end, {?SETS:new(), ?DICT:new()}, Dynamics). + +wait_dynamic_children(_Child, _Pids, 0, undefined, EStack) -> + EStack; +wait_dynamic_children(_Child, _Pids, 0, TRef, EStack) -> + %% If the timer has expired before its cancellation, we must empty the + %% mail-box of the 'timeout'-message. + erlang:cancel_timer(TRef), + receive + {timeout, TRef, kill} -> + EStack + after 0 -> + EStack + end; +wait_dynamic_children(#child{shutdown=brutal_kill} = Child, Pids, Sz, + TRef, EStack) -> + receive + {'DOWN', _MRef, process, Pid, killed} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + TRef, EStack); + + {'DOWN', _MRef, process, Pid, Reason} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + TRef, ?DICT:append(Reason, Pid, EStack)) + end; +wait_dynamic_children(#child{restart_type=RType} = Child, Pids, Sz, + TRef, EStack) -> + receive + {'DOWN', _MRef, process, Pid, shutdown} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + TRef, EStack); + + {'DOWN', _MRef, process, Pid, normal} when not ?is_permanent(RType) -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + TRef, EStack); + + {'DOWN', _MRef, process, Pid, Reason} -> + wait_dynamic_children(Child, ?SETS:del_element(Pid, Pids), Sz-1, + TRef, ?DICT:append(Reason, Pid, EStack)); + + {timeout, TRef, kill} -> + ?SETS:fold(fun(P, _) -> exit(P, kill) end, ok, Pids), + wait_dynamic_children(Child, Pids, Sz-1, undefined, EStack) + end. + %%----------------------------------------------------------------- %% Child/State manipulating functions. %%----------------------------------------------------------------- -state_del_child(#child{pid = Pid}, State) when ?is_simple(State) -> - NDynamics = ?DICT:erase(Pid, State#state.dynamics), + +%% Note we do not want to save the parameter list for temporary processes as +%% they will not be restarted, and hence we do not need this information. +%% Especially for dynamic children to simple_one_for_one supervisors +%% it could become very costly as it is not uncommon to spawn +%% very many such processes. +save_child(#child{restart_type = temporary, + mfargs = {M, F, _}} = Child, #state{children = Children} = State) -> + State#state{children = [Child#child{mfargs = {M, F, undefined}} |Children]}; +save_child(Child, #state{children = Children} = State) -> + State#state{children = [Child |Children]}. + +save_dynamic_child(temporary, Pid, _, #state{dynamics = Dynamics} = State) -> + State#state{dynamics = ?SETS:add_element(Pid, dynamics_db(temporary, Dynamics))}; +save_dynamic_child(RestartType, Pid, Args, #state{dynamics = Dynamics} = State) -> + State#state{dynamics = ?DICT:store(Pid, Args, dynamics_db(RestartType, Dynamics))}. + +dynamics_db(temporary, undefined) -> + ?SETS:new(); +dynamics_db(_, undefined) -> + ?DICT:new(); +dynamics_db(_,Dynamics) -> + Dynamics. + +dynamic_child_args(Pid, Dynamics) -> + case ?SETS:is_set(Dynamics) of + true -> + {ok, undefined}; + false -> + ?DICT:find(Pid, Dynamics) + end. + +state_del_child(#child{pid = Pid, restart_type = temporary}, State) when ?is_simple(State) -> + NDynamics = ?SETS:del_element(Pid, dynamics_db(temporary, State#state.dynamics)), + State#state{dynamics = NDynamics}; +state_del_child(#child{pid = Pid, restart_type = RType}, State) when ?is_simple(State) -> + NDynamics = ?DICT:erase(Pid, dynamics_db(RType, State#state.dynamics)), State#state{dynamics = NDynamics}; state_del_child(Child, State) -> NChildren = del_child(Child#child.name, State#state.children), State#state{children = NChildren}. +del_child(Name, [Ch=#child{pid = ?restarting(_)}|_]=Chs) when Ch#child.name =:= Name -> + Chs; +del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name, Ch#child.restart_type =:= temporary -> + Chs; del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name -> [Ch#child{pid = undefined} | Chs]; +del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid, Ch#child.restart_type =:= temporary -> + Chs; del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid -> [Ch#child{pid = undefined} | Chs]; del_child(Name, [Ch|Chs]) -> @@ -993,7 +1289,38 @@ split_child(_, [], After) -> {lists:reverse(After), []}. get_child(Name, State) -> + get_child(Name, State, false). +get_child(Pid, State, AllowPid) when AllowPid, is_pid(Pid) -> + get_dynamic_child(Pid, State); +get_child(Name, State, _) -> lists:keysearch(Name, #child.name, State#state.children). + +get_dynamic_child(Pid, #state{children=[Child], dynamics=Dynamics}) -> + DynamicsDb = dynamics_db(Child#child.restart_type, Dynamics), + case is_dynamic_pid(Pid, DynamicsDb) of + true -> + {value, Child#child{pid=Pid}}; + false -> + RPid = restarting(Pid), + case is_dynamic_pid(RPid, DynamicsDb) of + true -> + {value, Child#child{pid=RPid}}; + false -> + case erlang:is_process_alive(Pid) of + true -> false; + false -> {value, Child} + end + end + end. + +is_dynamic_pid(Pid, Dynamics) -> + case ?SETS:is_set(Dynamics) of + true -> + ?SETS:is_element(Pid, Dynamics); + false -> + ?DICT:is_key(Pid, Dynamics) + end. + replace_child(Child, State) -> Chs = do_replace_child(Child, State#state.children), State#state{children = Chs}. @@ -1012,13 +1339,13 @@ remove_child(Child, State) -> %% Args: SupName = {local, atom()} | {global, atom()} | self %% Type = {Strategy, MaxIntensity, Period} %% Strategy = one_for_one | one_for_all | simple_one_for_one | -%% rest_for_one -%% MaxIntensity = integer() -%% Period = integer() +%% rest_for_one +%% MaxIntensity = integer() >= 0 +%% Period = integer() > 0 %% Mod :== atom() -%% Arsg :== term() +%% Args :== term() %% Purpose: Check that Type is of correct type (!) -%% Returns: {ok, #state} | Error +%% Returns: {ok, state()} | Error %%----------------------------------------------------------------- init_state(SupName, Type, Mod, Args) -> case catch init_state1(SupName, Type, Mod, Args) of @@ -1033,46 +1360,45 @@ init_state1(SupName, {Strategy, MaxIntensity, Period}, Mod, Args) -> validIntensity(MaxIntensity), validPeriod(Period), {ok, #state{name = supname(SupName,Mod), - strategy = Strategy, - intensity = MaxIntensity, - period = Period, - module = Mod, - args = Args}}; + strategy = Strategy, + intensity = MaxIntensity, + period = Period, + module = Mod, + args = Args}}; init_state1(_SupName, Type, _, _) -> {invalid_type, Type}. -validStrategy(simple_one_for_one_terminate) -> true; -validStrategy(simple_one_for_one) -> true; -validStrategy(one_for_one) -> true; -validStrategy(one_for_all) -> true; -validStrategy(rest_for_one) -> true; -validStrategy(What) -> throw({invalid_strategy, What}). +validStrategy(simple_one_for_one) -> true; +validStrategy(one_for_one) -> true; +validStrategy(one_for_all) -> true; +validStrategy(rest_for_one) -> true; +validStrategy(What) -> throw({invalid_strategy, What}). validIntensity(Max) when is_integer(Max), Max >= 0 -> true; -validIntensity(What) -> throw({invalid_intensity, What}). +validIntensity(What) -> throw({invalid_intensity, What}). validPeriod(Period) when is_integer(Period), Period > 0 -> true; validPeriod(What) -> throw({invalid_period, What}). -supname(self,Mod) -> {self(),Mod}; -supname(N,_) -> N. +supname(self, Mod) -> {self(), Mod}; +supname(N, _) -> N. %%% ------------------------------------------------------ %%% Check that the children start specification is valid. %%% Shall be a six (6) tuple %%% {Name, Func, RestartType, Shutdown, ChildType, Modules} %%% where Name is an atom -%%% Func is {Mod, Fun, Args} == {atom, atom, list} +%%% Func is {Mod, Fun, Args} == {atom(), atom(), list()} %%% RestartType is permanent | temporary | transient | %%% intrinsic | {permanent, Delay} | %%% {transient, Delay} | {intrinsic, Delay} %% where Delay >= 0 -%%% Shutdown = integer() | infinity | brutal_kill +%%% Shutdown = integer() > 0 | infinity | brutal_kill %%% ChildType = supervisor | worker %%% Modules = [atom()] | dynamic -%%% Returns: {ok, [#child]} | Error +%%% Returns: {ok, [child_rec()]} | Error %%% ------------------------------------------------------ check_startspec(Children) -> check_startspec(Children, []). @@ -1100,14 +1426,14 @@ check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods) -> validChildType(ChildType), validShutdown(Shutdown, ChildType), validMods(Mods), - {ok, #child{name = Name, mfa = Func, restart_type = RestartType, + {ok, #child{name = Name, mfargs = Func, restart_type = RestartType, shutdown = Shutdown, child_type = ChildType, modules = Mods}}. validChildType(supervisor) -> true; validChildType(worker) -> true; validChildType(What) -> throw({invalid_child_type, What}). -validName(_Name) -> true. +validName(_Name) -> true. validFunc({M, F, A}) when is_atom(M), is_atom(F), @@ -1130,13 +1456,13 @@ validDelay(What) -> throw({invalid_delay, What}). validShutdown(Shutdown, _) when is_integer(Shutdown), Shutdown > 0 -> true; -validShutdown(infinity, supervisor) -> true; +validShutdown(infinity, _) -> true; validShutdown(brutal_kill, _) -> true; validShutdown(Shutdown, _) -> throw({invalid_shutdown, Shutdown}). validMods(dynamic) -> true; validMods(Mods) when is_list(Mods) -> - lists:foreach(fun (Mod) -> + lists:foreach(fun(Mod) -> if is_atom(Mod) -> ok; true -> throw({invalid_module, Mod}) @@ -1210,15 +1536,18 @@ report_error(Error, Reason, Child, SupName) -> {offender, extract_child(Child)}], error_logger:error_report(supervisor_report, ErrorMsg). -shutdown_error_reporter(SupName) -> - fun(Reason, Child) -> - report_error(shutdown_error, Reason, Child, SupName) - end. +extract_child(Child) when is_list(Child#child.pid) -> + [{nb_children, length(Child#child.pid)}, + {name, Child#child.name}, + {mfargs, Child#child.mfargs}, + {restart_type, Child#child.restart_type}, + {shutdown, Child#child.shutdown}, + {child_type, Child#child.child_type}]; extract_child(Child) -> [{pid, Child#child.pid}, {name, Child#child.name}, - {mfa, Child#child.mfa}, + {mfargs, Child#child.mfargs}, {restart_type, Child#child.restart_type}, {shutdown, Child#child.shutdown}, {child_type, Child#child.child_type}]. diff --git a/src/supervisor2_tests.erl b/src/supervisor2_tests.erl new file mode 100644 index 00000000..5a47e309 --- /dev/null +++ b/src/supervisor2_tests.erl @@ -0,0 +1,70 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2011-2013 GoPivotal, Inc. All rights reserved. +%% + +-module(supervisor2_tests). +-behaviour(supervisor2). + +-export([test_all/0, start_link/0]). +-export([init/1]). + +test_all() -> + ok = check_shutdown(stop, 200, 200, 2000), + ok = check_shutdown(ignored, 1, 2, 2000). + +check_shutdown(SigStop, Iterations, ChildCount, SupTimeout) -> + {ok, Sup} = supervisor2:start_link(?MODULE, [SupTimeout]), + Res = lists:foldl( + fun (I, ok) -> + TestSupPid = erlang:whereis(?MODULE), + ChildPids = + [begin + {ok, ChildPid} = + supervisor2:start_child(TestSupPid, []), + ChildPid + end || _ <- lists:seq(1, ChildCount)], + MRef = erlang:monitor(process, TestSupPid), + [P ! SigStop || P <- ChildPids], + ok = supervisor2:terminate_child(Sup, test_sup), + {ok, _} = supervisor2:restart_child(Sup, test_sup), + receive + {'DOWN', MRef, process, TestSupPid, shutdown} -> + ok; + {'DOWN', MRef, process, TestSupPid, Reason} -> + {error, {I, Reason}} + end; + (_, R) -> + R + end, ok, lists:seq(1, Iterations)), + unlink(Sup), + exit(Sup, shutdown), + Res. + +start_link() -> + Pid = spawn_link(fun () -> + process_flag(trap_exit, true), + receive stop -> ok end + end), + {ok, Pid}. + +init([Timeout]) -> + {ok, {{one_for_one, 0, 1}, + [{test_sup, {supervisor2, start_link, + [{local, ?MODULE}, ?MODULE, []]}, + transient, Timeout, supervisor, [?MODULE]}]}}; +init([]) -> + {ok, {{simple_one_for_one, 0, 1}, + [{test_worker, {?MODULE, start_link, []}, + temporary, 1000, worker, [?MODULE]}]}}. diff --git a/src/tcp_acceptor.erl b/src/tcp_acceptor.erl index 344196d7..267ce4f1 100644 --- a/src/tcp_acceptor.erl +++ b/src/tcp_acceptor.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(tcp_acceptor). @@ -55,21 +55,30 @@ handle_info({inet_async, LSock, Ref, {ok, Sock}}, inet_db:register_socket(Sock, Mod), %% handle - file_handle_cache:transfer(apply(M, F, A ++ [Sock])), - ok = file_handle_cache:obtain(), + case tune_buffer_size(Sock) of + ok -> file_handle_cache:transfer( + apply(M, F, A ++ [Sock])), + ok = file_handle_cache:obtain(); + {error, enotconn} -> catch port_close(Sock); + {error, Err} -> {ok, {IPAddress, Port}} = inet:sockname(LSock), + error_logger:error_msg( + "failed to tune buffer size of " + "connection accepted on ~s:~p - ~p (~s)~n", + [rabbit_misc:ntoab(IPAddress), Port, + Err, rabbit_misc:format_inet_error(Err)]), + catch port_close(Sock) + end, %% accept more accept(State); -handle_info({inet_async, LSock, Ref, {error, closed}}, - State=#state{sock=LSock, ref=Ref}) -> - %% It would be wrong to attempt to restart the acceptor when we - %% know this will fail. - {stop, normal, State}; - handle_info({inet_async, LSock, Ref, {error, Reason}}, State=#state{sock=LSock, ref=Ref}) -> - {stop, {accept_failed, Reason}, State}; + case Reason of + closed -> {stop, normal, State}; %% listening socket closed + econnaborted -> accept(State); %% client sent RST before we accepted + _ -> {stop, {accept_failed, Reason}, State} + end; handle_info(_Info, State) -> {noreply, State}. @@ -87,3 +96,10 @@ accept(State = #state{sock=LSock}) -> {ok, Ref} -> {noreply, State#state{ref=Ref}}; Error -> {stop, {cannot_accept, Error}, State} end. + +tune_buffer_size(Sock) -> + case inet:getopts(Sock, [sndbuf, recbuf, buffer]) of + {ok, BufSizes} -> BufSz = lists:max([Sz || {_Opt, Sz} <- BufSizes]), + inet:setopts(Sock, [{buffer, BufSz}]); + Error -> Error + end. diff --git a/src/tcp_acceptor_sup.erl b/src/tcp_acceptor_sup.erl index d8844441..3619875f 100644 --- a/src/tcp_acceptor_sup.erl +++ b/src/tcp_acceptor_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(tcp_acceptor_sup). diff --git a/src/tcp_listener.erl b/src/tcp_listener.erl index fb01c792..4b4a31b5 100644 --- a/src/tcp_listener.erl +++ b/src/tcp_listener.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(tcp_listener). diff --git a/src/tcp_listener_sup.erl b/src/tcp_listener_sup.erl index 9ee921b4..2a65cc17 100644 --- a/src/tcp_listener_sup.erl +++ b/src/tcp_listener_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(tcp_listener_sup). diff --git a/src/test_sup.erl b/src/test_sup.erl index 7f4b5049..da325f1e 100644 --- a/src/test_sup.erl +++ b/src/test_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(test_sup). @@ -34,7 +34,7 @@ %%---------------------------------------------------------------------------- test_supervisor_delayed_restart() -> - passed = with_sup(simple_one_for_one_terminate, + passed = with_sup(simple_one_for_one, fun (SupPid) -> {ok, _ChildPid} = supervisor2:start_child(SupPid, []), diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index fb184d1a..a07f6c65 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% %% In practice Erlang shouldn't be allowed to grow to more than a half @@ -27,7 +27,7 @@ -behaviour(gen_server). --export([start_link/1]). +-export([start_link/1, start_link/3]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -49,9 +49,11 @@ -record(state, {total_memory, memory_limit, + memory_fraction, timeout, timer, - alarmed + alarmed, + alarm_funs }). %%---------------------------------------------------------------------------- @@ -59,6 +61,8 @@ -ifdef(use_specs). -spec(start_link/1 :: (float()) -> rabbit_types:ok_pid_or_error()). +-spec(start_link/3 :: (float(), fun ((any()) -> 'ok'), + fun ((any()) -> 'ok')) -> rabbit_types:ok_pid_or_error()). -spec(get_total_memory/0 :: () -> (non_neg_integer() | 'unknown')). -spec(get_vm_limit/0 :: () -> non_neg_integer()). -spec(get_check_interval/0 :: () -> non_neg_integer()). @@ -73,11 +77,9 @@ %% Public API %%---------------------------------------------------------------------------- -get_total_memory() -> - get_total_memory(os:type()). +get_total_memory() -> get_total_memory(os:type()). -get_vm_limit() -> - get_vm_limit(os:type()). +get_vm_limit() -> get_vm_limit(os:type()). get_check_interval() -> gen_server:call(?MODULE, get_check_interval, infinity). @@ -99,24 +101,27 @@ get_memory_limit() -> %% gen_server callbacks %%---------------------------------------------------------------------------- -start_link(Args) -> - gen_server:start_link({local, ?SERVER}, ?MODULE, [Args], []). +start_link(MemFraction) -> + start_link(MemFraction, + fun alarm_handler:set_alarm/1, fun alarm_handler:clear_alarm/1). -init([MemFraction]) -> +start_link(MemFraction, AlarmSet, AlarmClear) -> + gen_server:start_link({local, ?SERVER}, ?MODULE, + [MemFraction, {AlarmSet, AlarmClear}], []). + +init([MemFraction, AlarmFuns]) -> TRef = start_timer(?DEFAULT_MEMORY_CHECK_INTERVAL), - State = #state { timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, - timer = TRef, - alarmed = false}, + State = #state { timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, + timer = TRef, + alarmed = false, + alarm_funs = AlarmFuns }, {ok, set_mem_limits(State, MemFraction)}. handle_call(get_vm_memory_high_watermark, _From, State) -> - {reply, State#state.memory_limit / State#state.total_memory, State}; + {reply, State#state.memory_fraction, State}; handle_call({set_vm_memory_high_watermark, MemFraction}, _From, State) -> - State1 = set_mem_limits(State, MemFraction), - error_logger:info_msg("Memory alarm changed to ~p, ~p bytes.~n", - [MemFraction, State1#state.memory_limit]), - {reply, ok, State1}; + {reply, ok, set_mem_limits(State, MemFraction)}; handle_call(get_check_interval, _From, State) -> {reply, State#state.timeout, State}; @@ -168,32 +173,41 @@ set_mem_limits(State, MemFraction) -> ?MEMORY_SIZE_FOR_UNKNOWN_OS; M -> M end, - MemLim = get_mem_limit(MemFraction, TotalMemory), + UsableMemory = case get_vm_limit() of + Limit when Limit < TotalMemory -> + error_logger:warning_msg( + "Only ~pMB of ~pMB memory usable due to " + "limited address space.~n", + [trunc(V/?ONE_MB) || V <- [Limit, TotalMemory]]), + Limit; + _ -> + TotalMemory + end, + MemLim = trunc(MemFraction * UsableMemory), error_logger:info_msg("Memory limit set to ~pMB of ~pMB total.~n", [trunc(MemLim/?ONE_MB), trunc(TotalMemory/?ONE_MB)]), - internal_update(State #state { total_memory = TotalMemory, - memory_limit = MemLim }). + internal_update(State #state { total_memory = TotalMemory, + memory_limit = MemLim, + memory_fraction = MemFraction}). internal_update(State = #state { memory_limit = MemLimit, - alarmed = Alarmed}) -> + alarmed = Alarmed, + alarm_funs = {AlarmSet, AlarmClear} }) -> MemUsed = erlang:memory(total), NewAlarmed = MemUsed > MemLimit, case {Alarmed, NewAlarmed} of - {false, true} -> - emit_update_info(set, MemUsed, MemLimit), - alarm_handler:set_alarm({{resource_limit, memory, node()}, []}); - {true, false} -> - emit_update_info(clear, MemUsed, MemLimit), - alarm_handler:clear_alarm({resource_limit, memory, node()}); - _ -> - ok + {false, true} -> emit_update_info(set, MemUsed, MemLimit), + AlarmSet({{resource_limit, memory, node()}, []}); + {true, false} -> emit_update_info(clear, MemUsed, MemLimit), + AlarmClear({resource_limit, memory, node()}); + _ -> ok end, State #state {alarmed = NewAlarmed}. -emit_update_info(State, MemUsed, MemLimit) -> +emit_update_info(AlarmState, MemUsed, MemLimit) -> error_logger:info_msg( "vm_memory_high_watermark ~p. Memory used:~p allowed:~p~n", - [State, MemUsed, MemLimit]). + [AlarmState, MemUsed, MemLimit]). start_timer(Timeout) -> {ok, TRef} = timer:send_interval(Timeout, update), @@ -207,7 +221,7 @@ get_vm_limit({win32,_OSname}) -> 8 -> 8*1024*1024*1024*1024 %% 8 TB for 64 bits 2^42 end; -%% On a 32-bit machine, if you're using more than 2 gigs of RAM you're +%% On a 32-bit machine, if you're using more than 4 gigs of RAM you're %% in big trouble anyway. get_vm_limit(_OsType) -> case erlang:system_info(wordsize) of @@ -216,10 +230,6 @@ get_vm_limit(_OsType) -> %%http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details end. -get_mem_limit(MemFraction, TotalMemory) -> - AvMem = lists:min([TotalMemory, get_vm_limit()]), - trunc(AvMem * MemFraction). - %%---------------------------------------------------------------------------- %% Internal Helpers %%---------------------------------------------------------------------------- @@ -253,29 +263,11 @@ get_total_memory({unix,openbsd}) -> sysctl("hw.usermem"); get_total_memory({win32,_OSname}) -> - %% Due to the Erlang print format bug, on Windows boxes the memory - %% size is broken. For example Windows 7 64 bit with 4Gigs of RAM - %% we get negative memory size: - %% > os_mon_sysinfo:get_mem_info(). - %% ["76 -1658880 1016913920 -1 -1021628416 2147352576 2134794240\n"] - %% Due to this bug, we don't actually know anything. Even if the - %% number is postive we can't be sure if it's correct. This only - %% affects us on os_mon versions prior to 2.2.1. - case application:get_key(os_mon, vsn) of - undefined -> - unknown; - {ok, Version} -> - case rabbit_misc:version_compare(Version, "2.2.1", lt) of - true -> %% os_mon is < 2.2.1, so we know nothing - unknown; - false -> - [Result|_] = os_mon_sysinfo:get_mem_info(), - {ok, [_MemLoad, TotPhys, _AvailPhys, - _TotPage, _AvailPage, _TotV, _AvailV], _RestStr} = - io_lib:fread("~d~d~d~d~d~d~d", Result), - TotPhys - end - end; + [Result|_] = os_mon_sysinfo:get_mem_info(), + {ok, [_MemLoad, TotPhys, _AvailPhys, _TotPage, _AvailPage, _TotV, _AvailV], + _RestStr} = + io_lib:fread("~d~d~d~d~d~d~d", Result), + TotPhys; get_total_memory({unix, linux}) -> File = read_proc_file("/proc/meminfo"), diff --git a/src/worker_pool.erl b/src/worker_pool.erl index c9ecccd6..488db5ec 100644 --- a/src/worker_pool.erl +++ b/src/worker_pool.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(worker_pool). diff --git a/src/worker_pool_sup.erl b/src/worker_pool_sup.erl index ff356366..24bc375c 100644 --- a/src/worker_pool_sup.erl +++ b/src/worker_pool_sup.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(worker_pool_sup). diff --git a/src/worker_pool_worker.erl b/src/worker_pool_worker.erl index 1ddcebb2..a976503f 100644 --- a/src/worker_pool_worker.erl +++ b/src/worker_pool_worker.erl @@ -10,8 +10,8 @@ %% %% The Original Code is RabbitMQ. %% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2013 GoPivotal, Inc. All rights reserved. %% -module(worker_pool_worker). @@ -23,7 +23,7 @@ -export([set_maximum_since_use/2]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3, prioritise_cast/2]). + terminate/2, code_change/3, prioritise_cast/3]). %%---------------------------------------------------------------------------- @@ -73,8 +73,8 @@ init([WId]) -> {ok, WId, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -prioritise_cast({set_maximum_since_use, _Age}, _State) -> 8; -prioritise_cast(_Msg, _State) -> 0. +prioritise_cast({set_maximum_since_use, _Age}, _Len, _State) -> 8; +prioritise_cast(_Msg, _Len, _State) -> 0. handle_call({submit, Fun}, From, WId) -> gen_server2:reply(From, run(Fun)), |