From 5f4b08236c8806d3fd2d56f0f11ddc8309b6e506 Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Mon, 24 Nov 2014 20:40:21 +0100 Subject: Add plugin paths to the beginning of the code path The goal is to make sure the right application is picked, in case the same one is available as both a RabbitMQ plugin and an external Erlang application. For instance, this could be the case with the Cowboy application: a version is available in the plugins, but the user could add an incompatible version to Erlang/OTP libdir or set ERL_LIBS to point to it. There's one exception currently: eldap. This application used to be available as a 3rd party one. But since Erlang R15B01, it's included in the standard library. We trust this version to have a stable API. Therefore, if the node runs on R15B01 or later and eldap version is 1.0+, we use this one. In all other cases, we don't trust it and prefer the RabbitMQ plugin. --- src/rabbit_plugins.erl | 59 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/src/rabbit_plugins.erl b/src/rabbit_plugins.erl index e290fb53..fe614b6a 100644 --- a/src/rabbit_plugins.erl +++ b/src/rabbit_plugins.erl @@ -90,7 +90,7 @@ list(PluginsDir) -> EZs = [{ez, EZ} || EZ <- filelib:wildcard("*.ez", PluginsDir)], FreeApps = [{app, App} || App <- filelib:wildcard("*/ebin/*.app", PluginsDir)], - {Plugins, Problems} = + {AvailablePlugins, Problems} = lists:foldl(fun ({error, EZ, Reason}, {Plugins1, Problems1}) -> {Plugins1, [{EZ, Reason} | Problems1]}; (Plugin = #plugin{}, {Plugins1, Problems1}) -> @@ -102,6 +102,7 @@ list(PluginsDir) -> _ -> rabbit_log:warning( "Problem reading some plugins: ~p~n", [Problems]) end, + Plugins = lists:filter(fun keep_plugin/1, AvailablePlugins), ensure_dependencies(Plugins). %% @doc Read the list of enabled plugins from the supplied term file. @@ -132,6 +133,55 @@ dependencies(Reverse, Sources, AllPlugins) -> true = digraph:delete(G), Dests. +%% For a few known cases, an externally provided plugin can be trusted. +%% In this special case, it overrides the plugin. +keep_plugin(#plugin{name = App} = Plugin) -> + case application:load(App) of + {error, {already_loaded, _}} -> + not plugin_provided_by_otp(Plugin); + ok -> + Ret = not plugin_provided_by_otp(Plugin), + application:unload(App), + Ret; + _ -> + true + end. + +plugin_provided_by_otp(#plugin{name = eldap, version = PluginVsn}) -> + %% eldap was added to Erlang/OTP R15B01. We prefer this version + %% to the plugin. Before, eldap always advertised version "1". In + %% R15B01, it got proper versionning starting from "1.0". If eldap's + %% version is "1", we keep using the plugin, otherwise we take the + %% OTP application. As an extra check, we look at ERTS version to be + %% sure we're on R15B01. + case application:get_key(eldap, vsn) of + {ok, PluginVsn} -> + %% The plugin itself; the plugin was previously added to the + %% code path. + false; + {ok, "1"} -> + %% The version available on GitHub, not part of OTP. + false; + {ok, Vsn} -> + try rabbit_misc:version_compare(Vsn, "1.0", gte) of + true -> + %% Probably part of OTP. Let's check ERTS version to + %% be sure. + rabbit_misc:version_compare( + erlang:system_info(version), "5.9.1", gte); + false -> + %% Probably not part of OTP. Use the plugin to be safe. + false + catch + _:_ -> + %% Couldn't parse the version. It's not the OTP + %% application. + false + end + end; +plugin_provided_by_otp(_) -> + false. + %% Make sure we don't list OTP apps in here, and also that we detect %% missing dependencies. ensure_dependencies(Plugins) -> @@ -158,7 +208,7 @@ is_loadable(App) -> ok -> application:unload(App), true; _ -> false - end. + end. %%---------------------------------------------------------------------------- @@ -197,8 +247,9 @@ clean_plugin(Plugin, ExpandDir) -> delete_recursively(rabbit_misc:format("~s/~s", [ExpandDir, Plugin])). prepare_dir_plugin(PluginAppDescPath) -> - code:add_path(filename:dirname(PluginAppDescPath)), - list_to_atom(filename:basename(PluginAppDescPath, ".app")). + rabbit_log:info("Plugins: adding \"~s\" to the beginning of the code path.~n", + [filename:dirname(PluginAppDescPath)]), + code:add_patha(filename:dirname(PluginAppDescPath)). %%---------------------------------------------------------------------------- -- cgit v1.2.1 -- cgit v1.2.1 From 6c026c7445f76b0a7a20cd4d0af21298f20fc930 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Wed, 26 Nov 2014 14:34:58 +0000 Subject: deb / RPM changelogs --- packaging/RPMS/Fedora/rabbitmq-server.spec | 3 +++ packaging/debs/Debian/debian/changelog | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/packaging/RPMS/Fedora/rabbitmq-server.spec b/packaging/RPMS/Fedora/rabbitmq-server.spec index f363bce6..c77d7e9d 100644 --- a/packaging/RPMS/Fedora/rabbitmq-server.spec +++ b/packaging/RPMS/Fedora/rabbitmq-server.spec @@ -130,6 +130,9 @@ done rm -rf %{buildroot} %changelog +* Wed Nov 26 2014 simon@rabbitmq.com 3.4.2-1 +- New Upstream Release + * Wed Oct 29 2014 simon@rabbitmq.com 3.4.1-1 - New Upstream Release diff --git a/packaging/debs/Debian/debian/changelog b/packaging/debs/Debian/debian/changelog index a47caa0e..5e3744fd 100644 --- a/packaging/debs/Debian/debian/changelog +++ b/packaging/debs/Debian/debian/changelog @@ -1,3 +1,9 @@ +rabbitmq-server (3.4.2-1) unstable; urgency=low + + * New Upstream Release + + -- Simon MacMullen Wed, 26 Nov 2014 12:11:12 +0000 + rabbitmq-server (3.4.1-1) unstable; urgency=low * New Upstream Release -- cgit v1.2.1 From 15bc16942ca5a7132c289df204258ec234abc129 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Wed, 26 Nov 2014 16:45:25 +0000 Subject: Don't halt execution if RABBITMQ_CTL_ERL_ARGS is set. --- scripts/rabbitmq-env | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/rabbitmq-env b/scripts/rabbitmq-env index 63cfda3c..5a3e73bc 100755 --- a/scripts/rabbitmq-env +++ b/scripts/rabbitmq-env @@ -111,3 +111,6 @@ DEFAULT_NODE_PORT=5672 [ "x" = "x$RABBITMQ_CTL_ERL_ARGS" ] && RABBITMQ_CTL_ERL_ARGS=${CTL_ERL_ARGS} ##--- End of overridden variables + +# Since we source this elsewhere, don't accidentally stop execution +true -- cgit v1.2.1 From f8466d59675c57c89935c7b2466d7e3177440f40 Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Wed, 26 Nov 2014 18:21:44 +0100 Subject: Only check ERTS version to determine if we accept an external eldap --- src/rabbit_plugins.erl | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/src/rabbit_plugins.erl b/src/rabbit_plugins.erl index fe614b6a..fd6acb5c 100644 --- a/src/rabbit_plugins.erl +++ b/src/rabbit_plugins.erl @@ -148,37 +148,9 @@ keep_plugin(#plugin{name = App} = Plugin) -> end. plugin_provided_by_otp(#plugin{name = eldap, version = PluginVsn}) -> - %% eldap was added to Erlang/OTP R15B01. We prefer this version - %% to the plugin. Before, eldap always advertised version "1". In - %% R15B01, it got proper versionning starting from "1.0". If eldap's - %% version is "1", we keep using the plugin, otherwise we take the - %% OTP application. As an extra check, we look at ERTS version to be - %% sure we're on R15B01. - case application:get_key(eldap, vsn) of - {ok, PluginVsn} -> - %% The plugin itself; the plugin was previously added to the - %% code path. - false; - {ok, "1"} -> - %% The version available on GitHub, not part of OTP. - false; - {ok, Vsn} -> - try rabbit_misc:version_compare(Vsn, "1.0", gte) of - true -> - %% Probably part of OTP. Let's check ERTS version to - %% be sure. - rabbit_misc:version_compare( - erlang:system_info(version), "5.9.1", gte); - false -> - %% Probably not part of OTP. Use the plugin to be safe. - false - catch - _:_ -> - %% Couldn't parse the version. It's not the OTP - %% application. - false - end - end; + %% eldap was added to Erlang/OTP R15B01 (ERTS 5.9.1). In this case, + %% we prefer this version to the plugin. + rabbit_misc:version_compare(erlang:system_info(version), "5.9.1", gte); plugin_provided_by_otp(_) -> false. -- cgit v1.2.1 From 6276d667177d915aff5c5b86e895a9b38b7ea2ae Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Thu, 27 Nov 2014 12:13:09 +0100 Subject: Check cluster consistency before joining a cluster Before this change, the node would join the cluster and the consistency would be only checked during application restart (rabbitmqctl start_app). This could lead to crash during the join due to Erlang, Mnesia or RabbitMQ incompatibilities. Now, the join_cluster command reports the problem. Here's an example: $ rabbitmqctl join_cluster Clustering node with ... Error: {inconsistent_cluster, "OTP version mismatch: local node is R16B03-1, remote node 17.3"} --- src/rabbit_mnesia.erl | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index fa51dd70..b55d53b6 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -174,18 +174,25 @@ join_cluster(DiscoveryNode, NodeType) -> {ClusterNodes, _, _} = discover_cluster([DiscoveryNode]), case me_in_nodes(ClusterNodes) of false -> - %% reset the node. this simplifies things and it will be needed in - %% this case - we're joining a new cluster with new nodes which - %% are not in synch with the current node. I also lifts the burden - %% of reseting the node from the user. - reset_gracefully(), - - %% Join the cluster - rabbit_log:info("Clustering with ~p as ~p node~n", - [ClusterNodes, NodeType]), - ok = init_db_with_mnesia(ClusterNodes, NodeType, true, true), - rabbit_node_monitor:notify_joined_cluster(), - ok; + case check_cluster_consistency(DiscoveryNode, false) of + {ok, _} -> + %% reset the node. this simplifies things and it + %% will be needed in this case - we're joining a new + %% cluster with new nodes which are not in synch + %% with the current node. It also lifts the burden + %% of resetting the node from the user. + reset_gracefully(), + + %% Join the cluster + rabbit_log:info("Clustering with ~p as ~p node~n", + [ClusterNodes, NodeType]), + ok = init_db_with_mnesia(ClusterNodes, NodeType, + true, true), + rabbit_node_monitor:notify_joined_cluster(), + ok; + {error, Reason} -> + {error, Reason} + end; true -> rabbit_log:info("Already member of cluster: ~p~n", [ClusterNodes]), {ok, already_member} @@ -545,7 +552,7 @@ maybe_force_load() -> check_cluster_consistency() -> %% We want to find 0 or 1 consistent nodes. case lists:foldl( - fun (Node, {error, _}) -> check_cluster_consistency(Node); + fun (Node, {error, _}) -> check_cluster_consistency(Node, true); (_Node, {ok, Status}) -> {ok, Status} end, {error, not_found}, nodes_excl_me(cluster_nodes(all))) of @@ -575,17 +582,22 @@ check_cluster_consistency() -> throw(E) end. -check_cluster_consistency(Node) -> +check_cluster_consistency(Node, AlreadyFormed) -> case rpc:call(Node, rabbit_mnesia, node_info, []) of {badrpc, _Reason} -> {error, not_found}; {_OTP, _Rabbit, {error, _}} -> {error, not_found}; - {OTP, Rabbit, {ok, Status}} -> + {OTP, Rabbit, {ok, Status}} when AlreadyFormed -> case check_consistency(OTP, Rabbit, Node, Status) of {error, _} = E -> E; {ok, Res} -> {ok, Res} end; + {OTP, Rabbit, {ok, Status}} -> + case check_consistency(OTP, Rabbit) of + {error, _} = E -> E; + ok -> {ok, Status} + end; {_OTP, Rabbit, _Hash, _Status} -> %% delegate hash checking implies version mismatch version_error("Rabbit", rabbit_misc:version(), Rabbit) -- cgit v1.2.1 From f6cfb1a6195aca0af66652b918160dcce02ce34b Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Fri, 28 Nov 2014 10:41:59 +0000 Subject: Rename this to indicate what it does, not the current circumstances in which it is called. --- src/rabbit_mnesia.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index b55d53b6..80fbcd68 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -582,13 +582,13 @@ check_cluster_consistency() -> throw(E) end. -check_cluster_consistency(Node, AlreadyFormed) -> +check_cluster_consistency(Node, CheckNodesConsistency) -> case rpc:call(Node, rabbit_mnesia, node_info, []) of {badrpc, _Reason} -> {error, not_found}; {_OTP, _Rabbit, {error, _}} -> {error, not_found}; - {OTP, Rabbit, {ok, Status}} when AlreadyFormed -> + {OTP, Rabbit, {ok, Status}} when CheckNodesConsistency -> case check_consistency(OTP, Rabbit, Node, Status) of {error, _} = E -> E; {ok, Res} -> {ok, Res} -- cgit v1.2.1 From 57d48705f822d41ca7f13b3c72c94f1b4dd7b91b Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Fri, 28 Nov 2014 12:33:18 +0100 Subject: Remove "Plugins: adding to code path" log message --- src/rabbit_plugins.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rabbit_plugins.erl b/src/rabbit_plugins.erl index fd6acb5c..3aad48d1 100644 --- a/src/rabbit_plugins.erl +++ b/src/rabbit_plugins.erl @@ -219,8 +219,6 @@ clean_plugin(Plugin, ExpandDir) -> delete_recursively(rabbit_misc:format("~s/~s", [ExpandDir, Plugin])). prepare_dir_plugin(PluginAppDescPath) -> - rabbit_log:info("Plugins: adding \"~s\" to the beginning of the code path.~n", - [filename:dirname(PluginAppDescPath)]), code:add_patha(filename:dirname(PluginAppDescPath)). %%---------------------------------------------------------------------------- -- cgit v1.2.1 From d6798de8bf063780787c87dc7c0d934d27452cab Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Fri, 28 Nov 2014 12:45:14 +0100 Subject: No need to load/unload the plugin around the plugin_provided_by_otp/1 check While here, remove the unused PluginVsn variable. --- src/rabbit_plugins.erl | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/rabbit_plugins.erl b/src/rabbit_plugins.erl index 3aad48d1..dfde5289 100644 --- a/src/rabbit_plugins.erl +++ b/src/rabbit_plugins.erl @@ -102,7 +102,8 @@ list(PluginsDir) -> _ -> rabbit_log:warning( "Problem reading some plugins: ~p~n", [Problems]) end, - Plugins = lists:filter(fun keep_plugin/1, AvailablePlugins), + Plugins = lists:filter(fun(P) -> not plugin_provided_by_otp(P) end, + AvailablePlugins), ensure_dependencies(Plugins). %% @doc Read the list of enabled plugins from the supplied term file. @@ -135,19 +136,7 @@ dependencies(Reverse, Sources, AllPlugins) -> %% For a few known cases, an externally provided plugin can be trusted. %% In this special case, it overrides the plugin. -keep_plugin(#plugin{name = App} = Plugin) -> - case application:load(App) of - {error, {already_loaded, _}} -> - not plugin_provided_by_otp(Plugin); - ok -> - Ret = not plugin_provided_by_otp(Plugin), - application:unload(App), - Ret; - _ -> - true - end. - -plugin_provided_by_otp(#plugin{name = eldap, version = PluginVsn}) -> +plugin_provided_by_otp(#plugin{name = eldap}) -> %% eldap was added to Erlang/OTP R15B01 (ERTS 5.9.1). In this case, %% we prefer this version to the plugin. rabbit_misc:version_compare(erlang:system_info(version), "5.9.1", gte); -- cgit v1.2.1 From bca1e2ea0903c8ce1c130a08a16f86eff5fb0e44 Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Wed, 3 Dec 2014 15:43:57 +0100 Subject: Throw an error if at least one plugin's module can't be loaded This prevents a plugin from being enabled if it won't be able to actually run later. A use case for this is a plugin built with Erlang version N, but executed on Erlang version M, where M isn't capable of running the bytecode from N. This was the case with Eralng R14B vs. R15B. The "rabbitmq-plugins enable " command reports the error and the plugin remains disabled. A node reports the error too and refuses to start, exactly as if the plugin was missing. --- src/rabbit_plugins.erl | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/rabbit_plugins.erl b/src/rabbit_plugins.erl index dfde5289..55f7359b 100644 --- a/src/rabbit_plugins.erl +++ b/src/rabbit_plugins.erl @@ -208,7 +208,27 @@ clean_plugin(Plugin, ExpandDir) -> delete_recursively(rabbit_misc:format("~s/~s", [ExpandDir, Plugin])). prepare_dir_plugin(PluginAppDescPath) -> - code:add_patha(filename:dirname(PluginAppDescPath)). + PluginEbinDir = filename:dirname(PluginAppDescPath), + Plugin = filename:basename(PluginAppDescPath, ".app"), + code:add_patha(PluginEbinDir), + case filelib:wildcard(PluginEbinDir++ "/*.beam") of + [] -> + ok; + [BeamPath | _] -> + Module = list_to_atom(filename:basename(BeamPath, ".beam")), + case code:ensure_loaded(Module) of + {module, _} -> + ok; + {error, badfile} -> + rabbit_log:error("Failed to enable plugin \"~s\": " + "it may have been built with an " + "incompatible (more recent?) " + "version of Erlang~n", [Plugin]), + throw({plugin_built_with_incompatible_erlang, Plugin}); + Error -> + throw({plugin_module_unloadable, Plugin, Error}) + end + end. %%---------------------------------------------------------------------------- -- cgit v1.2.1 From d7e77822640f57c6b8f6c5318985a604612f91d6 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Fri, 5 Dec 2014 11:24:14 +0000 Subject: Add top_memory_use() and top_binary_refs(). --- src/rabbit_diagnostics.erl | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/rabbit_diagnostics.erl b/src/rabbit_diagnostics.erl index bf45b757..3abd05a8 100644 --- a/src/rabbit_diagnostics.erl +++ b/src/rabbit_diagnostics.erl @@ -17,10 +17,11 @@ -module(rabbit_diagnostics). -define(PROCESS_INFO, - [current_stacktrace, initial_call, dictionary, message_queue_len, - links, monitors, monitored_by, heap_size]). + [registered_name, current_stacktrace, initial_call, dictionary, + message_queue_len, links, monitors, monitored_by, heap_size]). --export([maybe_stuck/0, maybe_stuck/1]). +-export([maybe_stuck/0, maybe_stuck/1, top_memory_use/0, top_memory_use/1, + top_binary_refs/0, top_binary_refs/1]). maybe_stuck() -> maybe_stuck(5000). @@ -75,5 +76,32 @@ maybe_stuck_stacktrace({_M, F, _A}) -> _ -> false end. +top_memory_use() -> top_memory_use(30). + +top_memory_use(Count) -> + Pids = processes(), + io:format("Memory use: top ~p of ~p processes.~n", [Count, length(Pids)]), + Procs = [{catch process_info(Pid, memory), catch info(Pid)} || Pid <- Pids], + Sorted = lists:sublist(lists:reverse(lists:sort(Procs)), Count), + io:format("~p~n", [Sorted]). + +top_binary_refs() -> top_binary_refs(30). + +top_binary_refs(Count) -> + Pids = processes(), + io:format("Binary refs: top ~p of ~p processes.~n", [Count, length(Pids)]), + Procs = [{binary_refs(Pid), catch info(Pid)} || Pid <- Pids], + Sorted = lists:sublist(lists:reverse(lists:sort(Procs)), Count), + io:format("~p~n", [Sorted]). + +binary_refs(Pid) -> + Refs = try + {binary, Rs} = process_info(Pid, binary), + Rs + catch _:badarg -> [] + end, + lists:sum([Sz || {_Ptr, Sz} <- lists:usort([{Ptr, Sz} || + {Ptr, Sz, _Cnt} <- Refs])]). + info(Pid) -> [{pid, Pid} | process_info(Pid, ?PROCESS_INFO)]. -- cgit v1.2.1 From 9bf5de793ba9ccc2b6fe437753b42995d3cf89af Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Fri, 5 Dec 2014 11:32:17 +0000 Subject: Abstraction: safe process_info(). --- src/rabbit_diagnostics.erl | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/rabbit_diagnostics.erl b/src/rabbit_diagnostics.erl index 3abd05a8..9fc0fabd 100644 --- a/src/rabbit_diagnostics.erl +++ b/src/rabbit_diagnostics.erl @@ -42,13 +42,13 @@ maybe_stuck(Pids, Timeout) -> maybe_stuck(Pids2, Timeout - 500). looks_stuck(Pid) -> - case catch process_info(Pid, status) of + case info(Pid, status, gone) of {status, waiting} -> %% It's tempting to just check for message_queue_len > 0 %% here rather than mess around with stack traces and %% heuristics. But really, sometimes freshly stuck %% processes can have 0 messages... - case catch erlang:process_info(Pid, current_stacktrace) of + case info(Pid, current_stacktrace, gone) of {current_stacktrace, [H|_]} -> maybe_stuck_stacktrace(H); _ -> @@ -81,7 +81,7 @@ top_memory_use() -> top_memory_use(30). top_memory_use(Count) -> Pids = processes(), io:format("Memory use: top ~p of ~p processes.~n", [Count, length(Pids)]), - Procs = [{catch process_info(Pid, memory), catch info(Pid)} || Pid <- Pids], + Procs = [{info(Pid, memory, 0), info(Pid)} || Pid <- Pids], Sorted = lists:sublist(lists:reverse(lists:sort(Procs)), Count), io:format("~p~n", [Sorted]). @@ -90,18 +90,24 @@ top_binary_refs() -> top_binary_refs(30). top_binary_refs(Count) -> Pids = processes(), io:format("Binary refs: top ~p of ~p processes.~n", [Count, length(Pids)]), - Procs = [{binary_refs(Pid), catch info(Pid)} || Pid <- Pids], + Procs = [{{binary_refs, binary_refs(Pid)}, info(Pid)} || Pid <- Pids], Sorted = lists:sublist(lists:reverse(lists:sort(Procs)), Count), io:format("~p~n", [Sorted]). binary_refs(Pid) -> - Refs = try - {binary, Rs} = process_info(Pid, binary), - Rs - catch _:badarg -> [] - end, + {binary, Refs} = info(Pid, binary, []), lists:sum([Sz || {_Ptr, Sz} <- lists:usort([{Ptr, Sz} || {Ptr, Sz, _Cnt} <- Refs])]). info(Pid) -> - [{pid, Pid} | process_info(Pid, ?PROCESS_INFO)]. + [{pid, Pid} | info(Pid, ?PROCESS_INFO, [])]. + +info(Pid, Infos, Default) -> + try + process_info(Pid, Infos) + catch + _:_ -> case is_atom(Infos) of + true -> {Infos, Default}; + false -> Default + end + end. -- cgit v1.2.1 From a755fcfcd6bfd9435534de2dc07288144bdf85a9 Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Tue, 9 Dec 2014 19:14:28 +0100 Subject: Autoheal: The loosing leader must wait for the winner_is message As any other loosing nodes, the leader must wait for the winner_is message, instead of restarting immediately. The previous behaviour caused transient failures in the autoheal process if the leader was in the middle of the restart at the time the winner checks that all loosing nodes are up and running. --- src/rabbit_autoheal.erl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl index 90458741..7089911c 100644 --- a/src/rabbit_autoheal.erl +++ b/src/rabbit_autoheal.erl @@ -54,6 +54,10 @@ %% - we are the winner and are waiting for all losing nodes to stop %% before telling them they can restart %% +%% about_to_heal +%% - we are the leader, and have already assigned the winner and losers. +%% We are part of the losers and we wait for the winner_is announcement. +%% %% {leader_waiting, OutstandingStops} %% - we are the leader, and have already assigned the winner and losers. %% We are neither but need to ignore further requests to autoheal. @@ -135,7 +139,7 @@ handle_msg({request_start, Node}, true -> Continue({become_winner, Losers}); false -> send(Winner, {become_winner, Losers}), %% [0] case lists:member(node(), Losers) of - true -> Continue({winner_is, Winner}); + true -> about_to_heal; false -> {leader_waiting, Losers} end end @@ -163,7 +167,8 @@ handle_msg({become_winner, Losers}, end; handle_msg({winner_is, Winner}, - not_healing, _Partitions) -> + State, _Partitions) + when State =:= not_healing orelse State =:= about_to_heal -> rabbit_log:warning( "Autoheal: we were selected to restart; winner is ~p~n", [Winner]), rabbit_node_monitor:run_outside_applications( -- cgit v1.2.1 From 2a44901be5e4a70dad2523555996c5f552a9fbf7 Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Wed, 10 Dec 2014 10:55:12 +0100 Subject: Autoheal: Make sure Mnesia is stopped on all losers before they restart This works around a race in Mnesia where a starting loser would hang forever. This happens when a starting loser connects to another loser, negotiates the Mnesia protocol and attempts to acquire a write lock on the other node's schema. If the other nodes stops right between the protocol negotiation and the lock request, the starting node never receives an answer to its request. Before this fix, the hang occurred after at most 30 minutes looping on the partitions:autoheal test in rabbitmq-test. With the fix, RabbitMQ survived an all night long run. --- src/rabbit_autoheal.erl | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl index 90458741..a4ec86bf 100644 --- a/src/rabbit_autoheal.erl +++ b/src/rabbit_autoheal.erl @@ -21,6 +21,8 @@ %% The named process we are running in. -define(SERVER, rabbit_node_monitor). +-define(MNESIA_STOPPED_PING_INTERNAL, 200). + %%---------------------------------------------------------------------------- %% In order to autoheal we want to: @@ -194,9 +196,36 @@ abort(Down, Notify) -> winner_finish(Notify). winner_finish(Notify) -> + %% There is a race in Mnesia causing a starting loser to hang + %% forever if another loser stops at the same time: the starting + %% node connects to the other node, negotiates the protocol and + %% attemps to acquire a write lock on the schema on the other node. + %% If the other node stops between the protocol negotiation and lock + %% request, the starting node never gets and answer to its lock + %% request. + %% + %% To workaround the problem, we make sure Mnesia is stopped on all + %% loosing nodes before sending the "autoheal_safe_to_start" signal. + wait_for_mnesia_shutdown(Notify), [{rabbit_outside_app_process, N} ! autoheal_safe_to_start || N <- Notify], not_healing. +wait_for_mnesia_shutdown([Node | Rest] = AllNodes) -> + case rpc:call(Node, mnesia, system_info, [is_running]) of + no -> + wait_for_mnesia_shutdown(Rest); + Running when + Running =:= yes orelse + Running =:= starting orelse + Running =:= stopping -> + timer:sleep(?MNESIA_STOPPED_PING_INTERNAL), + wait_for_mnesia_shutdown(AllNodes); + _ -> + wait_for_mnesia_shutdown(Rest) + end; +wait_for_mnesia_shutdown([]) -> + ok. + make_decision(AllPartitions) -> Sorted = lists:sort([{partition_value(P), P} || P <- AllPartitions]), [[Winner | _] | Rest] = lists:reverse([P || {_, P} <- Sorted]), -- cgit v1.2.1 From b1cad59e2ae0f67bda87eeaaeb3fbc8ed5a14c32 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Wed, 10 Dec 2014 15:28:42 +0000 Subject: Minor language corrections. --- src/rabbit_autoheal.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl index a4ec86bf..b5d64992 100644 --- a/src/rabbit_autoheal.erl +++ b/src/rabbit_autoheal.erl @@ -199,13 +199,13 @@ winner_finish(Notify) -> %% There is a race in Mnesia causing a starting loser to hang %% forever if another loser stops at the same time: the starting %% node connects to the other node, negotiates the protocol and - %% attemps to acquire a write lock on the schema on the other node. + %% attempts to acquire a write lock on the schema on the other node. %% If the other node stops between the protocol negotiation and lock - %% request, the starting node never gets and answer to its lock + %% request, the starting node never gets an answer to its lock %% request. %% - %% To workaround the problem, we make sure Mnesia is stopped on all - %% loosing nodes before sending the "autoheal_safe_to_start" signal. + %% To work around the problem, we make sure Mnesia is stopped on all + %% losing nodes before sending the "autoheal_safe_to_start" signal. wait_for_mnesia_shutdown(Notify), [{rabbit_outside_app_process, N} ! autoheal_safe_to_start || N <- Notify], not_healing. -- cgit v1.2.1 From 150d2432e99f5f7fa910e07ad664cf8f6a4fedac Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Wed, 10 Dec 2014 18:10:59 +0100 Subject: Add more information to the 'about_to_heal' state explanations --- src/rabbit_autoheal.erl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl index 7089911c..8a5614ae 100644 --- a/src/rabbit_autoheal.erl +++ b/src/rabbit_autoheal.erl @@ -55,12 +55,16 @@ %% before telling them they can restart %% %% about_to_heal -%% - we are the leader, and have already assigned the winner and losers. -%% We are part of the losers and we wait for the winner_is announcement. +%% - we are the leader, and have already assigned the winner and +%% losers. We are part of the losers and we wait for the winner_is +%% announcement. This leader-specific state differs from not_healing +%% (the state other losers are in), because the leader could still +%% receive request_start messages: those subsequent requests must be +%% ignored. %% %% {leader_waiting, OutstandingStops} %% - we are the leader, and have already assigned the winner and losers. -%% We are neither but need to ignore further requests to autoheal. +%% We are neither but need to ignore further requests to autoheal. %% %% restarting %% - we are restarting. Of course the node monitor immediately dies -- cgit v1.2.1 From 12bd1745c23b1a478434209fd31250b3185f0135 Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Wed, 10 Dec 2014 18:20:47 +0100 Subject: Inline the Continue() anonymous function --- src/rabbit_autoheal.erl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl index 8a5614ae..a277dbeb 100644 --- a/src/rabbit_autoheal.erl +++ b/src/rabbit_autoheal.erl @@ -136,11 +136,9 @@ handle_msg({request_start, Node}, " * Winner: ~p~n" " * Losers: ~p~n", [AllPartitions, Winner, Losers]), - Continue = fun(Msg) -> - handle_msg(Msg, not_healing, Partitions) - end, case node() =:= Winner of - true -> Continue({become_winner, Losers}); + true -> handle_msg({become_winner, Losers}, + not_healing, Partitions); false -> send(Winner, {become_winner, Losers}), %% [0] case lists:member(node(), Losers) of true -> about_to_heal; -- cgit v1.2.1 From e4116d4ace857b190b38feee25d31bda06bb07f5 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Thu, 11 Dec 2014 16:17:57 +0000 Subject: Backport the part of 505868f421db which fixes ram_bytes when requeueing an in-memory message to delta, and do the same for beta. --- src/rabbit_variable_queue.erl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index d076b534..6415eb6d 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1485,7 +1485,12 @@ publish_alpha(MsgStatus, State) -> publish_beta(MsgStatus, State) -> {MsgStatus1, State1} = maybe_write_to_disk(true, false, MsgStatus, State), - {m(trim_msg_status(MsgStatus1)), State1}. + MsgStatus2 = m(trim_msg_status(MsgStatus1)), + case {MsgStatus1#msg_status.msg =:= undefined, + MsgStatus2#msg_status.msg =:= undefined} of + {false, true} -> {MsgStatus2, upd_ram_bytes(-1, MsgStatus, State1)}; + _ -> {MsgStatus2, State1} + end. %% Rebuild queue, inserting sequence ids to maintain ordering queue_merge(SeqIds, Q, MsgIds, Limit, PubFun, State) -> @@ -1521,8 +1526,12 @@ delta_merge(SeqIds, Delta, MsgIds, State) -> msg_from_pending_ack(SeqId, State0), {_MsgStatus, State2} = maybe_write_to_disk(true, true, MsgStatus, State1), + State3 = case MsgStatus#msg_status.msg of + undefined -> State2; + _ -> upd_ram_bytes(-1, MsgStatus, State2) + end, {expand_delta(SeqId, Delta0), [MsgId | MsgIds0], - upd_bytes(1, -1, MsgStatus, State2)} + upd_bytes(1, -1, MsgStatus, State3)} end, {Delta, MsgIds, State}, SeqIds). %% Mostly opposite of record_pending_ack/2 -- cgit v1.2.1 From 3990a8390f6f11abdb88f3059fd1e0fcb46eb5bb Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Thu, 11 Dec 2014 16:36:56 +0000 Subject: Minor refactor. --- src/rabbit_variable_queue.erl | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 6415eb6d..1da3de26 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1198,6 +1198,8 @@ upd_ram_bytes(Sign, MsgStatus, State = #vqstate{ram_bytes = RamBytes}) -> msg_size(#msg_status{msg_props = #message_properties{size = Size}}) -> Size. +msg_in_ram(#msg_status{msg = Msg}) -> Msg =/= undefined. + remove(AckRequired, MsgStatus = #msg_status { seq_id = SeqId, msg_id = MsgId, @@ -1486,10 +1488,9 @@ publish_alpha(MsgStatus, State) -> publish_beta(MsgStatus, State) -> {MsgStatus1, State1} = maybe_write_to_disk(true, false, MsgStatus, State), MsgStatus2 = m(trim_msg_status(MsgStatus1)), - case {MsgStatus1#msg_status.msg =:= undefined, - MsgStatus2#msg_status.msg =:= undefined} of - {false, true} -> {MsgStatus2, upd_ram_bytes(-1, MsgStatus, State1)}; - _ -> {MsgStatus2, State1} + case msg_in_ram(MsgStatus1) andalso not msg_in_ram(MsgStatus2) of + true -> {MsgStatus2, upd_ram_bytes(-1, MsgStatus, State1)}; + _ -> {MsgStatus2, State1} end. %% Rebuild queue, inserting sequence ids to maintain ordering @@ -1526,10 +1527,11 @@ delta_merge(SeqIds, Delta, MsgIds, State) -> msg_from_pending_ack(SeqId, State0), {_MsgStatus, State2} = maybe_write_to_disk(true, true, MsgStatus, State1), - State3 = case MsgStatus#msg_status.msg of - undefined -> State2; - _ -> upd_ram_bytes(-1, MsgStatus, State2) - end, + State3 = + case msg_in_ram(MsgStatus) of + false -> State2; + true -> upd_ram_bytes(-1, MsgStatus, State2) + end, {expand_delta(SeqId, Delta0), [MsgId | MsgIds0], upd_bytes(1, -1, MsgStatus, State3)} end, {Delta, MsgIds, State}, SeqIds). -- cgit v1.2.1 From ba14dc27c8d2236e8b24fbfb2e5dfc09e36ed6df Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Wed, 24 Dec 2014 15:58:14 +0100 Subject: Remove 'backing_queue_status' from ?STATISTICS_KEYS This key is already provided by rabbit_backing_queue:info_keys/0 which is concatenated to ?INFO_KEYS and ?STATISTICS_KEYS. This fixes a bug where this key was duplicated in the JSON returned by the "/api/queues" endpoint. --- src/rabbit_amqqueue_process.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b06da4c1..a18df225 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -84,7 +84,6 @@ slave_pids, synchronised_slave_pids, down_slave_nodes, - backing_queue_status, state ]). -- cgit v1.2.1 From d40e75a4bc4a9020765acf83f5b1b87e533c5fd1 Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Fri, 26 Dec 2014 15:13:52 +0100 Subject: Rework SSL's verify_fun support Until now, only the legacy form was supported (the one existing in Erlang up-to R13B). The user would use the following RabbitMQ configuration: {ssl_options, [ {verify_fun, {Module, Function}} ]} This is converted to the following ssl option: {ssl_options, [ {verify_fun, fun(Errors) -> Module:Function(Errors) end} ]} Although this form is still supported by Erlang R14B+, this is undocumented and some users complain about RabbitMQ not matching the expected behaviour, according to ssl's documentation. Now, the new form is supported as well. Here's what the user would configure: {ssl_options, [ {verify_fun, {Module, Function, InitialUserState}} ]} or: {ssl_options, [ {verify_fun, {Module, Function}} ]} This is converted to the new form: {ssl_options, [ {verify_fun, {fun(OtpCert, Event, State) -> Module:Function(OtpCert, Event, State) end, InitialUserState }} ]} To determine which form to use, we look at the version of the ssl application: o 4.0.1+: We check if Module:Function/3 is exported and use the new form. If Module:Function/1 is exported, we use the old form. If both are exported, the new form is preferred. If InitialUserState is unspecified, 'none' is used. o Before 4.0.1: We use the legacy form only. If Module can't be loaded or if the expected Function/Arity is not exported, an error is logged and RabbitMQ fails to start. --- src/rabbit_networking.erl | 59 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl index d59b22f6..1a288374 100644 --- a/src/rabbit_networking.erl +++ b/src/rabbit_networking.erl @@ -188,12 +188,21 @@ fix_ssl_options(Config) -> fix_verify_fun(fix_ssl_protocol_versions(Config)). fix_verify_fun(SslOptsConfig) -> + %% Starting with ssl 4.0.1 in Erlang R14B, the verify_fun function + %% takes 3 arguments and returns a tuple. + {ok, SslAppVer} = application:get_key(ssl, vsn), + UseNewVerifyFun = rabbit_misc:version_compare(SslAppVer, "4.0.1", gte), case rabbit_misc:pget(verify_fun, SslOptsConfig) of + {Module, Function, InitialUserState} -> + Fun = make_verify_fun(Module, Function, InitialUserState, + UseNewVerifyFun), + rabbit_misc:pset(verify_fun, Fun, SslOptsConfig); {Module, Function} -> - rabbit_misc:pset(verify_fun, - fun (ErrorList) -> - Module:Function(ErrorList) - end, SslOptsConfig); + Fun = make_verify_fun(Module, Function, none, + UseNewVerifyFun), + rabbit_misc:pset(verify_fun, Fun, SslOptsConfig); + undefined when UseNewVerifyFun -> + SslOptsConfig; undefined -> % unknown_ca errors are silently ignored prior to R14B unless we % supply this verify_fun - remove when at least R14B is required @@ -206,6 +215,48 @@ fix_verify_fun(SslOptsConfig) -> end end. +make_verify_fun(Module, Function, InitialUserState, UseNewVerifyFun) -> + try + %% Preload the module: it is required to use + %% erlang:function_exported/3. + Module:module_info() + catch + _:Exception -> + rabbit_log:error("SSL verify_fun: module ~s missing: ~p~n", + [Module, Exception]), + throw({error, {invalid_verify_fun, missing_module}}) + end, + NewForm = erlang:function_exported(Module, Function, 3), + OldForm = erlang:function_exported(Module, Function, 1), + case {NewForm, OldForm} of + {true, _} when UseNewVerifyFun -> + %% This verify_fun is supported by Erlang R14B+ (ssl + %% 4.0.1 and later). + Fun = fun(OtpCert, Event, UserState) -> + Module:Function(OtpCert, Event, UserState) + end, + {Fun, InitialUserState}; + {_, true} -> + %% This verify_fun is supported by: + %% o Erlang up-to R13B; + %% o Erlang R14B+ for undocumented backward + %% compatibility. + %% + %% InitialUserState is ignored in this case. + fun(ErrorList) -> + Module:Function(ErrorList) + end; + {_, false} when not UseNewVerifyFun -> + rabbit_log:error("SSL verify_fun: ~s:~s/1 form required " + "for Erlang R13B~n", [Module, Function]), + throw({error, {invalid_verify_fun, old_form_required}}); + _ -> + Arity = case UseNewVerifyFun of true -> 3; _ -> 1 end, + rabbit_log:error("SSL verify_fun: no ~s:~s/~b exported~n", + [Module, Function, Arity]), + throw({error, {invalid_verify_fun, function_not_exported}}) + end. + fix_ssl_protocol_versions(Config) -> case application:get_env(rabbit, ssl_allow_poodle_attack) of {ok, true} -> -- cgit v1.2.1