summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean-Sebastien Pedron <jean-sebastien@rabbitmq.com>2014-11-27 12:13:09 +0100
committerJean-Sebastien Pedron <jean-sebastien@rabbitmq.com>2014-11-27 12:13:09 +0100
commit6276d667177d915aff5c5b86e895a9b38b7ea2ae (patch)
tree802fbdfe130306a4212faff27c69152dc65a5152
parent6c026c7445f76b0a7a20cd4d0af21298f20fc930 (diff)
downloadrabbitmq-server-6276d667177d915aff5c5b86e895a9b38b7ea2ae.tar.gz
Check cluster consistency before joining a cluster
Before this change, the node would join the cluster and the consistency would be only checked during application restart (rabbitmqctl start_app). This could lead to crash during the join due to Erlang, Mnesia or RabbitMQ incompatibilities. Now, the join_cluster command reports the problem. Here's an example: $ rabbitmqctl join_cluster <remote_node> Clustering node <local_node> with <remote_node> ... Error: {inconsistent_cluster, "OTP version mismatch: local node is R16B03-1, remote node 17.3"}
-rw-r--r--src/rabbit_mnesia.erl42
1 files changed, 27 insertions, 15 deletions
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index fa51dd70..b55d53b6 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -174,18 +174,25 @@ join_cluster(DiscoveryNode, NodeType) ->
{ClusterNodes, _, _} = discover_cluster([DiscoveryNode]),
case me_in_nodes(ClusterNodes) of
false ->
- %% reset the node. this simplifies things and it will be needed in
- %% this case - we're joining a new cluster with new nodes which
- %% are not in synch with the current node. I also lifts the burden
- %% of reseting the node from the user.
- reset_gracefully(),
-
- %% Join the cluster
- rabbit_log:info("Clustering with ~p as ~p node~n",
- [ClusterNodes, NodeType]),
- ok = init_db_with_mnesia(ClusterNodes, NodeType, true, true),
- rabbit_node_monitor:notify_joined_cluster(),
- ok;
+ case check_cluster_consistency(DiscoveryNode, false) of
+ {ok, _} ->
+ %% reset the node. this simplifies things and it
+ %% will be needed in this case - we're joining a new
+ %% cluster with new nodes which are not in synch
+ %% with the current node. It also lifts the burden
+ %% of resetting the node from the user.
+ reset_gracefully(),
+
+ %% Join the cluster
+ rabbit_log:info("Clustering with ~p as ~p node~n",
+ [ClusterNodes, NodeType]),
+ ok = init_db_with_mnesia(ClusterNodes, NodeType,
+ true, true),
+ rabbit_node_monitor:notify_joined_cluster(),
+ ok;
+ {error, Reason} ->
+ {error, Reason}
+ end;
true ->
rabbit_log:info("Already member of cluster: ~p~n", [ClusterNodes]),
{ok, already_member}
@@ -545,7 +552,7 @@ maybe_force_load() ->
check_cluster_consistency() ->
%% We want to find 0 or 1 consistent nodes.
case lists:foldl(
- fun (Node, {error, _}) -> check_cluster_consistency(Node);
+ fun (Node, {error, _}) -> check_cluster_consistency(Node, true);
(_Node, {ok, Status}) -> {ok, Status}
end, {error, not_found}, nodes_excl_me(cluster_nodes(all)))
of
@@ -575,17 +582,22 @@ check_cluster_consistency() ->
throw(E)
end.
-check_cluster_consistency(Node) ->
+check_cluster_consistency(Node, AlreadyFormed) ->
case rpc:call(Node, rabbit_mnesia, node_info, []) of
{badrpc, _Reason} ->
{error, not_found};
{_OTP, _Rabbit, {error, _}} ->
{error, not_found};
- {OTP, Rabbit, {ok, Status}} ->
+ {OTP, Rabbit, {ok, Status}} when AlreadyFormed ->
case check_consistency(OTP, Rabbit, Node, Status) of
{error, _} = E -> E;
{ok, Res} -> {ok, Res}
end;
+ {OTP, Rabbit, {ok, Status}} ->
+ case check_consistency(OTP, Rabbit) of
+ {error, _} = E -> E;
+ ok -> {ok, Status}
+ end;
{_OTP, Rabbit, _Hash, _Status} ->
%% delegate hash checking implies version mismatch
version_error("Rabbit", rabbit_misc:version(), Rabbit)