diff options
author | Jean-Sebastien Pedron <jean-sebastien@rabbitmq.com> | 2014-11-27 12:13:09 +0100 |
---|---|---|
committer | Jean-Sebastien Pedron <jean-sebastien@rabbitmq.com> | 2014-11-27 12:13:09 +0100 |
commit | 6276d667177d915aff5c5b86e895a9b38b7ea2ae (patch) | |
tree | 802fbdfe130306a4212faff27c69152dc65a5152 | |
parent | 6c026c7445f76b0a7a20cd4d0af21298f20fc930 (diff) | |
download | rabbitmq-server-6276d667177d915aff5c5b86e895a9b38b7ea2ae.tar.gz |
Check cluster consistency before joining a cluster
Before this change, the node would join the cluster and the consistency
would only be checked during application restart (rabbitmqctl
start_app). This could lead to a crash during the join due to Erlang,
Mnesia or RabbitMQ incompatibilities.
Now, the join_cluster command reports the problem. Here's an example:
$ rabbitmqctl join_cluster <remote_node>
Clustering node <local_node> with <remote_node> ...
Error: {inconsistent_cluster,
"OTP version mismatch: local node is R16B03-1, remote node 17.3"}
-rw-r--r-- | src/rabbit_mnesia.erl | 42 |
1 files changed, 27 insertions, 15 deletions
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index fa51dd70..b55d53b6 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -174,18 +174,25 @@ join_cluster(DiscoveryNode, NodeType) -> {ClusterNodes, _, _} = discover_cluster([DiscoveryNode]), case me_in_nodes(ClusterNodes) of false -> - %% reset the node. this simplifies things and it will be needed in - %% this case - we're joining a new cluster with new nodes which - %% are not in synch with the current node. I also lifts the burden - %% of reseting the node from the user. - reset_gracefully(), - - %% Join the cluster - rabbit_log:info("Clustering with ~p as ~p node~n", - [ClusterNodes, NodeType]), - ok = init_db_with_mnesia(ClusterNodes, NodeType, true, true), - rabbit_node_monitor:notify_joined_cluster(), - ok; + case check_cluster_consistency(DiscoveryNode, false) of + {ok, _} -> + %% reset the node. this simplifies things and it + %% will be needed in this case - we're joining a new + %% cluster with new nodes which are not in synch + %% with the current node. It also lifts the burden + %% of resetting the node from the user. + reset_gracefully(), + + %% Join the cluster + rabbit_log:info("Clustering with ~p as ~p node~n", + [ClusterNodes, NodeType]), + ok = init_db_with_mnesia(ClusterNodes, NodeType, + true, true), + rabbit_node_monitor:notify_joined_cluster(), + ok; + {error, Reason} -> + {error, Reason} + end; true -> rabbit_log:info("Already member of cluster: ~p~n", [ClusterNodes]), {ok, already_member} @@ -545,7 +552,7 @@ maybe_force_load() -> check_cluster_consistency() -> %% We want to find 0 or 1 consistent nodes. case lists:foldl( - fun (Node, {error, _}) -> check_cluster_consistency(Node); + fun (Node, {error, _}) -> check_cluster_consistency(Node, true); (_Node, {ok, Status}) -> {ok, Status} end, {error, not_found}, nodes_excl_me(cluster_nodes(all))) of @@ -575,17 +582,22 @@ check_cluster_consistency() -> throw(E) end. 
-check_cluster_consistency(Node) -> +check_cluster_consistency(Node, AlreadyFormed) -> case rpc:call(Node, rabbit_mnesia, node_info, []) of {badrpc, _Reason} -> {error, not_found}; {_OTP, _Rabbit, {error, _}} -> {error, not_found}; - {OTP, Rabbit, {ok, Status}} -> + {OTP, Rabbit, {ok, Status}} when AlreadyFormed -> case check_consistency(OTP, Rabbit, Node, Status) of {error, _} = E -> E; {ok, Res} -> {ok, Res} end; + {OTP, Rabbit, {ok, Status}} -> + case check_consistency(OTP, Rabbit) of + {error, _} = E -> E; + ok -> {ok, Status} + end; {_OTP, Rabbit, _Hash, _Status} -> %% delegate hash checking implies version mismatch version_error("Rabbit", rabbit_misc:version(), Rabbit) |