diff options
author | Rickard Green <rickard@erlang.org> | 2022-01-07 22:24:56 +0100 |
---|---|---|
committer | Rickard Green <rickard@erlang.org> | 2022-02-06 23:48:36 +0100 |
commit | 1c0e9f0421e5a8356560d37aee500587d21f3309 (patch) | |
tree | 59db5161e4cff8b00d0e8930d70226e0520a778f | |
parent | 7fb87d74c865651282c28dc8dd2490649826a821 (diff) | |
download | erlang-1c0e9f0421e5a8356560d37aee500587d21f3309.tar.gz |
global: Preventing overlapping partitions fix
-rw-r--r-- | bootstrap/lib/kernel/ebin/global.beam | bin | 28700 -> 33588 bytes | |||
-rw-r--r-- | bootstrap/lib/kernel/ebin/global_group.beam | bin | 15708 -> 15744 bytes | |||
-rw-r--r-- | erts/emulator/test/distribution_SUITE.erl | 83 | ||||
-rw-r--r-- | erts/preloaded/ebin/init.beam | bin | 50224 -> 50808 bytes | |||
-rw-r--r-- | erts/preloaded/src/erts.app.src | 2 | ||||
-rw-r--r-- | erts/preloaded/src/init.erl | 20 | ||||
-rw-r--r-- | lib/kernel/doc/src/global.xml | 33 | ||||
-rw-r--r-- | lib/kernel/doc/src/kernel_app.xml | 13 | ||||
-rw-r--r-- | lib/kernel/src/global.erl | 442 | ||||
-rw-r--r-- | lib/kernel/src/global_group.erl | 26 | ||||
-rw-r--r-- | lib/kernel/src/kernel.app.src | 5 | ||||
-rw-r--r-- | lib/kernel/test/erl_distribution_SUITE.erl | 19 | ||||
-rw-r--r-- | lib/kernel/test/global_SUITE.erl | 997 | ||||
-rw-r--r-- | lib/kernel/test/pg2_SUITE.erl | 262 | ||||
-rw-r--r-- | lib/kernel/test/rpc_SUITE.erl | 50 |
15 files changed, 1259 insertions, 693 deletions
diff --git a/bootstrap/lib/kernel/ebin/global.beam b/bootstrap/lib/kernel/ebin/global.beam Binary files differindex aadb5bf080..537f1c8a77 100644 --- a/bootstrap/lib/kernel/ebin/global.beam +++ b/bootstrap/lib/kernel/ebin/global.beam diff --git a/bootstrap/lib/kernel/ebin/global_group.beam b/bootstrap/lib/kernel/ebin/global_group.beam Binary files differindex 83885cdeb5..364d2621ae 100644 --- a/bootstrap/lib/kernel/ebin/global_group.beam +++ b/bootstrap/lib/kernel/ebin/global_group.beam diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl index 25e6ea89c8..d115626597 100644 --- a/erts/emulator/test/distribution_SUITE.erl +++ b/erts/emulator/test/distribution_SUITE.erl @@ -1070,21 +1070,24 @@ dist_auto_connect_relay(Parent) -> dist_parallel_send(Config) when is_list(Config) -> - {ok, RNode} = start_node(dist_parallel_receiver), - {ok, SNode} = start_node(dist_parallel_sender), - WatchDog = spawn_link( - fun () -> - TRef = erlang:start_timer((2*60*1000), self(), oops), - receive - {timeout, TRef, _ } -> - spawn(SNode, fun () -> abort(timeout) end), - spawn(RNode, fun () -> abort(timeout) end) - %% rpc:cast(SNode, erlang, halt, - %% ["Timetrap (sender)"]), - %% rpc:cast(RNode, erlang, halt, - %% ["Timetrap (receiver)"]) - end - end), + %% Disabled "connect all" so global wont interfere... + {ok, RNode} = start_node(dist_parallel_receiver, "-connect_all false"), + {ok, SNode} = start_node(dist_parallel_sender, "-connect_all false"), + + %% WatchDog = spawn_link( + %% fun () -> + %% TRef = erlang:start_timer((2*60*1000), self(), oops), + %% receive + %% {timeout, TRef, _ } -> + %% spawn(SNode, fun () -> abort(timeout) end), + %% spawn(RNode, fun () -> abort(timeout) end) + %% %% rpc:cast(SNode, erlang, halt, + %% %% ["Timetrap (sender)"]), + %% %% rpc:cast(RNode, erlang, halt, + %% %% ["Timetrap (receiver)"]) + %% end + %% end), + MkSndrs = fun (Receiver) -> lists:map(fun (_) -> spawn_link(SNode, @@ -1093,18 +1096,23 @@ dist_parallel_send(Config) when is_list(Config) -> [self(), Receiver, 1000]) end, lists:seq(1, 64)) end, + Parent = self(), SndrsStart = fun (Sndrs) -> - Parent = self(), spawn_link(SNode, fun () -> lists:foreach(fun (P) -> P ! {go, Parent} - end, Sndrs) + end, Sndrs), + unlink(Parent) end) end, SndrsWait = fun (Sndrs) -> lists:foreach(fun (P) -> - receive {P, done} -> ok end + receive + {P, done} -> + unlink(P), + ok + end end, Sndrs) end, DPR = spawn_link(RNode, ?MODULE, dist_parallel_receiver, []), @@ -1121,8 +1129,8 @@ dist_parallel_send(Config) when is_list(Config) -> unlink(DEPR), exit(DEPR, bang), - unlink(WatchDog), - exit(WatchDog, bang), + %% unlink(WatchDog), + %% exit(WatchDog, bang), stop_node(RNode), stop_node(SNode), @@ -1762,8 +1770,9 @@ start_link(Offender,P) -> bad_dist_structure(Config) when is_list(Config) -> ct:timetrap({seconds, 15}), - {ok, Offender} = start_node(bad_dist_structure_offender), - {ok, Victim} = start_node(bad_dist_structure_victim), + %% Disabled "connect all" so global wont interfere... + {ok, Offender} = start_node(bad_dist_structure_offender, "-connect_all false"), + {ok, Victim} = start_node(bad_dist_structure_victim, "-connect_all false"), start_node_monitors([Offender,Victim]), Parent = self(), P = spawn(Victim, @@ -1857,8 +1866,9 @@ bad_dist_structure(Config) when is_list(Config) -> bad_dist_fragments(Config) when is_list(Config) -> ct:timetrap({seconds, 15}), - {ok, Offender} = start_node(bad_dist_fragment_offender), - {ok, Victim} = start_node(bad_dist_fragment_victim), + %% Disabled "connect all" so global wont interfere... + {ok, Offender} = start_node(bad_dist_fragment_offender, "-connect_all false"), + {ok, Victim} = start_node(bad_dist_fragment_victim, "-connect_all false"), Msg = iolist_to_binary(dmsg_ext(lists:duplicate(255,255))), @@ -1994,8 +2004,9 @@ send_bad_fragments(Offender,VictimNode,Victim,Ctrl,WhereToPutSelf,Fragments) -> end. bad_dist_ext_receive(Config) when is_list(Config) -> - {ok, Offender} = start_node(bad_dist_ext_receive_offender), - {ok, Victim} = start_node(bad_dist_ext_receive_victim), + %% Disabled "connect all" so global wont interfere... + {ok, Offender} = start_node(bad_dist_ext_receive_offender, "-connect_all false"), + {ok, Victim} = start_node(bad_dist_ext_receive_victim, "-connect_all false"), start_node_monitors([Offender,Victim]), Parent = self(), @@ -2066,8 +2077,9 @@ bad_dist_ext_receive(Config) when is_list(Config) -> bad_dist_ext_process_info(Config) when is_list(Config) -> - {ok, Offender} = start_node(bad_dist_ext_process_info_offender), - {ok, Victim} = start_node(bad_dist_ext_process_info_victim), + %% Disabled "connect all" so global wont interfere... + {ok, Offender} = start_node(bad_dist_ext_process_info_offender, "-connect_all false"), + {ok, Victim} = start_node(bad_dist_ext_process_info_victim, "-connect_all false"), start_node_monitors([Offender,Victim]), Parent = self(), @@ -2126,8 +2138,9 @@ bad_dist_ext_process_info(Config) when is_list(Config) -> stop_node(Victim). bad_dist_ext_control(Config) when is_list(Config) -> - {ok, Offender} = start_node(bad_dist_ext_control_offender), - {ok, Victim} = start_node(bad_dist_ext_control_victim), + %% Disabled "connect all" so global wont interfere... + {ok, Offender} = start_node(bad_dist_ext_control_offender, "-connect_all false"), + {ok, Victim} = start_node(bad_dist_ext_control_victim, "-connect_all false"), start_node_monitors([Offender,Victim]), pong = rpc:call(Victim, net_adm, ping, [Offender]), @@ -2145,8 +2158,9 @@ bad_dist_ext_control(Config) when is_list(Config) -> stop_node(Victim). bad_dist_ext_connection_id(Config) when is_list(Config) -> - {ok, Offender} = start_node(bad_dist_ext_connection_id_offender), - {ok, Victim} = start_node(bad_dist_ext_connection_id_victim), + %% Disabled "connect all" so global wont interfere... + {ok, Offender} = start_node(bad_dist_ext_connection_id_offender, "-connect_all false"), + {ok, Victim} = start_node(bad_dist_ext_connection_id_victim, "-connect_all false"), start_node_monitors([Offender,Victim]), Parent = self(), @@ -2212,10 +2226,11 @@ bad_dist_ext_connection_id(Config) when is_list(Config) -> %% OTP-14661: Bad message is discovered by erts_msg_attached_data_size bad_dist_ext_size(Config) when is_list(Config) -> - {ok, Offender} = start_node(bad_dist_ext_process_info_offender), + %% Disabled "connect all" so global wont interfere... + {ok, Offender} = start_node(bad_dist_ext_process_info_offender, "-connect_all false"), %%Prog = "Prog=/home/uabseri/src/otp_new3/bin/cerl -rr -debug", Prog = [], - {ok, Victim} = start_node(bad_dist_ext_process_info_victim, [], Prog), + {ok, Victim} = start_node(bad_dist_ext_process_info_victim, "-connect_all false", Prog), start_node_monitors([Offender,Victim]), Parent = self(), diff --git a/erts/preloaded/ebin/init.beam b/erts/preloaded/ebin/init.beam Binary files differindex 6f4d981219..cd161f4dd7 100644 --- a/erts/preloaded/ebin/init.beam +++ b/erts/preloaded/ebin/init.beam diff --git a/erts/preloaded/src/erts.app.src b/erts/preloaded/src/erts.app.src index d3dbe0f2d1..4f11613d0e 100644 --- a/erts/preloaded/src/erts.app.src +++ b/erts/preloaded/src/erts.app.src @@ -42,7 +42,7 @@ {registered, []}, {applications, []}, {env, []}, - {runtime_dependencies, ["stdlib-3.5", "kernel-6.5.1", "sasl-3.3"]} + {runtime_dependencies, ["stdlib-3.5", "kernel-@OTP-17843@", "sasl-3.3"]} ]}. %% vim: ft=erlang diff --git a/erts/preloaded/src/init.erl b/erts/preloaded/src/init.erl index b36b4b5599..b2b16678f0 100644 --- a/erts/preloaded/src/init.erl +++ b/erts/preloaded/src/init.erl @@ -588,11 +588,31 @@ stop_heart(State) -> shutdown_pids(Heart,Logger,BootPid,State) -> Timer = shutdown_timer(State#state.flags), + global_prepare_shutdown(), catch shutdown(State#state.kernel,BootPid,Timer,State), kill_all_pids(Heart,Logger), % Even the shutdown timer. kill_all_ports(Heart), % Logger has no ports flush_timout(Timer). +global_prepare_shutdown() -> + %% Inform global that we are shutting down, so it wont + %% send 'lost_connection' messages when connections + %% goes down... + case whereis(global_name_server) of + undefined -> + ok; + Pid -> + Mon = erlang:monitor(process, Pid), + Pid ! {prepare_shutdown, self(), Mon}, + receive + {Mon, ok} -> + erlang:demonitor(Mon, [flush]), + ok; + {'DOWN', Mon, process, Pid, _Reason} -> + ok + end + end. + get_heart(Kernel) -> get_kernelpid(heart,Kernel). diff --git a/lib/kernel/doc/src/global.xml b/lib/kernel/doc/src/global.xml index 28b59c20bd..b8385436b5 100644 --- a/lib/kernel/doc/src/global.xml +++ b/lib/kernel/doc/src/global.xml @@ -37,6 +37,39 @@ <item>Global locks</item> <item>Maintenance of the fully connected network</item> </list> + <marker id="prevent_overlapping_partitions"/> + <warning> + <p> + By default <c>global</c> does <i>not</i> take any actions to restore + a fully connected network when connections are lost due to network + issues. This is problematic for all applications expecting a fully + connected network to be provided, such as for example <c>mnesia</c>, + but also for <c>global</c> itself. A network of overlapping partitions + might cause the internal state of <c>global</c> to become inconsistent. + Such an inconsistency can remain even after such partitions have been + brought together to form a fully connected network again. The effect + on other applications that expects that a fully connected network is + maintained may vary, but they might misbehave in very subtle hard to + detect ways during such a partitioning. + </p> + <p> + In order to prevent such issues, we have introduced a <i>prevent + overlapping partitions</i> fix which can be enabled using the + <seealso marker="kernel_app#prevent_overlapping_partitions"> + <c>prevent_overlapping_partitions</c></seealso> <c>kernel(6)</c> + parameter. When this fix has been enabled, <c>global</c> will actively + disconnect from nodes that reports that they have lost connections to + other nodes. This will cause fully connected partitions to form + instead of leaving the network in a state with overlapping partitions. + Note that this fix <i>has</i> to be enabled on <i>all</i> nodes in the + network in order to work properly. Since this quite substantially + changes the behavior, this fix is currently disabled by default. Since + you might get hard to detect issues without this fix you are, however, + <i>strongly</i> advised to enable this fix in order to avoid issues + such as the ones described above. As of OTP 25 this fix will become + enabled by default. + </p> + </warning> <p>These services are controlled through the process <c>global_name_server</c> that exists on every node. The global name server starts automatically when a node is started. diff --git a/lib/kernel/doc/src/kernel_app.xml b/lib/kernel/doc/src/kernel_app.xml index 49e007aa09..aef5a9fbe2 100644 --- a/lib/kernel/doc/src/kernel_app.xml +++ b/lib/kernel/doc/src/kernel_app.xml @@ -336,6 +336,19 @@ MaxT = TickTime + TickTime / 4</code> <p>Normally, a terminating node is detected immediately by the transport protocol (like TCP/IP).</p> </item> + <tag><marker id="prevent_overlapping_partitions"/> + <c>prevent_overlapping_partitions = true | false</c></tag> + <item> + <p> + If enabled (<c>true</c>), <c>global</c> will actively prevent + overlapping partitions from forming when connections are lost + between nodes. This fix is, however, currently disabled by + default. See the + <seealso marker="kernel:global#prevent_overlapping_partitions"> + <c>global(3)</c></seealso> documentation for more + information. + </p> + </item> <tag><c>shutdown_timeout = integer() | infinity</c></tag> <item> <p>Specifies the time <c>application_controller</c> waits diff --git a/lib/kernel/src/global.erl b/lib/kernel/src/global.erl index 26ddf21dcb..764a0bfd0f 100644 --- a/lib/kernel/src/global.erl +++ b/lib/kernel/src/global.erl @@ -32,7 +32,7 @@ whereis_name/1, register_name/2, register_name/3, register_name_external/2, register_name_external/3, unregister_name_external/1,re_register_name/2, re_register_name/3, - unregister_name/1, registered_names/0, send/2, node_disconnected/1, + unregister_name/1, registered_names/0, send/2, set_lock/1, set_lock/2, set_lock/3, del_lock/1, del_lock/2, trans/2, trans/3, trans/4, @@ -54,6 +54,10 @@ -define(N_CONNECT_RETRIES, global_connect_retries). -define(DEFAULT_N_CONNECT_RETRIES, 0). +%% Time that we keep information about multicasted lost_connection +%% messages... +-define(lost_conn_info_cleanup_time, 60*60*1000). + %%% In certain places in the server, calling io:format hangs everything, %%% so we'd better use erlang:display/1. %%% my_tracer is used in testsuites @@ -70,6 +74,9 @@ -define(trace(_), ok). -endif. +-define(MAX_64BIT_SMALL_INT, ((1 bsl 59) - 1)). +-define(MIN_64BIT_SMALL_INT, (-(1 bsl 59))). + %% These are the protocol versions: %% Vsn 1 is the original protocol. %% Vsn 2 is enhanced with code to take care of registration of names from @@ -84,15 +91,24 @@ %% nodes, but uses the server as a proxy. %% Vsn 7 - propagate global versions between nodes, so we always know %% versions of known nodes +%% - optional "prevent overlapping partitions" fix supported %% Current version of global does not support vsn 4 or earlier. -define(vsn, 7). +%% Version when the "prevent overlapping partitions" fix was introduced. +-define(pop_vsn, 7). +%% Version when the "propagate global protocol versions" feature +%% was introduced. +-define(pgpv_vsn, 7). + %%----------------------------------------------------------------- %% connect_all = boolean() - true if we are supposed to set up a %% fully connected net -%% known = [Node] - all nodes known to us +%% known = #{} - Map of known nodes including protocol version +%% as well as some other information. See state +%% record declaration below for more info. %% synced = [Node] - all nodes that have the same names as us %% resolvers = [{Node, MyTag, Resolver}] - %% the tag separating different synch sessions, @@ -114,8 +130,21 @@ %% {sync_tag_his, Node} = The Node's tag, used at synchronization %% {lock_id, Node} = The resource locking the partitions %%----------------------------------------------------------------- --record(state, {connect_all :: boolean(), - known = #{}, +-record(conf, {connect_all :: boolean(), + prevent_over_part :: boolean() + }). + +-record(state, {conf = #conf{} :: #conf{}, + known = #{} :: #{ + %% Known connected node with protocol + %% version as value + node() => non_neg_integer(), + %% Connecting node, not yet known, with + %% protocol version as value + {pending, node()} => non_neg_integer(), + %% Node currently being removed + {removing, node()} => yes + }, synced = [] :: [node()], resolvers = [], syncers = [] :: [pid()], @@ -144,6 +173,12 @@ %%% Resources locked by Pid. %%% ref() is the same ref() as in global_locks. %%% +%%% global_lost_connections (set): +%%% {{NodeA, NodeB}, {ExtendedCreationA, OpIdA, Timer} +%%% Information about lost connections (reported by NodeA) used by +%%% the "prevent overlapping partitions" fix. The timer is used is +%%% used to remove the entry when not needed anymore. +%%% %%% global_pid_names is a 'bag' for backward compatibility. %%% (Before vsn 5 more than one name could be registered for a process.) %%% @@ -209,9 +244,6 @@ send(Name, Msg) -> whereis_name(Name) -> where(Name). -node_disconnected(Node) -> - global_name_server ! {nodedown, Node}. - %%----------------------------------------------------------------- %% Method = function(Name, Pid1, Pid2) -> Pid | Pid2 | none %% Method is called if a name conflict is detected when two nodes @@ -454,6 +486,7 @@ init([]) -> _ = ets:new(global_pid_names, [bag, named_table, protected]), _ = ets:new(global_pid_ids, [bag, named_table, protected]), + _ = ets:new(global_lost_connections, [set, named_table, protected]), %% This is for troubleshooting only. DoTrace = os:getenv("GLOBAL_HIGH_LEVEL_TRACE") =:= "TRUE", @@ -471,10 +504,28 @@ init([]) -> _ -> true end, + POP = case application:get_env(kernel, + prevent_overlapping_partitions) of + {ok, Bool} when Bool == true; Bool == false -> + Bool; + {ok, Invalid} -> + error({invalid_parameter_value, + prevent_overlapping_partitions, + Invalid}); + undefined -> + false + end, S = #state{the_locker = start_the_locker(DoTrace), trace = T0, the_registrar = start_the_registrar(), - connect_all = Ca}, + conf = #conf{connect_all = Ca, + prevent_over_part = POP}}, + _ = rand:seed(exsss, + (erlang:monotonic_time(nanosecond) rem 1000000000) + + (erlang:system_time(nanosecond) rem 1000000000)), + CreX = ((rand:uniform(?MAX_64BIT_SMALL_INT - ?MIN_64BIT_SMALL_INT) + - 1) band (bnot ((1 bsl 2) -1))), + put(creation_extension, CreX), {ok, trace_message(S, {init, node()}, [])}. %%----------------------------------------------------------------- @@ -595,6 +646,51 @@ init([]) -> %% sent by each node to all new nodes (Node becomes known to them) %%----------------------------------------------------------------- +%% ---------------------------------------------------------------- +%% Prevent Overlapping Partitions Algorithm +%% ======================================== +%% +%% 1. When a node lose connection to another node it sends a +%% {lost_connection, LostConnNode, OtherNode} message to all +%% other nodes that it knows of. +%% 2. When a lost_connection message is received the receiver +%% first checks if it has seen this message before. If so, it +%% just ignores it. If it has not seen it before, it sends the +%% message to all nodes it knows of. This in order to ensure +%% that all connected nodes will receive this message. It then +%% sends a {remove_connection, LostConnRecvNode} message (where +%% LostConnRecvNode is its own node name) to OtherNode and +%% clear all information about OtherNode so OtherNode wont be +%% part of ReceiverNode's cluster anymore. When this information +%% has been cleared, no lost_connection will be triggered when +%% a nodedown message for the connection to OtherNode is +%% received. +%% 3. When a {remove_connection, LostConnRecvNode} message is +%% received, the receiver node takes down the connection to +%% LostConnRecvNode and clears its information about +%% LostConnRecvNode so it is not part of its cluster anymore. +%% Both nodes will receive a nodedown message due to the +%% connection being closed, but none of them will send +%% lost_connection messages since they have cleared information +%% about the other node. +%% +%% This will take down more connections than the minimum amount +%% of connections to remove in order to form fully connected +%% partitions. For example, if the user takes down a connection +%% between two nodes, the rest of the nodes will disconnect from +%% both of these nodes instead of just one. This is due to: +%% * We do not want to partition a remaining network when a node +%% has halted. When you receive a nodedown and/or lost_connection +%% messages you don't know if the corresponding node has halted +%% or if there are network issues. +%% * We need to decide which connection to take down as soon as +%% we receive a lost_connection message in order to prevent +%% inconsistencies entering global's state. +%% * All nodes need to make the same choices independent of +%% each other. +%% +%% ---------------------------------------------------------------- + -spec handle_call(term(), {pid(), term()}, state()) -> {'noreply', state()} | {'reply', term(), state()} | @@ -627,7 +723,7 @@ handle_call({del_lock, Lock}, {Pid, _Tag}, S0) -> {reply, true, S}; handle_call(get_known, _From, S) -> - {reply, mk_known_list(-1, S), S}; + {reply, mk_known_list(0, S), S}; handle_call(get_synced, _From, S) -> {reply, S#state.synced, S}; @@ -678,30 +774,28 @@ handle_call(Request, From, S) -> -spec handle_cast(term(), state()) -> {'noreply', state()}. -handle_cast({init_connect, Vsn, Node, InitMsg}, S) -> +handle_cast({init_connect, Vsn, Node, InitMsg}, S0) -> %% Sent from global_name_server at Node. ?trace({'####', init_connect, {vsn, Vsn}, {node,Node},{initmsg,InitMsg}}), - case Vsn of - %% It is always the responsibility of newer versions to understand - %% older versions of the protocol. - {HisVsn, HisTag} when HisVsn > ?vsn -> - put({pending_known, Node}, HisVsn), - init_connect(?vsn, Node, InitMsg, HisTag, S#state.resolvers, S); - {HisVsn, HisTag} -> - put({pending_known, Node}, HisVsn), - init_connect(HisVsn, Node, InitMsg, HisTag, S#state.resolvers, S); - %% To be future compatible - Tuple when is_tuple(Tuple) -> - List = tuple_to_list(Tuple), - [HisVsn, HisTag | _] = List, - put({pending_known, Node}, HisVsn), - %% use own version handling if his is newer. - init_connect(?vsn, Node, InitMsg, HisTag, S#state.resolvers, S); - _ -> - Txt = io_lib:format("Illegal global protocol version ~p Node: ~p\n", - [Vsn, Node]), - error_logger:info_report(lists:flatten(Txt)) - end, + S = case Vsn of + %% It is always the responsibility of newer versions to understand + %% older versions of the protocol. + {HisVsn, HisTag} when HisVsn > ?vsn -> + init_connect(?vsn, Node, InitMsg, HisTag, HisVsn, S0); + {HisVsn, HisTag} -> + init_connect(HisVsn, Node, InitMsg, HisTag, HisVsn, S0); + %% To be future compatible + Tuple when is_tuple(Tuple) -> + List = tuple_to_list(Tuple), + [HisVsn, HisTag | _] = List, + %% use own version handling if his is newer. + init_connect(?vsn, Node, InitMsg, HisTag, HisVsn, S0); + _ -> + Txt = io_lib:format("Illegal global protocol version ~p Node: ~p\n", + [Vsn, Node]), + error_logger:info_report(lists:flatten(Txt)), + S0 + end, {noreply, S}; %%======================================================================= @@ -810,7 +904,7 @@ handle_cast({new_nodes, Node, Ops, Names_ext, Nodes, ExtraInfo}, S) -> %% %% We are in sync with this node (from the other node's known world). %%======================================================================== -handle_cast({in_sync, Node, _IsKnown}, S) -> +handle_cast({in_sync, Node, _IsKnown}, #state{known = Known} = S) -> %% Sent from global_name_server at Node (in the other partition). ?trace({'####', in_sync, {Node, _IsKnown}}), lists:foreach(fun(Pid) -> Pid ! {synced, [Node]} end, S#state.syncers), @@ -820,7 +914,8 @@ handle_cast({in_sync, Node, _IsKnown}, S) -> true -> Synced; false -> [Node | Synced] end, - {noreply, NewS#state{synced = NSynced}}; + {noreply, NewS#state{known = maps:remove({pending, Node}, Known), + synced = NSynced}}; %% Called when Pid on other node crashed handle_cast({async_del_name, _Name, _Pid}, S) -> @@ -873,13 +968,20 @@ handle_info({nodedown, Node}, S) when Node =:= S#state.node_name -> handle_info({nodedown, Node}, S0) -> ?trace({'####', nodedown, {node,Node}}), S1 = trace_message(S0, {nodedown, Node}, []), - S = handle_nodedown(Node, S1), + S = handle_nodedown(Node, S1, disconnected), {noreply, S}; handle_info({extra_nodedown, Node}, S0) -> ?trace({'####', extra_nodedown, {node,Node}}), S1 = trace_message(S0, {extra_nodedown, Node}, []), - S = handle_nodedown(Node, S1), + S = handle_nodedown(Node, S1, disconnected), + {noreply, S}; + +handle_info({ignore_node, Node}, S0) -> + %% global_group wants us to ignore this node... + ?trace({'####', ignore_node, {node,Node}}), + S1 = trace_message(S0, {ignore_node, Node}, []), + S = handle_nodedown(Node, S1, ignore_node), {noreply, S}; handle_info({nodeup, Node}, S) when Node =:= node() -> @@ -888,10 +990,11 @@ handle_info({nodeup, Node}, S) when Node =:= node() -> %% references to old node name ('nonode@nohost') to Node. {noreply, change_our_node_name(Node, S)}; -handle_info({nodeup, _Node}, S) when not S#state.connect_all -> +handle_info({nodeup, _Node}, + #state{conf = #conf{connect_all = false}} = S) -> {noreply, S}; -handle_info({nodeup, Node}, S0) when S0#state.connect_all -> +handle_info({nodeup, Node}, S0) -> IsKnown = maps:is_key(Node, S0#state.known) or %% This one is only for double nodeups (shouldn't occur!) lists:keymember(Node, 1, S0#state.resolvers), @@ -933,6 +1036,110 @@ handle_info({whereis, Name, From}, S) -> do_whereis(Name, From), {noreply, S}; +handle_info({lost_connection, NodeA, XCreationA, OpIdA, NodeB} = Msg, + #state{conf = #conf{connect_all = true, + prevent_over_part = true}} = S0) -> + %% Message introduced in protocol version ?pop_vsn + %% + %% NodeA reports that it lost connection to NodeB. If we got a + %% connection to NodeB, we need to disconnect it in order to + %% prevent overlapping partitions... + LcKey = {NodeA, NodeB}, + S1 = case get_lost_connection_info(LcKey) of + {XCreationA, OpId, _Tmr} when OpIdA =< OpId -> + %% We have already handled this lost connection + %% message... + S0; + {_, _, Tmr} -> + %% Inform all other nodes we know of about this as well. This + %% in order to prevent that some nodes in this cluster wont + %% get the original message from NodeA. This ensures that all + %% nodes will know of this connection loss, even if some nodes + %% lose connection to NodeA while NodeA is multicasting that + %% it lost the connection to NodeB. + gns_volatile_multicast(Msg, NodeA, ?pop_vsn, true, S0), + + %% Save info about this lost_connection message... + save_lost_connection_info(LcKey, XCreationA, OpIdA, Tmr), + + RmNode = case node() == NodeB of + false -> + NodeB; + true -> + %% This toghether with NodeA being known by + %% us probably is unusal, but can happen + %% since lost_connection messages are + %% reapeted by receiving nodes. All other + %% nodes will remove us, so there is no + %% need to make them remove NodeA as well. + %% We therefore request removal from NodeA + %% and wait for the nodedown which + %% eventually will come since NodeA reported + %% this... + NodeA + end, + + case is_node_potentially_known(RmNode, S0) of + false -> + S0; + true -> + case node_vsn(RmNode, S0) of + Vsn when Vsn < ?pop_vsn -> + erlang:disconnect_node(RmNode), + error_logger:warning_msg( + "'global' at node ~p disconnected old " + "node ~p in order to prevent overlapping " + "partitions", + [node(), RmNode]), + ok; + _Vsn -> + gns_volatile_send(RmNode, + {remove_connection, node()}), + error_logger:warning_msg( + "'global' at node ~p requested disconnect " + "from node ~p in order to prevent " + "overlapping partitions", + [node(), RmNode]), + ok + end, + handle_nodedown(RmNode, S0, remove_connection) + end + end, + + {noreply, S1}; + +handle_info({lost_connection, _NodeA, _XCreationA, _OpIdA, _NodeB}, S) -> + %% Message introduced in protocol version ?pop_vsn + {noreply, S}; + +handle_info({timeout, _, _} = TmoMsg, S) -> + %% Instance specific message + remove_lost_connection_info(TmoMsg), + {noreply, S}; + +handle_info({remove_connection, Node}, S0) -> + %% Message introduced in protocol version ?pop_vsn + S2 = case is_node_potentially_known(Node, S0) of + false -> + S0; + true -> + erlang:disconnect_node(Node), + S1 = handle_nodedown(Node, S0, remove_connection), + error_logger:warning_msg( + "'global' at node ~p disconnected node ~p in order to " + "prevent overlapping partitions", [node(), Node]), + S1 + end, + {noreply, S2}; + +handle_info({prepare_shutdown, From, Ref}, S0) -> + %% Prevent lost_connection messages being sent due to + %% connections being taken down during the shutdown... + S1 = S0#state{conf = #conf{connect_all = false, + prevent_over_part = false}}, + From ! {Ref, ok}, + {noreply, S1}; + handle_info(known, S) -> io:format(">>>> ~p\n",[S#state.known]), {noreply, S}; @@ -1072,7 +1279,8 @@ check_replies([], _Id, _Replies) -> %% Another node wants to synchronize its registered names with us. %% Both nodes must have a lock before they are allowed to continue. %%======================================================================== -init_connect(Vsn, Node, InitMsg, HisTag, Resolvers, S) -> +init_connect(Vsn, Node, InitMsg, HisTag, HisVsn, + #state{resolvers = Resolvers, known = Known} = S) -> %% It is always the responsibility of newer versions to understand %% older versions of the protocol. put({prot_vsn, Node}, Vsn), @@ -1088,7 +1296,9 @@ init_connect(Vsn, Node, InitMsg, HisTag, Resolvers, S) -> false -> ?trace({init_connect,{pre_connect,Node},{histag,HisTag}}), put({pre_connect, Node}, {Vsn, InitMsg, HisTag}) - end. + end, + S#state{known = maps:put({pending, Node}, HisVsn, Known)}. + %%======================================================================== %% In the simple case, we'll get lock_is_set before we get exchange, @@ -1151,9 +1361,9 @@ resolved(Node, HisResolved, HisKnown, Names_ext, S0) -> NewNodesMsg = {new_nodes, node(), Ops, Names_ext, NewNodes, ExtraInfo}, NewNodesF = fun() -> lists:foreach( - fun (N) -> + fun (N) when is_atom(N) -> case maps:get(N, Known) of - V when V >= 7 -> + V when V >= ?pgpv_vsn -> gen_server:cast({global_name_server, N}, NewNodesMsg); _OldV -> @@ -1161,7 +1371,9 @@ resolved(Node, HisResolved, HisKnown, Names_ext, S0) -> {new_nodes, node(), Ops, Names_ext, node_list(NewNodes), ExtraInfo}) - end + end; + (_) -> + ok end, maps:keys(Known)) end, @@ -1179,7 +1391,7 @@ cancel_resolved_locker(Node, CancelFun) -> ?trace({calling_cancel_locker,Tag,get()}), S = CancelFun(Tag), reset_node_state(Node), - S. + S#state{known = maps:remove({pending, Node}, S#state.known)}. new_nodes(Ops, ConnNode, Names_ext, Nodes, ExtraInfo, S0) -> %% (*) This one requires some thought... @@ -1949,7 +2161,6 @@ cancel_locker(Node, S, Tag, ToBeRunOnLockerF) -> reset_node_state(Node) -> ?trace({{node,Node}, reset_node_state, get()}), - erase({pending_known, Node}), erase({wait_lock, Node}), erase({save_ops, Node}), erase({pre_connect, Node}), @@ -2085,23 +2296,99 @@ pid_locks(Ref) -> ref_is_locking(Ref, PidRefs) -> lists:keyfind(Ref, 2, PidRefs) =/= false. -handle_nodedown(Node, S) -> +handle_nodedown(Node, #state{synced = Syncs, + known = Known0} = S, What) -> %% DOWN signals from monitors have removed locks and registered names. - #state{synced = Syncs} = S, NewS = cancel_locker(Node, S, get({sync_tag_my, Node})), NewS#state.the_locker ! {remove_from_known, Node}, reset_node_state(Node), - NewS#state{known = maps:remove(Node, S#state.known), + Known1 = case What of + remove_connection -> + maps:put({removing, Node}, yes, Known0); + ignore_node -> + maps:remove({removing, Node}, Known0); + disconnected -> + case maps:get({removing, Node}, Known0, no) of + yes -> + maps:remove({removing, Node}, Known0); + no -> + inform_connection_loss(Node, S), + Known0 + end + end, + NewS#state{known = maps:remove(Node, + maps:remove({pending, Node}, Known1)), synced = lists:delete(Node, Syncs)}. +inform_connection_loss(Node, + #state{conf = #conf{connect_all = true, + prevent_over_part = true}} = S) -> + Msg = {lost_connection, + node(), + (get(creation_extension) + + erlang:system_info(creation) + - ?MIN_64BIT_SMALL_INT), + erlang:unique_integer([monotonic]), + Node}, + gns_volatile_multicast(Msg, Node, ?pop_vsn, true, S); +inform_connection_loss(_Node, #state{}) -> + ok. + +%% +%% Volatile send (does not bring up connections and does not +%% preserve signal order) of Msg to global name server at Node... +%% +gns_volatile_send(Node, Msg) -> + To = {global_name_server, Node}, + case erlang:send(To, Msg, [nosuspend, noconnect]) of + ok -> + ok; + noconnect -> + ok; + nosuspend -> + _ = spawn(fun () -> + _ = erlang:send(To, Msg, [noconnect]) + end), + ok + end. + +%% +%% Volatile multicast of Msg to all global name servers on known nodes +%% (and pending known nodes if AlsoPend is true) using protocol version +%% MinVer or larger... +%% +gns_volatile_multicast(Msg, IgnoreNode, MinVer, + AlsoPend, #state{known = Known}) -> + Send = fun (Key, Node) -> + case maps:get(Key, Known) of + Ver when Ver < MinVer -> ok; + _Ver -> gns_volatile_send(Node, Msg) + end + end, + lists:foreach(fun (Node) when is_atom(Node), Node =/= IgnoreNode -> + Send(Node, Node); + ({pending, Node} = Key) when AlsoPend == true, + Node =/= IgnoreNode -> + Send(Key, Node); + (_) -> + ok + end, maps:keys(Known)). + +is_node_potentially_known(Node, #state{known = Known}) -> + maps:is_key(Node, Known) orelse maps:is_key({pending, Node}, Known). + +node_vsn(Node, #state{}) when node() == Node -> + ?vsn; node_vsn(Node, #state{known = Known}) -> case maps:find(Node, Known) of {ok, Ver} -> Ver; error -> - case get({pending_known, Node}) of - undefined -> -1; - Ver when is_integer(Ver) -> Ver + case maps:find({pending, Node}, Known) of + {ok, Ver} -> + Ver; + error -> + 0 end end. @@ -2114,7 +2401,7 @@ node_list(NList) -> make_node_vsn_list(NList, #state{} = S) -> - lists:map(fun ({N, -1}) when is_atom(N) -> + lists:map(fun ({N, 0}) when is_atom(N) -> {N, node_vsn(N, S)}; (N) when is_atom(N) -> {N, node_vsn(N, S)}; @@ -2122,12 +2409,23 @@ make_node_vsn_list(NList, #state{} = S) -> is_integer(V) -> NV end, NList). - -mk_known_list(Vsn, #state{known = Known}) when Vsn < 7 -> - lists:map(fun ({N, _V}) -> N end, maps:to_list(Known)); +mk_known_list(Vsn, #state{known = Known}) when Vsn < ?pgpv_vsn -> + lists:foldl(fun ({N, _V}, Ns) when is_atom(N) -> + [N | Ns]; + (_, Ns) -> + Ns + end, + [], + maps:to_list(Known)); mk_known_list(_Vsn, #state{known = Known}) -> - maps:to_list(Known). + lists:foldl(fun ({N, _V} = NV, NVs) when is_atom(N) -> + [NV | NVs]; + (_, Ns) -> + Ns + end, + [], + maps:to_list(Known)). add_to_known(AddKnown, #state{known = Known} = S) -> Fun = fun (N, Acc) when N == node() -> @@ -2137,7 +2435,7 @@ add_to_known(AddKnown, #state{known = Known} = S) -> (N, {A, K} = Acc) when is_atom(N) -> case maps:is_key(N, K) of true -> Acc; - false -> {[N|A], maps:put(N, -1, K)} + false -> {[N|A], maps:put(N, 0, K)} end; ({N, V}, {A, K} = Acc) -> case maps:find(N, K) of @@ -2155,6 +2453,38 @@ add_to_known(AddKnown, #state{known = Known} = S) -> {Added, NewKnown} = lists:foldl(Fun, {[], Known}, AddKnown), {Added, S#state{known = NewKnown}}. +get_lost_connection_info(LcKey) -> + case ets:lookup(global_lost_connections, LcKey) of + [{LcKey, LcValue}] -> + LcValue; + _ -> + {undefined, undefined, undefined} + end. + +save_lost_connection_info(LcKey, XCre, OpId, undefined) -> + %% Make sure we clean up old unused information about + %% lost connections... + Tmr = erlang:start_timer(?lost_conn_info_cleanup_time, + self(), {lost_connection, LcKey}), + Value = {XCre, OpId, Tmr}, + _ = ets:insert(global_lost_connections, {LcKey, Value}), + ok; +save_lost_connection_info(LcKey, XCre, OpId, OldTmr) -> + %% Cancel lost connection info cleanup timer for info being replaced... + _ = erlang:cancel_timer(OldTmr, [{async, true}, {info, false}]), + save_lost_connection_info(LcKey, XCre, OpId, undefined). + +remove_lost_connection_info({timeout, Tmr, {lost_connection, LcKey}}) -> + case ets:lookup(global_lost_connections, LcKey) of + [{LcKey, {_, _, Tmr}}] -> + _ = ets:delete(global_lost_connections, LcKey), + ok; + _ -> + ok + end; +remove_lost_connection_info(_) -> + ok. + get_names() -> ets:select(global_names, ets:fun2ms(fun({Name, Pid, Method, _Ref}) -> diff --git a/lib/kernel/src/global_group.erl b/lib/kernel/src/global_group.erl index f5ead2a4c5..3e4ad1e685 100644 --- a/lib/kernel/src/global_group.erl +++ b/lib/kernel/src/global_group.erl @@ -925,15 +925,20 @@ handle_info({nodedown, Node}, S) when S#state.sync_state =:= no_conf -> handle_info({nodedown, Node}, S) -> % io:format("~p>>>>> nodedown, Node ~p ~n",[node(), Node]), send_monitor(S#state.monitor, {nodedown, Node}, S#state.sync_state), - global_name_server ! {nodedown, Node}, NN = lists:delete(Node, S#state.nodes), NSE = lists:delete(Node, S#state.sync_error), - NNC = case {lists:member(Node, get_own_nodes()), - lists:member(Node, S#state.no_contact)} of - {true, false} -> - [Node | S#state.no_contact]; - _ -> - S#state.no_contact + NNC = case lists:member(Node, get_own_nodes()) of + false -> + global_name_server ! {ignore_node, Node}, + S#state.no_contact; + true -> + global_name_server ! {nodedown, Node}, + case lists:member(Node, S#state.no_contact) of + false -> + [Node | S#state.no_contact]; + true -> + S#state.no_contact + end end, {noreply, S#state{nodes = NN, no_contact = NNC, sync_error = NSE}}; @@ -950,8 +955,7 @@ handle_info({disconnect_node, Node}, S) -> _ -> cont end, - global_name_server ! {nodedown, Node}, %% nodedown is used to inform global of the - %% disconnected node + global_name_server ! {ignore_node, Node}, NN = lists:delete(Node, S#state.nodes), NNC = lists:delete(Node, S#state.no_contact), NSE = lists:delete(Node, S#state.sync_error), @@ -1264,7 +1268,7 @@ kill_global_group_check() -> disconnect_nodes(DisconnectNodes) -> lists:foreach(fun(Node) -> {global_group, Node} ! {disconnect_node, node()}, - global:node_disconnected(Node) + global_name_server ! {ignore_node, Node} end, DisconnectNodes). @@ -1275,7 +1279,7 @@ disconnect_nodes(DisconnectNodes) -> force_nodedown(DisconnectNodes) -> lists:foreach(fun(Node) -> erlang:disconnect_node(Node), - global:node_disconnected(Node) + global_name_server ! {ignore_node, Node} end, DisconnectNodes). diff --git a/lib/kernel/src/kernel.app.src b/lib/kernel/src/kernel.app.src index 35c0c3f88e..234d71f745 100644 --- a/lib/kernel/src/kernel.app.src +++ b/lib/kernel/src/kernel.app.src @@ -146,9 +146,10 @@ pg2]}, {applications, []}, {env, [{logger_level, notice}, - {logger_sasl_compatible, false} + {logger_sasl_compatible, false}, + {prevent_overlapping_partitions, false} ]}, {mod, {kernel, []}}, - {runtime_dependencies, ["erts-10.6", "stdlib-3.5", "sasl-3.0"]} + {runtime_dependencies, ["erts-@OTP-17843@", "stdlib-3.5", "sasl-3.0"]} ] }. diff --git a/lib/kernel/test/erl_distribution_SUITE.erl b/lib/kernel/test/erl_distribution_SUITE.erl index 6f0064f0fa..2b84a68c52 100644 --- a/lib/kernel/test/erl_distribution_SUITE.erl +++ b/lib/kernel/test/erl_distribution_SUITE.erl @@ -121,6 +121,11 @@ tick(Config) when is_list(Config) -> run_dist_configs(fun tick/2, Config). tick(DCfg, _Config) -> + %% + %% This test case use disabled "connect all" so that + %% global wont interfere... + %% + %% First check that the normal case is OK! [Name1, Name2] = get_nodenames(2, dist_test), {ok, Node} = start_node(DCfg, Name1), @@ -148,11 +153,11 @@ tick(DCfg, _Config) -> %% node doesn't tick the client node within the interval ... {ok, ServNode} = start_node(DCfg, Name2, - "-kernel net_ticktime 100"), + "-kernel net_ticktime 100 -connect_all false"), rpc:call(ServNode, erl_distribution_SUITE, tick_serv_test, [Node, node()]), {ok, Node} = start_node(DCfg, Name1, - "-kernel net_ticktime 12"), + "-kernel net_ticktime 12 -connect_all false"), rpc:call(Node, erl_distribution_SUITE, tick_cli_test, [ServNode]), spawn_link(erl_distribution_SUITE, keep_conn, [Node]), @@ -585,6 +590,10 @@ tick_change(Config) when is_list(Config) -> run_dist_configs(fun tick_change/2, Config). tick_change(DCfg, _Config) -> + %% + %% This test case use disabled "connect all" so that + %% global wont interfere... + %% [BN, CN] = get_nodenames(2, tick_change), DefaultTT = net_kernel:get_net_ticktime(), unchanged = net_kernel:set_net_ticktime(DefaultTT, 60), @@ -601,7 +610,7 @@ tick_change(DCfg, _Config) -> end, wait_until(fun () -> 10 == net_kernel:get_net_ticktime() end), - {ok, B} = start_node(DCfg, BN, "-kernel net_ticktime 10"), + {ok, B} = start_node(DCfg, BN, "-kernel net_ticktime 10 -connect_all false"), {ok, C} = start_node(DCfg, CN, "-kernel net_ticktime 10 -hidden"), OTE = process_flag(trap_exit, true), @@ -663,7 +672,7 @@ run_tick_change_test(DCfg, B, C, PrevTT, TT) -> wait_for_nodedowns(Tester, Ref) end, - {ok, D} = start_node(DCfg, DN, "-kernel net_ticktime " + {ok, D} = start_node(DCfg, DN, "-connect_all false -kernel net_ticktime " ++ integer_to_list(PrevTT)), NMA = spawn_link(fun () -> MonitorNodes([B, C, D]) end), @@ -697,7 +706,7 @@ run_tick_change_test(DCfg, B, C, PrevTT, TT) -> sleep(7), change_initiated = rpc:call(C,net_kernel,set_net_ticktime,[TT,10]), - {ok, E} = start_node(DCfg, EN, "-kernel net_ticktime " + {ok, E} = start_node(DCfg, EN, "-connect_all false -kernel net_ticktime " ++ integer_to_list(TT)), NME = spawn_link(E, fun () -> MonitorNodes([node(), B, C, D]) end), NMA2 = spawn_link(fun () -> MonitorNodes([E]) end), diff --git a/lib/kernel/test/global_SUITE.erl b/lib/kernel/test/global_SUITE.erl index 3837a44c64..feae9ba4db 100644 --- a/lib/kernel/test/global_SUITE.erl +++ b/lib/kernel/test/global_SUITE.erl @@ -40,7 +40,11 @@ both_known_1/1, lost_unregister/1, mass_death/1, - garbage_messages/1]). + garbage_messages/1, + ring_line/1, + lost_connection/1, + lost_connection2/1 + ]). -export([global_load/3, lock_global/2, lock_global2/2]). @@ -77,7 +81,8 @@ all() -> simple_resolve2, simple_resolve3, leftover_name, re_register_name, name_exit, external_nodes, many_nodes, sync_0, global_groups_change, register_1, both_known_1, - lost_unregister, mass_death, garbage_messages] + lost_unregister, mass_death, garbage_messages, + lost_connection, lost_connection2] end. groups() -> @@ -217,6 +222,14 @@ lock_global(Parent, Config) -> %%% to obtain a lock for 'global' on node 3, which would keep the %%% name registry from ever becoming consistent again. both_known_1(Config) when is_list(Config) -> + case prevent_overlapping_partitions() of + true -> + {skipped, "Prevent overlapping partitions enabled"}; + false -> + both_known_1_test(Config) + end. + +both_known_1_test(Config) when is_list(Config) -> Timeout = 30, ct:timetrap({seconds,Timeout}), init_high_level_trace(Timeout), @@ -299,6 +312,14 @@ both_known_1(Config) when is_list(Config) -> %% OTP-6428. An unregistered name reappears. lost_unregister(Config) when is_list(Config) -> + case prevent_overlapping_partitions() of + true -> + {skipped, "Prevent overlapping partitions enabled"}; + false -> + lost_unregister_test(Config) + end. + +lost_unregister_test(Config) when is_list(Config) -> Timeout = 30, ct:timetrap({seconds,Timeout}), init_high_level_trace(Timeout), @@ -339,6 +360,120 @@ lost_unregister(Config) when is_list(Config) -> init_condition(Config), ok. +lost_connection(Config) when is_list(Config) -> + case prevent_overlapping_partitions() of + true -> + lost_connection_test(Config); + false -> + {skipped, "Prevent overlapping partitions disabled"} + end. + +lost_connection_test(Config) when is_list(Config) -> + %% OTP-17843: Registered names could become inconsistent due to + %% overlapping partitions. This has been solved by + %% global actively disconnecting nodes to prevent + %% overlapping partitions. + ct:timetrap({seconds, 15}), + + [Cp1, Cp2] = start_nodes([cp1, cp2], peer, Config), + + PartCtrlr = setup_partitions(Config, [[node(), Cp1, Cp2]]), + + wait_for_ready_net(Config), + + {Gurka, yes} = start_proc_basic(gurka), + + check_everywhere([node(), Cp1, Cp2], gurka, Config), + + Gurka = global:whereis_name(gurka), + + erlang:disconnect_node(Cp2), %% lost connection previously causing issues... + + ok = rfcall( + PartCtrlr, + fun () -> + ok = rfcall( + Cp2, + fun () -> + {AltGurka, yes} = start_proc_basic(gurka), + AltGurka = global:whereis_name(gurka), + ok + end), + timer:sleep(1000), + rpc:cast(Cp2, erlang, halt, []), + wait_until(fun () -> not lists:member(Cp2, nodes(hidden)) end) + end), + + Reconnected = case lists:member(Cp1, nodes()) of + true -> + false; + false -> + erlang:display("reconnecting Cp1"), + pong = net_adm:ping(Cp1), + timer:sleep(500), + true + end, + + check_everywhere([node(), Cp1], gurka, Config), + + Gurka = global:whereis_name(gurka), + + ok = global:unregister_name(gurka), + + stop_node(Cp1), + stop_partition_controller(PartCtrlr), + + {comment, case Reconnected of + true -> "Re-connected Cp1"; + false -> "No re-connection of Cp1 needed" + end}. + +lost_connection2(Config) when is_list(Config) -> + case prevent_overlapping_partitions() of + true -> + lost_connection2_test(Config); + false -> + {skipped, "Prevent overlapping partitions disabled"} + end. + +lost_connection2_test(Config) when is_list(Config) -> + %% OTP-17843: Registered names could become inconsistent due to + %% overlapping partitions. This has been solved by + %% global actively disconnecting nodes to prevent + %% overlapping partitions. + ct:timetrap({seconds, 15}), + + [Cp1, Cp2, Cp3, Cp4] = start_nodes([cp1, cp2, cp3, cp4], peer, Config), + + PartCtrlr = setup_partitions(Config, [[node(), Cp1, Cp2, Cp3, Cp4]]), + + wait_for_ready_net(Config), + + {Gurka, yes} = start_proc_basic(gurka), + + check_everywhere([node(), Cp1, Cp2], gurka, Config), + + Gurka = global:whereis_name(gurka), + + disconnect_nodes(PartCtrlr, Cp3, Cp4), + + Nodes = nodes(), + true = lists:member(Cp1, Nodes), + true = lists:member(Cp2, Nodes), + false = lists:member(Cp3, Nodes), + false = lists:member(Cp4, Nodes), + + pong = net_adm:ping(Cp3), + pong = net_adm:ping(Cp4), + + stop_node(Cp1), + stop_node(Cp2), + stop_node(Cp3), + stop_node(Cp4), + stop_partition_controller(PartCtrlr), + + ok. + -define(UNTIL_LOOP, 300). -define(end_tag, 'end at'). @@ -963,12 +1098,9 @@ name_die(Config) when is_list(Config) -> T1 = node(), Part1 = [T1], Part2 = [Cp1], - rpc_cast(Cp1, - ?MODULE, part_2_2, [Config, - Part1, - Part2, - []]), - ?UNTIL(is_ready_partition(Config)), + + PartCtrlr = setup_partitions(Config, [Part1, Part2]), + ?UNTIL(undefined =:= global:whereis_name(Name)), yes = global:register_name(Name, Pid), @@ -986,12 +1118,9 @@ name_die(Config) when is_list(Config) -> KillFile = filename:join([Dir, "kill.txt"]), file:delete(KillFile), erlang:spawn(Cp1, fun() -> kill_pid(Pid2, KillFile, Config) end), - rpc_cast(Cp1, - ?MODULE, part_2_2, [Config, - Part1, - Part2, - []]), - ?UNTIL(is_ready_partition(Config)), + + setup_partitions(PartCtrlr, [Part1, Part2]), + ?UNTIL(undefined =:= global:whereis_name(Name)), yes = global:register_name(Name, Pid2), touch(KillFile, "kill"), @@ -1001,6 +1130,7 @@ name_die(Config) when is_list(Config) -> ?UNTIL(OrigNames =:= global:registered_names()), write_high_level_trace(Config), stop_nodes(Cps), + stop_partition_controller(PartCtrlr), init_condition(Config), ok. @@ -1023,16 +1153,26 @@ basic_partition(Config) when is_list(Config) -> wait_for_ready_net(Config), %% make cp2 and cp3 connected, partitioned from us and cp1 - rpc_cast(Cp2, ?MODULE, part1, [Config, node(), Cp1, Cp3]), - ?UNTIL(is_ready_partition(Config)), + PCtrlr = setup_partitions(Config, [[node(), Cp1], [Cp2, Cp3]]), %% start different processes in both partitions {Pid, yes} = start_proc(test), + %% Reach into the other partition via PCtrlr... + ok = rfcall( + PCtrlr, + fun () -> + {_, yes} = rpc:call(Cp2, ?MODULE, + start_proc, [test2]), + {_, yes} = rpc:call(Cp1, ?MODULE, + start_proc, [test4]), + ok + end), + %% connect to other partition pong = net_adm:ping(Cp2), pong = net_adm:ping(Cp3), - [Cp1, Cp2, Cp3] = lists:sort(nodes()), + wait_until(fun () -> [Cp1, Cp2, Cp3] == lists:sort(nodes()) end), %% check names ?UNTIL(Pid =:= rpc:call(Cp2, global, whereis_name, [test])), @@ -1059,6 +1199,7 @@ basic_partition(Config) when is_list(Config) -> stop_node(Cp1), stop_node(Cp2), stop_node(Cp3), + stop_node(PCtrlr), init_condition(Config), ok. @@ -1091,12 +1232,23 @@ basic_name_partition(Config) when is_list(Config) -> %% cp2: register name12 %% cp3: register name03 - rpc_cast(Cp2, ?MODULE, part1_5, [Config, node(), Cp1, Cp3]), - ?UNTIL(is_ready_partition(Config)), + PCtrlr = setup_partitions(Config, [[node(), Cp1], [Cp2, Cp3]]), %% start different processes in both partitions {_, yes} = start_proc_basic(name03), {_, yes} = rpc:call(Cp1, ?MODULE, start_proc_basic, [name12]), + + %% Reach into the other partition via PCtrlr... + ok = rfcall( + PCtrlr, + fun () -> + {_, yes} = rpc:call(Cp2, ?MODULE, + start_proc_basic, [name12]), + {_, yes} = rpc:call(Cp3, ?MODULE, + start_proc_basic, [name03]), + ok + end), + ct:sleep(1000), %% connect to other partition @@ -1135,6 +1287,7 @@ basic_name_partition(Config) when is_list(Config) -> stop_node(Cp1), stop_node(Cp2), stop_node(Cp3), + stop_node(PCtrlr), init_condition(Config), ok. @@ -1160,24 +1313,36 @@ advanced_partition(Config) when is_list(Config) -> init_condition(Config), OrigNames = global:registered_names(), + Parent = self(), + [Cp0, Cp1, Cp2, Cp3, Cp4, Cp5, Cp6] = start_nodes([cp0, cp1, cp2, cp3, cp4, cp5, cp6], peer, Config), Nodes = lists:sort([node(), Cp0, Cp1, Cp2, Cp3, Cp4, Cp5, Cp6]), wait_for_ready_net(Config), %% make cp3-cp6 connected, partitioned from us and cp0-cp2 - rpc_cast(Cp3, ?MODULE, part2, - [Config, self(), node(), Cp0, Cp1, Cp2, Cp3, Cp4, Cp5,Cp6]), - ?UNTIL(is_ready_partition(Config)), + PCntrlr = setup_partitions(Config, [[node(), Cp0, Cp1, Cp2], [Cp3, Cp4, Cp5, Cp6]]), + + %% start processes in other partition (via partition controller)... + ok = rfcall(PCntrlr, + fun () -> + rfcall(Cp3, + fun () -> + start_procs(Parent, Cp4, Cp5, Cp6, Config), + ok + end) + end), %% start different processes in this partition start_procs(self(), Cp0, Cp1, Cp2, Config), %% connect to other partition pong = net_adm:ping(Cp3), - pong = net_adm:ping(Cp4), - pong = net_adm:ping(Cp5), - pong = net_adm:ping(Cp6), + + wait_until(fun () -> + CurrNodes = lists:sort(?NODES), + io:format("CurrNodes: ~p~n", [CurrNodes]), + Nodes == CurrNodes end), wait_for_ready_net(Config), @@ -1240,6 +1405,7 @@ advanced_partition(Config) when is_list(Config) -> stop_node(Cp4), stop_node(Cp5), stop_node(Cp6), + stop_node(PCntrlr), init_condition(Config), ok. @@ -1261,16 +1427,44 @@ stress_partition(Config) when is_list(Config) -> init_condition(Config), OrigNames = global:registered_names(), - [Cp0, Cp1, Cp2, Cp3, Cp4, Cp5, Cp6] - = start_nodes([cp0, cp1, cp2, cp3, cp4, cp5, cp6], peer, Config), + Parent = self(), + + [Cp0, Cp1, Cp2, Cp3, Cp4, Cp5, Cp6a, Cp6b] + = start_nodes([cp0, cp1, cp2, cp3, cp4, cp5, cp6a, cp6b], peer, Config), wait_for_ready_net(Config), %% make cp3-cp5 connected, partitioned from us and cp0-cp2 %% cp6 is alone (single node). cp6 pings cp0 and cp3 in 12 secs... - rpc_cast(Cp3, ?MODULE, part3, - [Config, self(), node(), Cp0, Cp1, Cp2, Cp3, Cp4, Cp5,Cp6]), - ?UNTIL(is_ready_partition(Config)), + PCntrlr = setup_partitions(Config, [[node(), Cp0, Cp1, Cp2], [Cp3, Cp4, Cp5, Cp6a], [Cp6b]]), + + %% start processes in other partition (via partition controller)... + ok = rfcall(PCntrlr, + fun () -> + ok = rfcall(Cp3, + fun () -> + start_procs(Parent, Cp4, Cp5, Cp6a, Config), + ok + end), + ok = rfcall(Cp6b, + fun () -> + Pid1 = start_proc3(test1), + assert_pid(Pid1), + Pid2 = start_proc3(test3), + assert_pid(Pid2), + yes = global:register_name( + test1, Pid1), + yes = global:register_name( + test3, Pid2, + fun global:random_notify_name/3), + ok + end), + %% Make Cp5 crash + rpc:cast(Cp5, ?MODULE, crash, [12000]), + %% Make Cp6b alone + rpc:cast(Cp6b, ?MODULE, alone, [Cp0, Cp3]), + ok + end), %% start different processes in this partition start_procs(self(), Cp0, Cp1, Cp2, Config), @@ -1289,17 +1483,19 @@ stress_partition(Config) when is_list(Config) -> %% connect to other partition pong = net_adm:ping(Cp3), + pong = net_adm:ping(Cp4), rpc_cast(Cp2, ?MODULE, crash, [0]), %% Start new nodes {ok, Cp7} = start_peer_node(cp7, Config), {ok, Cp2_2} = start_peer_node(cp2, Config), - Nodes = lists:sort([node(), Cp0, Cp1, Cp2_2, Cp3, Cp4, Cp6, Cp7, Cp8]), + Nodes = lists:sort([node(), Cp0, Cp1, Cp2_2, Cp3, Cp4, Cp6a, Cp6b, Cp7, Cp8]), put(?nodes_tag, Nodes), - pong = net_adm:ping(Cp4), - pong = net_adm:ping(Cp6), - pong = net_adm:ping(Cp8), + %% We don't know how the crashes partitions the net, so + %% we cast trough all nodes so we get them all connected + %% again... + cast_line(Nodes), wait_for_ready_net(Nodes, Config), @@ -1332,9 +1528,11 @@ stress_partition(Config) when is_list(Config) -> stop_node(Cp3), stop_node(Cp4), stop_node(Cp5), - stop_node(Cp6), + stop_node(Cp6a), + stop_node(Cp6b), stop_node(Cp7), stop_node(Cp8), + stop_node(PCntrlr), init_condition(Config), ok. @@ -1373,40 +1571,27 @@ ring(Config) when is_list(Config) -> wait_for_ready_net(Config), - Time = msec() + 7000, - - rpc_cast(Cp0, ?MODULE, single_node, [Time, Cp8, Config]), - rpc_cast(Cp1, ?MODULE, single_node, [Time, Cp0, Config]), - rpc_cast(Cp2, ?MODULE, single_node, [Time, Cp1, Config]), - rpc_cast(Cp3, ?MODULE, single_node, [Time, Cp2, Config]), - rpc_cast(Cp4, ?MODULE, single_node, [Time, Cp3, Config]), - rpc_cast(Cp5, ?MODULE, single_node, [Time, Cp4, Config]), - rpc_cast(Cp6, ?MODULE, single_node, [Time, Cp5, Config]), - rpc_cast(Cp7, ?MODULE, single_node, [Time, Cp6, Config]), - rpc_cast(Cp8, ?MODULE, single_node, [Time, Cp7, Config]), - - %% sleep to make the partitioned net ready - sleep(Time - msec()), - - pong = net_adm:ping(Cp0), - pong = net_adm:ping(Cp1), - pong = net_adm:ping(Cp2), - pong = net_adm:ping(Cp3), - pong = net_adm:ping(Cp4), - pong = net_adm:ping(Cp5), - pong = net_adm:ping(Cp6), - pong = net_adm:ping(Cp7), - pong = net_adm:ping(Cp8), + PartCtrlr = setup_partitions(Config, + [[node()], [Cp0], [Cp1], [Cp2], [Cp3], + [Cp4], [Cp5], [Cp6], [Cp7], [Cp8]]), + Time = msec() + 1000, + + ok = rfcall( + PartCtrlr, + fun () -> + rpc:cast(Cp0, ?MODULE, single_node, [Time, Cp8]), % ping ourself! + rpc:cast(Cp1, ?MODULE, single_node, [Time, Cp0]), + rpc:cast(Cp2, ?MODULE, single_node, [Time, Cp1]), + rpc:cast(Cp3, ?MODULE, single_node, [Time, Cp2]), + rpc:cast(Cp4, ?MODULE, single_node, [Time, Cp3]), + rpc:cast(Cp5, ?MODULE, single_node, [Time, Cp4]), + rpc:cast(Cp6, ?MODULE, single_node, [Time, Cp5]), + rpc:cast(Cp7, ?MODULE, single_node, [Time, Cp6]), + rpc:cast(Cp8, ?MODULE, single_node, [Time, Cp7]), + ok + end), pong = net_adm:ping(Cp0), - pong = net_adm:ping(Cp1), - pong = net_adm:ping(Cp2), - pong = net_adm:ping(Cp3), - pong = net_adm:ping(Cp4), - pong = net_adm:ping(Cp5), - pong = net_adm:ping(Cp6), - pong = net_adm:ping(Cp7), - pong = net_adm:ping(Cp8), wait_for_ready_net(Nodes, Config), @@ -1437,6 +1622,7 @@ ring(Config) when is_list(Config) -> stop_node(Cp6), stop_node(Cp7), stop_node(Cp8), + stop_partition_controller(PartCtrlr), init_condition(Config), ok. @@ -1459,31 +1645,24 @@ simple_ring(Config) when is_list(Config) -> wait_for_ready_net(Config), - Time = msec() + 5000, - - rpc_cast(Cp0, ?MODULE, single_node, [Time, Cp5, Config]), - rpc_cast(Cp1, ?MODULE, single_node, [Time, Cp0, Config]), - rpc_cast(Cp2, ?MODULE, single_node, [Time, Cp1, Config]), - rpc_cast(Cp3, ?MODULE, single_node, [Time, Cp2, Config]), - rpc_cast(Cp4, ?MODULE, single_node, [Time, Cp3, Config]), - rpc_cast(Cp5, ?MODULE, single_node, [Time, Cp4, Config]), - - %% sleep to make the partitioned net ready - sleep(Time - msec()), - - pong = net_adm:ping(Cp0), - pong = net_adm:ping(Cp1), - pong = net_adm:ping(Cp2), - pong = net_adm:ping(Cp3), - pong = net_adm:ping(Cp4), - pong = net_adm:ping(Cp5), + PartCtrlr = setup_partitions(Config, + [[node()], [Cp0], [Cp1], [Cp2], [Cp3], + [Cp4], [Cp5]]), + Time = msec() + 1000, + + ok = rfcall( + PartCtrlr, + fun () -> + rpc:cast(Cp0, ?MODULE, single_node, [Time, Cp5]), % ping ourself! + rpc:cast(Cp1, ?MODULE, single_node, [Time, Cp0]), + rpc:cast(Cp2, ?MODULE, single_node, [Time, Cp1]), + rpc:cast(Cp3, ?MODULE, single_node, [Time, Cp2]), + rpc:cast(Cp4, ?MODULE, single_node, [Time, Cp3]), + rpc:cast(Cp5, ?MODULE, single_node, [Time, Cp4]), + ok + end), pong = net_adm:ping(Cp0), - pong = net_adm:ping(Cp1), - pong = net_adm:ping(Cp2), - pong = net_adm:ping(Cp3), - pong = net_adm:ping(Cp4), - pong = net_adm:ping(Cp5), wait_for_ready_net(Nodes, Config), @@ -1511,6 +1690,7 @@ simple_ring(Config) when is_list(Config) -> stop_node(Cp3), stop_node(Cp4), stop_node(Cp5), + stop_partition_controller(PartCtrlr), init_condition(Config), ok. @@ -1531,41 +1711,27 @@ line(Config) when is_list(Config) -> wait_for_ready_net(Config), - Time = msec() + 7000, - - rpc_cast(Cp0, ?MODULE, single_node, - [Time, Cp0, Config]), % ping ourself! - rpc_cast(Cp1, ?MODULE, single_node, [Time, Cp0, Config]), - rpc_cast(Cp2, ?MODULE, single_node, [Time, Cp1, Config]), - rpc_cast(Cp3, ?MODULE, single_node, [Time, Cp2, Config]), - rpc_cast(Cp4, ?MODULE, single_node, [Time, Cp3, Config]), - rpc_cast(Cp5, ?MODULE, single_node, [Time, Cp4, Config]), - rpc_cast(Cp6, ?MODULE, single_node, [Time, Cp5, Config]), - rpc_cast(Cp7, ?MODULE, single_node, [Time, Cp6, Config]), - rpc_cast(Cp8, ?MODULE, single_node, [Time, Cp7, Config]), - - %% Sleep to make the partitioned net ready - sleep(Time - msec()), - - pong = net_adm:ping(Cp0), - pong = net_adm:ping(Cp1), - pong = net_adm:ping(Cp2), - pong = net_adm:ping(Cp3), - pong = net_adm:ping(Cp4), - pong = net_adm:ping(Cp5), - pong = net_adm:ping(Cp6), - pong = net_adm:ping(Cp7), - pong = net_adm:ping(Cp8), + PartCtrlr = setup_partitions(Config, + [[node()], [Cp0], [Cp1], [Cp2], [Cp3], + [Cp4], [Cp5], [Cp6], [Cp7], [Cp8]]), + ThisNode = node(), - pong = net_adm:ping(Cp0), - pong = net_adm:ping(Cp1), - pong = net_adm:ping(Cp2), - pong = net_adm:ping(Cp3), - pong = net_adm:ping(Cp4), - pong = net_adm:ping(Cp5), - pong = net_adm:ping(Cp6), - pong = net_adm:ping(Cp7), - pong = net_adm:ping(Cp8), + Time = msec() + 1000, + ok = rfcall( + PartCtrlr, + fun () -> + rpc:cast(Cp0, ?MODULE, single_node, [Time, Cp0]), % ping ourself! + rpc:cast(Cp1, ?MODULE, single_node, [Time, Cp0]), + rpc:cast(Cp2, ?MODULE, single_node, [Time, Cp1]), + rpc:cast(Cp3, ?MODULE, single_node, [Time, Cp2]), + rpc:cast(Cp4, ?MODULE, single_node, [Time, Cp3]), + rpc:cast(Cp5, ?MODULE, single_node, [Time, Cp4]), + rpc:cast(Cp6, ?MODULE, single_node, [Time, Cp5]), + rpc:cast(Cp7, ?MODULE, single_node, [Time, Cp6]), + rpc:cast(Cp8, ?MODULE, single_node, [Time, Cp7]), + rpc:cast(ThisNode, ?MODULE, single_node, [Time, Cp8]), + ok + end), wait_for_ready_net(Nodes, Config), @@ -1596,6 +1762,7 @@ line(Config) when is_list(Config) -> stop_node(Cp6), stop_node(Cp7), stop_node(Cp8), + stop_partition_controller(PartCtrlr), init_condition(Config), ok. @@ -1618,32 +1785,24 @@ simple_line(Config) when is_list(Config) -> wait_for_ready_net(Config), - Time = msec() + 5000, - - rpc_cast(Cp0, ?MODULE, single_node, - [Time, Cp0, Config]), % ping ourself! - rpc_cast(Cp1, ?MODULE, single_node, [Time, Cp0, Config]), - rpc_cast(Cp2, ?MODULE, single_node, [Time, Cp1, Config]), - rpc_cast(Cp3, ?MODULE, single_node, [Time, Cp2, Config]), - rpc_cast(Cp4, ?MODULE, single_node, [Time, Cp3, Config]), - rpc_cast(Cp5, ?MODULE, single_node, [Time, Cp4, Config]), - - %% sleep to make the partitioned net ready - sleep(Time - msec()), - - pong = net_adm:ping(Cp0), - pong = net_adm:ping(Cp1), - pong = net_adm:ping(Cp2), - pong = net_adm:ping(Cp3), - pong = net_adm:ping(Cp4), - pong = net_adm:ping(Cp5), + PartCtrlr = setup_partitions(Config, + [[node()], [Cp0], [Cp1], [Cp2], [Cp3], + [Cp4], [Cp5]]), + ThisNode = node(), - pong = net_adm:ping(Cp0), - pong = net_adm:ping(Cp1), - pong = net_adm:ping(Cp2), - pong = net_adm:ping(Cp3), - pong = net_adm:ping(Cp4), - pong = net_adm:ping(Cp5), + Time = msec() + 1000, + ok = rfcall( + PartCtrlr, + fun () -> + rpc:cast(Cp0, ?MODULE, single_node, [Time, Cp0]), % ping ourself! + rpc:cast(Cp1, ?MODULE, single_node, [Time, Cp0]), + rpc:cast(Cp2, ?MODULE, single_node, [Time, Cp1]), + rpc:cast(Cp3, ?MODULE, single_node, [Time, Cp2]), + rpc:cast(Cp4, ?MODULE, single_node, [Time, Cp3]), + rpc:cast(Cp5, ?MODULE, single_node, [Time, Cp4]), + rpc:cast(ThisNode, ?MODULE, single_node, [Time, Cp5]), + ok + end), wait_for_ready_net(Nodes, Config), @@ -1671,6 +1830,7 @@ simple_line(Config) when is_list(Config) -> stop_node(Cp3), stop_node(Cp4), stop_node(Cp5), + stop_partition_controller(PartCtrlr), init_condition(Config), ok. @@ -1756,9 +1916,9 @@ otp_1849(Config) when is_list(Config) -> %% Test ticket: Deadlock in global. otp_3162(Config) when is_list(Config) -> StartFun = fun() -> - {ok, Cp1} = start_node(cp1, Config), - {ok, Cp2} = start_node(cp2, Config), - {ok, Cp3} = start_node(cp3, Config), + {ok, Cp1} = start_node(cp1, peer, Config), + {ok, Cp2} = start_node(cp2, peer, Config), + {ok, Cp3} = start_node(cp3, peer, Config), [Cp1, Cp2, Cp3] end, do_otp_3162(StartFun, Config). @@ -1772,6 +1932,10 @@ do_otp_3162(StartFun, Config) -> wait_for_ready_net(Config), + ThisNode = node(), + + PartCntrlr = setup_partitions(Config, [[ThisNode, Cp1, Cp2, Cp3]]), + %% start procs on each node Pid1 = rpc:call(Cp1, ?MODULE, start_proc4, [kalle]), assert_pid(Pid1), @@ -1780,40 +1944,22 @@ do_otp_3162(StartFun, Config) -> Pid3 = rpc:call(Cp3, ?MODULE, start_proc4, [vera]), assert_pid(Pid3), - rpc_disconnect_node(Cp1, Cp2, Config), - - ?UNTIL - ([Cp3] =:= lists:sort(rpc:call(Cp1, erlang, nodes, [])) -- [node()]), - - ?UNTIL([kalle, vera] =:= - lists:sort(rpc:call(Cp1, global, registered_names, []))), - ?UNTIL - ([Cp3] =:= lists:sort(rpc:call(Cp2, erlang, nodes, [])) -- [node()]), - ?UNTIL([stina, vera] =:= - lists:sort(rpc:call(Cp2, global, registered_names, []))), - ?UNTIL - ([Cp1, Cp2] =:= - lists:sort(rpc:call(Cp3, erlang, nodes, [])) -- [node()]), - ?UNTIL([kalle, stina, vera] =:= - lists:sort(rpc:call(Cp3, global, registered_names, []))), - - pong = rpc:call(Cp2, net_adm, ping, [Cp1]), - - ?UNTIL - ([Cp2, Cp3] =:= - lists:sort(rpc:call(Cp1, erlang, nodes, [])) -- [node()]), - ?UNTIL(begin - NN = lists:sort(rpc:call(Cp1, global, registered_names, [])), - [kalle, stina, vera] =:= NN - end), - ?UNTIL - ([Cp1, Cp3] =:= - lists:sort(rpc:call(Cp2, erlang, nodes, [])) -- [node()]), - ?UNTIL([kalle, stina, vera] =:= - lists:sort(rpc:call(Cp2, global, registered_names, []))), - ?UNTIL - ([Cp1, Cp2] =:= - lists:sort(rpc:call(Cp3, erlang, nodes, [])) -- [node()]), + disconnect_nodes(PartCntrlr, Cp1, Cp2), + %% Nowadays we do not know how the net have been partitioned after the + %% disconnect, so we cannot perform all the tests this testcase originally + %% did. We instead only test the end result is ok after we have reconnected + %% all nodes. + + ok = rfcall(PartCntrlr, + fun () -> + rpc:cast(Cp1, net_kernel, connect_node, [Cp3]), + rpc:cast(Cp2, net_kernel, connect_node, [Cp3]), + rpc:cast(ThisNode, net_kernel, connect_node, [Cp3]), + ok + end), + + ?UNTIL(lists:sort([ThisNode, Cp1, Cp2]) =:= + lists:sort(rpc:call(Cp3, erlang, nodes, []))), ?UNTIL([kalle, stina, vera] =:= lists:sort(rpc:call(Cp3, global, registered_names, []))), @@ -1821,6 +1967,7 @@ do_otp_3162(StartFun, Config) -> stop_node(Cp1), stop_node(Cp2), stop_node(Cp3), + stop_partition_controller(PartCntrlr), init_condition(Config), ok. @@ -1949,6 +2096,8 @@ simple_disconnect(Config) when is_list(Config) -> [Cp1, Cp2] = Cps = start_nodes([n_1, n_2], peer, Config), wait_for_ready_net(Config), + PartCtrlr = start_partition_controller(Config), + Nodes = lists:sort([node() | Cps]), lists:foreach(fun(N) -> rpc:call(N, ?MODULE, start_tracer, []) end,Nodes), @@ -1962,16 +2111,21 @@ simple_disconnect(Config) when is_list(Config) -> ct:sleep(100), %% Disconnect test_server and Cp2. - true = erlang:disconnect_node(Cp2), - ct:sleep(500), + disconnect_nodes(PartCtrlr, node(), Cp2), + %% We might have been disconnected from Cp1 as well. Ensure connected + %% to Cp1... + pong = net_adm:ping(Cp1), %% _Pid is registered on Cp1. The exchange of names between Cp2 and %% test_server sees two identical pids. pong = net_adm:ping(PingNode), + ?UNTIL(Cps =:= lists:sort(nodes())), {_, Trace0} = collect_tracers(Nodes), Resolvers = [P || {_Node,new_resolver,{pid,P}} <- Trace0], + check_everywhere(Nodes, Name, Config), + true = undefined /= global:whereis_name(Name), lists:foreach(fun(P) -> P ! die end, Resolvers), lists:foreach(fun(P) -> wait_for_exit(P) end, Resolvers), check_everywhere(Nodes, Name, Config), @@ -1989,30 +2143,10 @@ simple_disconnect(Config) when is_list(Config) -> OrigNames = global:registered_names(), write_high_level_trace(Config), stop_nodes(Cps), + stop_partition_controller(PartCtrlr), init_condition(Config), ok. -%% Not used right now. -simple_dis(Nodes0, Name, Resolver, Config) -> - Nodes = [node() | Nodes0], - NN = lists:zip(Nodes, lists:seq(1, length(Nodes))), - [{_Node,Other} | Dis] = - [{N,[N1 || {N1,I1} <- NN, I1 > I + 1]} || {N,I} <- NN], - lists:foreach( - fun({Node, DisNodes}) -> - Args = [Node, DisNodes, Name, Resolver], - ok = rpc:call(Node, ?MODULE, simple_dis_node, Args) - end, Dis), - ok = simple_dis_node(node(), Other, Name, Resolver, Config). - -simple_dis_node(_Node, DisNodes, _Name, _Resolver, Config) -> - lists:foreach( - fun(OtherNode) -> _ = erlang:disconnect_node(OtherNode) end, DisNodes), - ?UNTIL(DisNodes -- nodes() =:= DisNodes), - ok. - - - %%%----------------------------------------------------------------- %%% Testing resolve of name. Many combinations with four nodes. %%%----------------------------------------------------------------- @@ -2023,7 +2157,8 @@ simple_dis_node(_Node, DisNodes, _Name, _Resolver, Config) -> n2, % node starting registered process in partition 2 nodes, % nodes expected to exist after ping n_res, % expected number of resolvers after ping - config + config, + ctrlr }). -define(RES(F), {F, fun ?MODULE:F/3}). @@ -2040,6 +2175,8 @@ simple_resolve(Config) when is_list(Config) -> Nodes = lists:sort([node() | Cps]), wait_for_ready_net(Config), + PartCtrlr = start_partition_controller(Config), + lists:foreach(fun(N) -> rpc:call(N, ?MODULE, start_tracer, []) end, Nodes), @@ -2049,7 +2186,8 @@ simple_resolve(Config) when is_list(Config) -> %% name 'link' remains... Cf = #cf{link = none, ping = A2, n1 = node(), n2 = A2, - nodes = [node(), N1, A2, Z2], n_res = 2, config = Config}, + nodes = [node(), N1, A2, Z2], n_res = 2, config = Config, + ctrlr = PartCtrlr}, %% There is no test with a resolver that deletes a pid (like %% global_exit_name does). The resulting DOWN signal just clears @@ -2157,6 +2295,7 @@ simple_resolve(Config) when is_list(Config) -> OrigNames = global:registered_names(), write_high_level_trace(Config), stop_nodes(Cps), + stop_partition_controller(PartCtrlr), init_condition(Config), ok. @@ -2175,12 +2314,15 @@ simple_resolve2(Config) when is_list(Config) -> wait_for_ready_net(Config), Nodes = lists:sort([node() | Cps]), + PartCtrlr = start_partition_controller(Config), + lists:foreach(fun(N) -> rpc:call(N, ?MODULE, start_tracer, []) end, Nodes), Cf = #cf{link = none, ping = A2, n1 = node(), n2 = A2, - nodes = [node(), N1, A2, Z2], n_res = 2, config = Config}, + nodes = [node(), N1, A2, Z2], n_res = 2, config = Config, + ctrlr = PartCtrlr}, %% Halt z_2. res(?RES(halt_second), Cps, Cf#cf{link = N1, n1 = N1, n2 = Z2, ping = A2, @@ -2193,6 +2335,7 @@ simple_resolve2(Config) when is_list(Config) -> OrigNames = global:registered_names(), write_high_level_trace(Config), stop_nodes(Cps), % Not all nodes may be present, but it works anyway. + stop_partition_controller(PartCtrlr), init_condition(Config), ok. @@ -2210,12 +2353,15 @@ simple_resolve3(Config) when is_list(Config) -> wait_for_ready_net(Config), Nodes = lists:sort([node() | Cps]), + PartCtrlr = start_partition_controller(Config), + lists:foreach(fun(N) -> rpc:call(N, ?MODULE, start_tracer, []) end, Nodes), Cf = #cf{link = none, ping = A2, n1 = node(), n2 = A2, - nodes = [node(), N1, A2, Z2], n_res = 2, config = Config}, + nodes = [node(), N1, A2, Z2], n_res = 2, config = Config, + ctrlr = PartCtrlr}, %% Halt a_2. res(?RES(halt_second), Cps, Cf#cf{link = node(), n2 = A2, @@ -2228,13 +2374,14 @@ simple_resolve3(Config) when is_list(Config) -> OrigNames = global:registered_names(), write_high_level_trace(Config), stop_nodes(Cps), % Not all nodes may be present, but it works anyway. + stop_partition_controller(PartCtrlr), init_condition(Config), ok. res({Res,Resolver}, [N1, A2, Z2], Cf) -> %% Note: there are no links anymore, but monitors. #cf{link = LinkedNode, ping = PingNode, n1 = Res1, n2 = OtherNode, - nodes = Nodes0, n_res = NRes, config = Config} = Cf, + nodes = Nodes0, n_res = NRes, config = Config, ctrlr = PartCtrlr} = Cf, io:format("~n~nResolver: ~p", [Res]), io:format(" Registered on partition 1: ~p", [Res1]), io:format(" Registered on partition 2: ~p", [OtherNode]), @@ -2252,11 +2399,20 @@ res({Res,Resolver}, [N1, A2, Z2], Cf) -> %% expected monitors remain between registered processes and the %% global_name_server. - rpc_cast(OtherNode, - ?MODULE, - part_2_2, - [Config, Part1, Part2, [{Name, Resolver}]]), - ?UNTIL(is_ready_partition(Config)), + setup_partitions(PartCtrlr, [Part1, Part2]), + + ok = rfcall(PartCtrlr, + fun () -> + rfcall(OtherNode, + fun () -> + {Pid2, yes} = start_resolver(Name, + Resolver), + trace_message({node(), part_2_2, + nodes(), {pid2,Pid2}}), + ok + end) + end), + {_Pid1, yes} = rpc:call(Res1, ?MODULE, start_resolver, [Name, Resolver]), @@ -2319,15 +2475,6 @@ monitored_by_node(Trace, Servers) -> M <- ML, lists:member(M, Servers)]). -%% Runs on a node in Part2 -part_2_2(Config, Part1, Part2, NameResolvers) -> - make_partition(Config, Part1, Part2), - lists:foreach - (fun({Name, Resolver}) -> - {Pid2, yes} = start_resolver(Name, Resolver), - trace_message({node(), part_2_2, nodes(), {pid2,Pid2}}) - end, NameResolvers). - resolve_first(name, Pid1, _Pid2) -> Pid1. @@ -2398,7 +2545,7 @@ mon_by_servers(Proc) -> {monitored_by, ML} = process_info(Proc, monitored_by), {monitors_2levels, lists:append([ML | - [begin + [begin {monitored_by, MML} = rpc:call(node(M), erlang, process_info, @@ -2433,13 +2580,23 @@ leftover_name(Config) when is_list(Config) -> Part2 = [A2, Z2], NoResolver = {no_module, resolve_none}, Resolver = fun contact_a_2/3, - rpc_cast(A2, - ?MODULE, part_2_2, [Config, - Part1, - Part2, - [{Name, NoResolver}, - {ResName, Resolver}]]), - ?UNTIL(is_ready_partition(Config)), + + PartCtrlr = setup_partitions(Config, [Part1, Part2]), + + ok = rfcall( + PartCtrlr, + fun () -> + rfcall( + A2, + fun () -> + lists:foreach( + fun({TheName, TheResolver}) -> + {Pid2, yes} = start_resolver(TheName, TheResolver), + trace_message({node(), part_2_2, nodes(), {pid2,Pid2}}) + end, [{Name, NoResolver}, {ResName, Resolver}]), + ok + end) + end), %% resolved_name is resolved to run on a_2, an insert operation is %% sent to n_1. The resolver function halts a_2, but the nodedown @@ -2471,6 +2628,7 @@ leftover_name(Config) when is_list(Config) -> ?UNTIL(OrigNames =:= global:registered_names()), write_high_level_trace(Config), stop_nodes(Cps), + stop_partition_controller(PartCtrlr), init_condition(Config), ok. @@ -2636,9 +2794,22 @@ external_nodes(Config) when is_list(Config) -> %% Two partitions: [test_server] and [b, c]. %% c registers an external name on b - rpc_cast(NodeB, ?MODULE, part_ext, - [Config, node(), NodeC, Name]), - ?UNTIL(is_ready_partition(Config)), + + PartCtrlr = setup_partitions(Config, [[node()], [NodeB, NodeC]]), + + ok = rfcall( + PartCtrlr, + fun () -> + rfcall( + NodeB, + fun () -> + Pid = spawn(NodeC, fun() -> cnode_proc(NodeB) end), + Pid ! {register, self(), Name}, + receive {Pid, Reply} -> yes = Reply end, + rpc:call(NodeC, erlang, register, [Name, Pid]), + ok + end) + end), pong = net_adm:ping(NodeB), ?UNTIL([NodeB, NodeC] =:= lists:sort(nodes())), @@ -2702,6 +2873,7 @@ external_nodes(Config) when is_list(Config) -> ?UNTIL(length(get_ext_names()) =:= 1), stop_node(NodeC), + stop_partition_controller(PartCtrlr), ?UNTIL(length(get_ext_names()) =:= 0), init_condition(Config), @@ -2710,15 +2882,6 @@ external_nodes(Config) when is_list(Config) -> get_ext_names() -> gen_server:call(global_name_server, get_names_ext, infinity). -%% Runs at B -part_ext(Config, Main, C, Name) -> - make_partition(Config, [Main], [node(), C]), - ThisNode = node(), - Pid = erlang:spawn(C, fun() -> cnode_proc(ThisNode) end), - Pid ! {register, self(), Name}, - receive {Pid, Reply} -> yes = Reply end, - rpc:call(C, erlang, register, [Name, Pid]). - cnode_links(Pid) -> Pid ! {links, self()}, receive @@ -2776,25 +2939,29 @@ many_nodes(Config) when is_list(Config) -> Nodes = lists:sort(?NODES), wait_for_ready_net(Nodes, Config), - Dir = proplists:get_value(priv_dir, Config), - GoFile = filename:join([Dir, "go.txt"]), - file:delete(GoFile), - - CpsFiles = [{N, filename:join([Dir, atom_to_list(N)++".node"])} || - N <- Cps], - IsoFun = - fun({N, File}) -> - file:delete(File), - rpc_cast(N, ?MODULE, isolated_node, [File, GoFile, Cps, Config]) - end, - lists:foreach(IsoFun, CpsFiles), + %% All nodes isolated not connected to any other (visible) nodes... + Partitions = [[node()] | lists:map(fun (Node) -> [Node] end, Cps)], + + PartCtrlr = setup_partitions(Config, Partitions), - all_nodes_files(CpsFiles, "isolated", Config), Time = msec(), - sync_until(), - erlang:display(ready_to_go), - touch(GoFile, "go"), - all_nodes_files(CpsFiles, "done", Config), + + ok = rfcall( + PartCtrlr, + fun () -> + OkRes = {lists:map(fun (_) -> ok end, Cps), []}, + OkRes = rfmulticall( + Cps, + fun () -> + lists:foreach(fun(N) -> + _ = net_adm:ping(N) + end, shuffle(Cps)), + ?UNTIL((Cps -- get_known(node())) =:= []), + ok + end), + ok + end), + Time2 = msec(), lists:foreach(fun(N) -> pong = net_adm:ping(N) end, Cps), @@ -2803,12 +2970,10 @@ many_nodes(Config) when is_list(Config) -> write_high_level_trace(Config), % The test succeeded, but was it slow? - lists:foreach(fun({_N, File}) -> file:delete(File) end, CpsFiles), - file:delete(GoFile), - ?UNTIL(OrigNames =:= global:registered_names()), write_high_level_trace(Config), stop_nodes(Cps), + stop_partition_controller(PartCtrlr), init_condition(Config), Diff = Time2 - Time, Return = lists:flatten(io_lib:format("~w nodes took ~w ms", @@ -2831,29 +2996,6 @@ node_rel(From, To) -> 1 -> Last end} || N <- NodeNumbers]. -isolated_node(File, GoFile, Nodes, Config) -> - Ns = lists:sort(Nodes), - exit(erlang:whereis(user), kill), - touch(File, "start_isolated"), - NodesList = nodes(), - append_to_file(File, [{nodes,Nodes},{nodes_list,NodesList}]), - Replies = - lists:map(fun(N) -> _ = erlang:disconnect_node(N) end, NodesList), - append_to_file(File, {replies,Replies}), - ?UNTIL(begin - Known = get_known(node()), - append_to_file(File, {known,Known}), - Known =:= [node()] - end), - touch(File, "isolated"), - sync_until(File), - file_contents(GoFile, "go", Config, File), - touch(File, "got_go"), - lists:foreach(fun(N) -> _ = net_adm:ping(N) end, shuffle(Nodes)), - touch(File, "pinged"), - ?UNTIL((Ns -- get_known(node())) =:= []), - touch(File, "done"). - touch(File, List) -> ok = file:write_file(File, list_to_binary(List)). @@ -2862,11 +3004,6 @@ append_to_file(File, Term) -> ok = file:write(Fd, io_lib:format("~p.~n", [Term])), ok = file:close(Fd). -all_nodes_files(CpsFiles, ContentsList, Config) -> - lists:all(fun({_N,File}) -> - file_contents(File, ContentsList, Config) - end, CpsFiles). - file_contents(File, ContentsList, Config) -> file_contents(File, ContentsList, Config, no_log_file). @@ -2890,14 +3027,6 @@ file_contents(File, ContentsList, Config, LogFile) -> end end). -sync_until() -> - sync_until(no_log_file). - -sync_until(LogFile) -> - Time = ?UNTIL_LOOP - (msec(now()) rem ?UNTIL_LOOP), - catch append_to_file(LogFile, {sync_until, Time}), - timer:sleep(Time). - shuffle(L) -> [E || {_, E} <- lists:keysort(1, [{rand:uniform(), E} || E <- L])]. @@ -3378,109 +3507,11 @@ from(_H, []) -> []. other(A, [A, _B]) -> A; other(_, [_A, B]) -> B. - -%% this one runs at cp2 -part1(Config, Main, Cp1, Cp3) -> - case catch begin - make_partition(Config, [Main, Cp1], [node(), Cp3]), - {_Pid, yes} = start_proc(test2), - {_Pid2, yes} = start_proc(test4) - end of - {_, yes} -> ok; % w("ok", []); - {'EXIT', _R} -> - ok - %% w("global_SUITE line:~w: ~p", [?LINE, _R]) - end. - -%% Runs at Cp2 -part1_5(Config, Main, Cp1, Cp3) -> - case catch begin - make_partition(Config, [Main, Cp1], [node(), Cp3]), - {_Pid1, yes} = start_proc_basic(name12), - {_Pid2, yes} = - rpc:call(Cp3, ?MODULE, start_proc_basic, [name03]) - end of - {_, yes} -> ok; % w("ok", []); - {'EXIT', _R} -> - ok - %% w("global_SUITE line:~w: ~p", [?LINE, _R]) - end. - w(X,Y) -> {ok, F} = file:open("cp2.log", [write]), io:format(F, X, Y), file:close(F). -%% this one runs on one node in Part2 -%% The partition is ready when is_ready_partition(Config) returns (true). -make_partition(Config, Part1, Part2) -> - Dir = proplists:get_value(priv_dir, Config), - Ns = [begin - Name = lists:concat([atom_to_list(N),"_",msec(),".part"]), - File = filename:join([Dir, Name]), - file:delete(File), - rpc_cast(N, ?MODULE, mk_part_node, [File, Part, Config], File), - {N, File} - end || Part <- [Part1, Part2], N <- Part], - all_nodes_files(Ns, "done", Config), - lists:foreach(fun({_N,File}) -> file:delete(File) end, Ns), - PartFile = make_partition_file(Config), - touch(PartFile, "done"). - -%% The node signals its success by touching a file. -mk_part_node(File, MyPart0, Config) -> - touch(File, "start"), % debug - MyPart = lists:sort(MyPart0), - ?UNTIL(is_node_in_part(File, MyPart)), - touch(File, "done"). - -%% The calls to append_to_file are for debugging. -is_node_in_part(File, MyPart) -> - lists:foreach(fun(N) -> - _ = erlang:disconnect_node(N) - end, nodes() -- MyPart), - case {(Known = get_known(node())) =:= MyPart, - (Nodes = lists:sort([node() | nodes()])) =:= MyPart} of - {true, true} -> - %% Make sure the resolvers have been terminated, - %% otherwise they may pop up and send some message. - %% (This check is probably unnecessary.) - case element(5, global:info()) of - [] -> - true; - Rs -> - erlang:display({is_node_in_part, resolvers, Rs}), - trace_message({node(), is_node_in_part, Rs}), - append_to_file(File, {now(), Known, Nodes, Rs}), - false - end; - _ -> - append_to_file(File, {now(), Known, Nodes}), - false - end. - -is_ready_partition(Config) -> - File = make_partition_file(Config), - file_contents(File, "done", Config), - file:delete(File), - true. - -make_partition_file(Config) -> - Dir = proplists:get_value(priv_dir, Config), - filename:join([Dir, atom_to_list(make_partition_done)]). - -%% this one runs at cp3 -part2(Config, Parent, Main, Cp0, Cp1, Cp2, Cp3, Cp4, Cp5, Cp6) -> - make_partition(Config, [Main, Cp0, Cp1, Cp2], [Cp3, Cp4, Cp5, Cp6]), - start_procs(Parent, Cp4, Cp5, Cp6, Config). - -part3(Config, Parent, Main, Cp0, Cp1, Cp2, Cp3, Cp4, Cp5, Cp6) -> - make_partition(Config, [Main, Cp0, Cp1, Cp2], [Cp3, Cp4, Cp5, Cp6]), - start_procs(Parent, Cp4, Cp5, Cp6, Config), - %% Make Cp6 alone - rpc_cast(Cp5, ?MODULE, crash, [12000]), - rpc_cast(Cp6, ?MODULE, alone, [Cp0, Cp3]). - start_procs(Parent, N1, N2, N3, Config) -> S1 = lists:sort([N1, N2, N3]), ?UNTIL(begin @@ -3575,13 +3606,11 @@ init_proc_basic(Parent, Name) -> Parent ! {self(),X}, loop(). -single_node(Time, Node, Config) -> - exit(erlang:whereis(user), kill), - lists:foreach(fun(N) -> _ = erlang:disconnect_node(N) end, nodes()), - ?UNTIL(get_known(node()) =:= [node()]), +single_node(Time, Node) -> spawn(?MODULE, init_2, []), - sleep(Time - msec()), - net_adm:ping(Node). + timer:sleep(Time - msec()), + _ = net_adm:ping(Node), + ok. init_2() -> register(single_name, self()), @@ -3640,8 +3669,6 @@ sreq(Pid, Msg) -> receive {Ref, X} -> X end. alone(N1, N2) -> - lists:foreach(fun(Node) -> true = erlang:disconnect_node(Node) end, - nodes()), ct:sleep(12000), net_adm:ping(N1), net_adm:ping(N2), @@ -4173,16 +4200,150 @@ rpc_cast(Node, Module, Function, Args) -> {_,pong,Node}= {node(),net_adm:ping(Node),Node}, rpc:cast(Node, Module, Function, Args). -rpc_cast(Node, Module, Function, Args, File) -> - case net_adm:ping(Node) of - pong -> - rpc:cast(Node, Module, Function, Args); - Else -> - append_to_file(File, {now(), {rpc_cast, Node, Module, Function, - Args, Else}}) - %% Maybe we should crash, but it probably doesn't matter. +global_known(Node) -> + gen_server:call({global_name_server, Node}, get_known, infinity). + +global_known() -> + global_known(node()). + +disconnect(Node) -> + erlang:disconnect_node(Node), + wait_until(fun () -> not lists:member(Node, global_known()) end), + ok. + +disconnect_nodes(HiddenCtrlNode, NodeA, NodeB) -> + Nodes = [node()|nodes()], + ok = rfcall(HiddenCtrlNode, + fun () -> + ok = rfcall(NodeA, + fun () -> + disconnect(NodeB) + end), + ok = rfcall(NodeB, + fun () -> + disconnect(NodeA) + end), + %% Try to ensure 'lost_connection' messages + %% have been handled (see comment in + %% create_partitions)... + lists:foreach(fun (N) -> + _ = global_known(N) + end, Nodes), + ok + end). + +create_partitions(PartitionsList) -> + AllNodes = lists:sort(lists:flatten(PartitionsList)), + + %% Take down all connections on all nodes... + AllOk = {lists:map(fun (_) -> ok end, AllNodes), []}, + io:format("Disconnecting all nodes from eachother...", []), + AllOk = rfmulticall( + AllNodes, + fun () -> + lists:foreach(fun (N) -> + erlang:disconnect_node(N) + end, nodes()), + wait_until(fun () -> [] == global_known() end), + ok + end, 5000), + %% Here we know that all 'lost_connection' messages that will be + %% sent by global name servers due to these disconnects have been + %% sent, but we don't know that all of them have been received and + %% handled. By communicating with all global name servers one more + %% time it is very likely that all of them have been received and + %% handled (however, not guaranteed). If 'lost_connection' messages + %% are received after we begin to set up the partitions, the + %% partitions may lose connection with some of its nodes. + lists:foreach(fun (N) -> [] = global_known(N) end, AllNodes), + + %% Set up fully connected partitions... + io:format("Connecting partitions...", []), + lists:foreach( + fun (Partition) -> + Part = lists:sort(Partition), + PartOk = {lists:map(fun (_) -> ok end, Part), []}, + PartOk = rfmulticall( + Part, + fun () -> + wait_until( + fun () -> + ConnNodes = Part -- [node()|nodes()], + if ConnNodes == [] -> + true; + true -> + lists:foreach( + fun (N) -> + net_kernel:connect_node(N) + end, ConnNodes), + false + end + end), + ok + + end, 5000) + end, PartitionsList), + ok. + +setup_partitions(PartCtrlr, PartList) when is_atom(PartCtrlr) -> + ok = rfcall(PartCtrlr, fun () -> create_partitions(PartList) end), + io:format("Partitions successfully setup:~n", []), + lists:foreach(fun (Part) -> + io:format("~p~n", [Part]) + end, + PartList), + ok; +setup_partitions(Config, PartList) when is_list(Config) -> + PartCtrlr = start_partition_controller(Config), + setup_partitions(PartCtrlr, PartList), + PartCtrlr. + +start_partition_controller(Config) when is_list(Config) -> + {ok, PartCtrlr} = start_hidden_node(part_ctrlr, Config), + PartCtrlr. + +stop_partition_controller(PartCtrlr) -> + stop_node(PartCtrlr). + +prevent_overlapping_partitions() -> + case application:get_env(kernel, prevent_overlapping_partitions) of + {ok, true} -> + true; + _ -> + false + end. + +cast_line([]) -> + ok; +cast_line([N|Ns]) when N == node() -> + cast_line(Ns); +cast_line([N|Ns]) -> + rfcast(N, fun () -> cast_line(Ns) end). + +wait_until(F) -> + case catch F() of + true -> + ok; + _ -> + receive after 10 -> ok end, + wait_until(F) end. +rfcall(Node, Fun) -> + rfcall(Node, Fun, infinity). + +rfcall(Node, Fun, Timeout) -> + rpc:call(Node, erlang, apply, [Fun, []], Timeout). + +rfcast(Node, Fun) -> + rpc:cast(Node, erlang, apply, [Fun, []]). + +rfmulticall(Nodes, Fun) -> + rfmulticall(Nodes, Fun, infinity). + +rfmulticall(Nodes, Fun, Timeout) -> + rpc:multicall(Nodes, erlang, apply, [Fun, []], Timeout). + %% The emulator now ensures that the node has been removed from %% nodes(). rpc_disconnect_node(Node, DisconnectedNode, Config) -> diff --git a/lib/kernel/test/pg2_SUITE.erl b/lib/kernel/test/pg2_SUITE.erl index acecd34ead..1241fe586b 100644 --- a/lib/kernel/test/pg2_SUITE.erl +++ b/lib/kernel/test/pg2_SUITE.erl @@ -37,8 +37,7 @@ -define(testcase, proplists:get_value(?TESTCASE, Config)). %% Internal export. --export([mk_part_node_and_group/3, part2/4, - mk_part_node/3, part1/5, p_init/3, start_proc/1, sane/0]). +-export([p_init/3, start_proc/1, sane/0]). init_per_testcase(Case, Config) -> [{?TESTCASE, Case}| Config]. @@ -101,9 +100,19 @@ otp_8653(Config) when is_list(Config) -> wait_for_ready_net(Config), + G = pg2_otp_8653, + + {[ok,ok,ok,ok], []} + = rfmulticall([node(), A, B, C], + fun () -> + Pid = spawn(forever()), + ok = pg2:create(G), + _ = [ok = pg2:join(G, Pid) || _ <- [1,1]], + ok + end), + %% make b and c connected, partitioned from node() and a - rpc_cast(B, ?MODULE, part2, [Config, node(), A, C]), - ?UNTIL(is_ready_partition(Config)), + PartCtrlr = setup_partitions(Config, [[B, C], [node(), A]]), %% Connect to the other partition. pong = net_adm:ping(B), @@ -112,7 +121,6 @@ otp_8653(Config) when is_list(Config) -> _ = global:sync(), [A, B, C] = lists:sort(nodes()), - G = pg2_otp_8653, ?UNTIL(begin GA = lists:sort(rpc:call(A, pg2, get_members, [G])), GB = lists:sort(rpc:call(B, pg2, get_members, [G])), @@ -125,27 +133,9 @@ otp_8653(Config) when is_list(Config) -> end), ok = pg2:delete(G), stop_nodes([A,B,C]), + stop_partition_controller(PartCtrlr), ok. -part2(Config, Main, A, C) -> - Function = mk_part_node_and_group, - case catch begin - make_partition(Config, [Main, A], [node(), C], Function) - end - of - ok -> ok - end. - -mk_part_node_and_group(File, MyPart0, Config) -> - touch(File, "start"), % debug - MyPart = lists:sort(MyPart0), - ?UNTIL(is_node_in_part(File, MyPart)), - G = pg2_otp_8653, - Pid = spawn(forever()), - ok = pg2:create(G), - _ = [ok = pg2:join(G, Pid) || _ <- [1,1]], - touch(File, "done"). - %% OTP-8259. Member was not removed after being killed. otp_8259(Config) when is_list(Config) -> [A, B, C] = start_nodes([a, b, c], peer, Config), @@ -162,8 +152,17 @@ otp_8259(Config) when is_list(Config) -> ok = pg2:join(G, Pid), %% make b and c connected, partitioned from node() and a - rpc_cast(B, ?MODULE, part1, [Config, node(), A, C, Name]), - ?UNTIL(is_ready_partition(Config)), + PartCtrlr = setup_partitions(Config, [[B, C], [node(), A]]), + ok = rfcall( + PartCtrlr, + fun () -> + rfcall( + B, + fun () -> + {_, yes} = start_proc(Name), + ok + end) + end), %% Connect to the other partition. %% The resolver on node b will be called. @@ -182,16 +181,9 @@ otp_8259(Config) when is_list(Config) -> ok = pg2:delete(G), stop_nodes([A,B,C]), + stop_partition_controller(PartCtrlr), ok. -part1(Config, Main, A, C, Name) -> - case catch begin - make_partition(Config, [Main, A], [node(), C]), - {_Pid, yes} = start_proc(Name) - end of - {_, yes} -> ok - end. - start_proc(Name) -> Pid = spawn(?MODULE, p_init, [self(), Name, node()]), receive @@ -585,6 +577,9 @@ collect_nodes(N, Max) -> [Node | collect_nodes(N+1, Max)] end. +start_hidden_node(Name, Config) -> + start_node(Name, slave, "-hidden", Config). + start_node(Name, How, Config) -> start_node(Name, How, "", Config). @@ -619,60 +614,79 @@ node_name(Name, Config) -> L = lists:flatten(Date), lists:concat([Name,U,?testcase,U,U,L]). -%% This one runs on one node in Part2. -%% The partition is ready when is_ready_partition(Config) returns (true). -make_partition(Config, Part1, Part2) -> - make_partition(Config, Part1, Part2, mk_part_node). - -make_partition(Config, Part1, Part2, Function) -> - Dir = proplists:get_value(priv_dir, Config), - Ns = [begin - Name = lists:concat([atom_to_list(N),"_",msec(),".part"]), - File = filename:join([Dir, Name]), - file:delete(File), - rpc_cast(N, ?MODULE, Function, [File, Part, Config], File), - {N, File} - end || Part <- [Part1, Part2], N <- Part], - all_nodes_files(Ns, "done", Config), - lists:foreach(fun({_N,File}) -> file:delete(File) end, Ns), - PartFile = make_partition_file(Config), - touch(PartFile, "done"). - -%% The node signals its success by touching a file. -mk_part_node(File, MyPart0, Config) -> - touch(File, "start"), % debug - MyPart = lists:sort(MyPart0), - ?UNTIL(is_node_in_part(File, MyPart)), - touch(File, "done"). - -%% The calls to append_to_file are for debugging. -is_node_in_part(File, MyPart) -> - lists:foreach(fun(N) -> - _ = erlang:disconnect_node(N) - end, nodes() -- MyPart), - case {(Known = get_known(node())) =:= MyPart, - (Nodes = lists:sort([node() | nodes()])) =:= MyPart} of - {true, true} -> - %% Make sure the resolvers have been terminated, - %% otherwise they may pop up and send some message. - %% (This check is probably unnecessary.) - case element(5, global:info()) of - [] -> - true; - Rs -> - append_to_file(File, {now(), Known, Nodes, Rs}), - false - end; - _ -> - append_to_file(File, {now(), Known, Nodes}), - false - end. -is_ready_partition(Config) -> - File = make_partition_file(Config), - file_contents(File, "done", Config), - file:delete(File), - true. +create_partitions(PartitionsList) -> + AllNodes = lists:sort(lists:flatten(PartitionsList)), + + %% Take down all connections on all nodes... + AllOk = {lists:map(fun (_) -> ok end, AllNodes), []}, + io:format("Disconnecting all nodes from eachother...", []), + AllOk = rfmulticall( + AllNodes, + fun () -> + lists:foreach(fun (N) -> + erlang:disconnect_node(N) + end, nodes()), + wait_until(fun () -> [] == global_known() end), + ok + end, 5000), + %% Here we know that all 'lost_connection' messages that will be + %% sent by global name servers due to these disconnects have been + %% sent, but we don't know that all of them have been received and + %% handled. By communicating with all global name servers one more + %% time it is very likely that all of them have been received and + %% handled (however, not guaranteed). If 'lost_connection' messages + %% are received after we begin to set up the partitions, the + %% partitions may lose connection with some of its nodes. + lists:foreach(fun (N) -> [] = global_known(N) end, AllNodes), + + %% Set up fully connected partitions... + io:format("Connecting partitions...", []), + lists:foreach( + fun (Partition) -> + Part = lists:sort(Partition), + PartOk = {lists:map(fun (_) -> ok end, Part), []}, + PartOk = rfmulticall( + Part, + fun () -> + wait_until( + fun () -> + ConnNodes = Part -- [node()|nodes()], + if ConnNodes == [] -> + true; + true -> + lists:foreach( + fun (N) -> + net_kernel:connect_node(N) + end, ConnNodes), + false + end + end), + ok + + end, 5000) + end, PartitionsList), + ok. + +setup_partitions(PartCtrlr, PartList) when is_atom(PartCtrlr) -> + ok = rfcall(PartCtrlr, fun () -> create_partitions(PartList) end), + io:format("Partitions successfully setup:~n", []), + lists:foreach(fun (Part) -> + io:format("~p~n", [Part]) + end, + PartList), + ok; +setup_partitions(Config, PartList) when is_list(Config) -> + PartCtrlr = start_partition_controller(Config), + setup_partitions(PartCtrlr, PartList), + PartCtrlr. + +start_partition_controller(Config) when is_list(Config) -> + {ok, PartCtrlr} = start_hidden_node(part_ctrlr, Config), + PartCtrlr. + +stop_partition_controller(PartCtrlr) -> + stop_node(PartCtrlr). wait_for_ready_net(Config) -> wait_for_ready_net([node()|nodes()], Config). @@ -688,64 +702,30 @@ wait_for_ready_net(Nodes0, Config) -> end, Nodes) end). -%% To make it less probable that some low-level problem causes -%% problems, the receiving node is ping:ed. -rpc_cast(Node, Module, Function, Args) -> - {_,pong,Node}= {node(),net_adm:ping(Node),Node}, - rpc:cast(Node, Module, Function, Args). - -rpc_cast(Node, Module, Function, Args, File) -> - case net_adm:ping(Node) of - pong -> - rpc:cast(Node, Module, Function, Args); - Else -> - append_to_file(File, {now(), {rpc_cast, Node, Module, Function, - Args, Else}}) - %% Maybe we should crash, but it probably doesn't matter. - end. - -touch(File, List) -> - ok = file:write_file(File, list_to_binary(List)). +global_known(Node) -> + gen_server:call({global_name_server, Node}, get_known, infinity). -append_to_file(File, Term) -> - {ok, Fd} = file:open(File, [raw,binary,append]), - ok = file:write(Fd, io_lib:format("~p.~n", [Term])), - ok = file:close(Fd). +global_known() -> + global_known(node()). -all_nodes_files(Files, ContentsList, Config) -> - lists:all(fun({_N,File}) -> - file_contents(File, ContentsList, Config) - end, Files). +wait_until(F) -> + case catch F() of + true -> + ok; + _ -> + receive after 10 -> ok end, + wait_until(F) + end. -file_contents(File, ContentsList, Config) -> - file_contents(File, ContentsList, Config, no_log_file). +rfcall(Node, Fun) -> + rfcall(Node, Fun, infinity). -file_contents(File, ContentsList, Config, LogFile) -> - Contents = list_to_binary(ContentsList), - Sz = size(Contents), - ?UNTIL(begin - case file:read_file(File) of - {ok, FileContents}=Reply -> - case catch split_binary(FileContents, Sz) of - {Contents,_} -> - true; - _ -> - catch append_to_file(LogFile, - {File,Contents,Reply}), - false - end; - Reply -> - catch append_to_file(LogFile, {File, Contents, Reply}), - false - end - end). +rfcall(Node, Fun, Timeout) -> + rpc:call(Node, erlang, apply, [Fun, []], Timeout). -make_partition_file(Config) -> - Dir = proplists:get_value(priv_dir, Config), - filename:join([Dir, atom_to_list(make_partition_done)]). +rfmulticall(Nodes, Fun) -> + rfmulticall(Nodes, Fun, infinity). -msec() -> - msec(now()). +rfmulticall(Nodes, Fun, Timeout) -> + rpc:multicall(Nodes, erlang, apply, [Fun, []], Timeout). -msec(T) -> - element(1,T)*1000000000 + element(2,T)*1000 + element(3,T) div 1000. diff --git a/lib/kernel/test/rpc_SUITE.erl b/lib/kernel/test/rpc_SUITE.erl index a89a7600a2..aea53003cb 100644 --- a/lib/kernel/test/rpc_SUITE.erl +++ b/lib/kernel/test/rpc_SUITE.erl @@ -70,13 +70,13 @@ call(Config) when is_list(Config) -> PA = filename:dirname(code:which(?MODULE)), %% Note. First part of nodename sets response delay in seconds {ok, N1} = test_server:start_node('3_rpc_SUITE_call', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N2} = test_server:start_node('1_rcp_SUITE_call', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N3} = test_server:start_node('4_rcp_SUITE_call', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N4} = test_server:start_node('8_rcp_SUITE_call', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), ok = io:format("~p~n", [[N1, N2, N3]]), {hej,_,N1} = rpc:call(N1, ?MODULE, f, []), {hej,_,N2} = rpc:call(N2, ?MODULE, f, [], 2000), @@ -95,13 +95,13 @@ block_call(Config) when is_list(Config) -> PA = filename:dirname(code:which(?MODULE)), %% Note. First part of nodename sets response delay in seconds {ok, N1} = test_server:start_node('3_rpc_SUITE_block_call', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N2} = test_server:start_node('1_rcp_SUITE_block_call', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N3} = test_server:start_node('4_rcp_SUITE_block_call', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N4} = test_server:start_node('8_rcp_SUITE_block_call', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), ok = io:format("~p~n", [[N1, N2, N3]]), {hej,_,N1} = rpc:block_call(N1, ?MODULE, f, []), {hej,_,N2} = rpc:block_call(N2, ?MODULE, f, [], 2000), @@ -121,9 +121,9 @@ multicall(Config) when is_list(Config) -> PA = filename:dirname(code:which(?MODULE)), %% Note. First part of nodename sets response delay in seconds {ok, N1} = test_server:start_node('3_rpc_SUITE_multicall', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N2} = test_server:start_node('1_rcp_SUITE_multicall', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), ok = io:format("~p~n", [[N1, N2]]), {[{hej,_,N1},{hej,_,N2}],[]} = rpc:multicall([N1, N2], ?MODULE, f, []), @@ -137,13 +137,13 @@ multicall_timeout(Config) when is_list(Config) -> PA = filename:dirname(code:which(?MODULE)), %% Note. First part of nodename sets response delay in seconds {ok, N1} = test_server:start_node('11_rpc_SUITE_multicall', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N2} = test_server:start_node('8_rpc_SUITE_multicall', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N3} = test_server:start_node('5_rpc_SUITE_multicall', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N4} = test_server:start_node('2_rcp_SUITE_multicall', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), ok = io:format("~p~n", [[N1, N2]]), {[{hej,_,N3},{hej,_,N4}],[N1, N2]} = rpc:multicall([N3, N1, N2, N4], ?MODULE, f, [], 6000), @@ -159,9 +159,9 @@ multicall_timeout(Config) when is_list(Config) -> multicall_dies(Config) when is_list(Config) -> PA = filename:dirname(code:which(?MODULE)), {ok, N1} = test_server:start_node('rpc_SUITE_multicall_dies_1', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), {ok, N2} = test_server:start_node('rcp_SUITE_multicall_dies_2', slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), Nodes = [N1, N2], %% {[{badrpc, {'EXIT', normal}}, {badrpc, {'EXIT', normal}}], []} = @@ -213,10 +213,10 @@ multicall_node_dies(Config) when is_list(Config) -> do_multicall_2_nodes_dies(Mod, Func, Args) -> ok = io:format("~p:~p~p~n", [Mod, Func, Args]), PA = filename:dirname(code:which(?MODULE)), - {ok, N1} = test_server:start_node('rpc_SUITE_multicall_node_dies_1', slave, - [{args, "-pa " ++ PA}]), - {ok, N2} = test_server:start_node('rcp_SUITE_multicall_node_dies_2', slave, - [{args, "-pa " ++ PA}]), + {ok, N1} = test_server:start_node('rpc_SUITE_multicall_node_dies_1', peer, + [{args, "-connect_all false -pa " ++ PA}]), + {ok, N2} = test_server:start_node('rcp_SUITE_multicall_node_dies_2', peer, + [{args, "-connect_all false -pa " ++ PA}]), Nodes = [N1, N2], {[], Nodes} = rpc:multicall(Nodes, Mod, Func, Args), Msgs = flush([]), @@ -229,7 +229,7 @@ do_multicall_2_nodes_dies(Mod, Func, Args) -> called_dies(Config) when is_list(Config) -> PA = filename:dirname(code:which(?MODULE)), {ok, N} = test_server:start_node(rpc_SUITE_called_dies, slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), %% rep(fun (Tag, Call, Args) -> {Tag,{badrpc,{'EXIT',normal}}} = @@ -395,7 +395,7 @@ node_rep(Fun, Name, PA, M, F, A) -> node_rep_call(Tag, Call, Args, Fun, Name0, PA) -> Name = list_to_atom(Name0 ++ "_" ++ atom_to_list(Tag)), {ok, N} = test_server:start_node(Name, slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), Fun(Call, [N|Args]), catch test_server:stop_node(N), ok. @@ -405,7 +405,7 @@ called_throws(Config) when is_list(Config) -> PA = filename:dirname(code:which(?MODULE)), %% {ok, N} = test_server:start_node(rpc_SUITE_called_throws, slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), %% rep(fun (Tag, Call, Args) -> {Tag,up} = @@ -424,7 +424,7 @@ called_throws(Config) when is_list(Config) -> call_benchmark(Config) when is_list(Config) -> PA = filename:dirname(code:which(?MODULE)), {ok, Node} = test_server:start_node(rpc_SUITE_call_benchmark, slave, - [{args, "-pa " ++ PA}]), + [{args, "-connect_all false -pa " ++ PA}]), Iter = case erlang:system_info(modified_timing_level) of undefined -> 10000; _ -> 500 %Modified timing - spawn is slower @@ -452,7 +452,7 @@ do_call_benchmark(Node, I, M) -> async_call(Config) when is_list(Config) -> %% Note: First part of nodename sets response delay in seconds. PA = filename:dirname(code:which(?MODULE)), - NodeArgs = [{args,"-pa "++ PA}], + NodeArgs = [{args,"-connect_all false -pa "++ PA}], {ok,Node1} = test_server:start_node('1_rpc_SUITE_call', slave, NodeArgs), {ok,Node2} = test_server:start_node('10_rpc_SUITE_call', slave, NodeArgs), {ok,Node3} = test_server:start_node('20_rpc_SUITE_call', slave, NodeArgs), |