summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErlang/OTP <otp@erlang.org>2021-09-30 11:49:27 +0200
committerErlang/OTP <otp@erlang.org>2021-09-30 11:49:27 +0200
commitbcda2c67198561f5d9a06bd2378dba4c9f1ea5d1 (patch)
tree271f600efda47c20b2b86d6089b952bd139c0166
parente9060507ff9b2fb9ad41f489f3b68fb8d165fced (diff)
parentffd032a1364b8c976dc52a52230f7f10ba36f92b (diff)
downloaderlang-bcda2c67198561f5d9a06bd2378dba4c9f1ea5d1.tar.gz
Merge branch 'Maria-12648430/supervisor_unlink_child_late/GH-5193/PR-5201/OTP-17649' into maint-24
* Maria-12648430/supervisor_unlink_child_late/GH-5193/PR-5201/OTP-17649: Keep supervisor children linked during shutdown
-rw-r--r--lib/stdlib/src/supervisor.erl222
-rw-r--r--lib/stdlib/test/Makefile1
-rw-r--r--lib/stdlib/test/supervisor_4.erl49
-rw-r--r--lib/stdlib/test/supervisor_SUITE.erl133
4 files changed, 295 insertions, 110 deletions
diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl
index fce18b18b7..eea5b6a513 100644
--- a/lib/stdlib/src/supervisor.erl
+++ b/lib/stdlib/src/supervisor.erl
@@ -898,11 +898,9 @@ terminate_children(Children, SupName) ->
NChildren.
do_terminate(Child, SupName) when is_pid(Child#child.pid) ->
- case shutdown(Child#child.pid, Child#child.shutdown) of
+ case shutdown(Child) of
ok ->
ok;
- {error, normal} when not (?is_permanent(Child)) ->
- ok;
{error, OtherReason} ->
?report_error(shutdown_error, OtherReason, Child, SupName)
end,
@@ -920,65 +918,61 @@ do_terminate(_Child, _SupName) ->
%% supervisor.
%% Returns: ok | {error, OtherReason} (this should be reported)
%%-----------------------------------------------------------------
-shutdown(Pid, brutal_kill) ->
- case monitor_child(Pid) of
- ok ->
- exit(Pid, kill),
- receive
- {'DOWN', _MRef, process, Pid, killed} ->
- ok;
- {'DOWN', _MRef, process, Pid, OtherReason} ->
- {error, OtherReason}
- end;
- {error, Reason} ->
- {error, Reason}
+shutdown(#child{pid=Pid, shutdown=brutal_kill} = Child) ->
+ Mon = monitor(process, Pid),
+ exit(Pid, kill),
+ receive
+ {'DOWN', Mon, process, Pid, Reason0} ->
+ case unlink_flush(Pid, Reason0) of
+ killed ->
+ ok;
+ {shutdown, _} when not (?is_permanent(Child)) ->
+ ok;
+ normal when not (?is_permanent(Child)) ->
+ ok;
+ Reason ->
+ {error, Reason}
+ end
end;
-shutdown(Pid, Time) ->
- case monitor_child(Pid) of
- ok ->
- exit(Pid, shutdown), %% Try to shutdown gracefully
- receive
- {'DOWN', _MRef, process, Pid, shutdown} ->
- ok;
- {'DOWN', _MRef, process, Pid, OtherReason} ->
- {error, OtherReason}
- after Time ->
- exit(Pid, kill), %% Force termination.
- receive
- {'DOWN', _MRef, process, Pid, OtherReason} ->
- {error, OtherReason}
- end
- end;
- {error, Reason} ->
- {error, Reason}
+shutdown(#child{pid=Pid, shutdown=Time} = Child) ->
+ Mon = monitor(process, Pid),
+ exit(Pid, shutdown),
+ receive
+ {'DOWN', Mon, process, Pid, Reason0} ->
+ case unlink_flush(Pid, Reason0) of
+ shutdown ->
+ ok;
+ {shutdown, _} when not (?is_permanent(Child)) ->
+ ok;
+ normal when not (?is_permanent(Child)) ->
+ ok;
+ Reason ->
+ {error, Reason}
+ end
+ after Time ->
+ exit(Pid, kill),
+ receive
+ {'DOWN', Mon, process, Pid, Reason0} ->
+ case unlink_flush(Pid, Reason0) of
+ shutdown ->
+ ok;
+ {shutdown, _} when not (?is_permanent(Child)) ->
+ ok;
+ normal when not (?is_permanent(Child)) ->
+ ok;
+ Reason ->
+ {error, Reason}
+ end
+ end
end.
-%% Help function to shutdown/2 switches from link to monitor approach
-monitor_child(Pid) ->
-
- %% Do the monitor operation first so that if the child dies
- %% before the monitoring is done causing a 'DOWN'-message with
- %% reason noproc, we will get the real reason in the 'EXIT'-message
- %% unless a naughty child has already done unlink...
- erlang:monitor(process, Pid),
+unlink_flush(Pid, DefaultReason) ->
unlink(Pid),
-
receive
- %% If the child dies before the unlik we must empty
- %% the mail-box of the 'EXIT'-message and the 'DOWN'-message.
- {'EXIT', Pid, Reason} ->
- receive
- {'DOWN', _, process, Pid, _} ->
- {error, Reason}
- end
- after 0 ->
- %% If a naughty child did unlink and the child dies before
- %% monitor the result will be that shutdown/2 receives a
- %% 'DOWN'-message with reason noproc.
- %% If the child should die after the unlink there
- %% will be a 'DOWN'-message with a correct reason
- %% that will be handled in shutdown/2.
- ok
+ {'EXIT', Pid, Reason} ->
+ Reason
+ after 0 ->
+ DefaultReason
end.
%%-----------------------------------------------------------------
@@ -992,40 +986,37 @@ monitor_child(Pid) ->
%%-----------------------------------------------------------------
terminate_dynamic_children(State) ->
Child = get_dynamic_child(State),
- {Pids, EStack0} = monitor_dynamic_children(Child,State),
+ Pids = dyn_fold(
+ fun
+ (P, Acc) when is_pid(P) ->
+ Mon = monitor(process, P),
+ case Child#child.shutdown of
+ brutal_kill -> exit(P, kill);
+ _ -> exit(P, shutdown)
+ end,
+ Acc#{{P, Mon} => true};
+ (?restarting(_), Acc) ->
+ Acc
+ end,
+ #{},
+ State
+ ),
+ TRef = case Child#child.shutdown of
+ brutal_kill ->
+ undefined;
+ infinity ->
+ undefined;
+ Time ->
+ erlang:start_timer(Time, self(), kill)
+ end,
Sz = maps:size(Pids),
- EStack = case Child#child.shutdown of
- brutal_kill ->
- maps:foreach(fun(P, _) -> exit(P, kill) end, Pids),
- wait_dynamic_children(Child, Pids, Sz, undefined, EStack0);
- infinity ->
- maps:foreach(fun(P, _) -> exit(P, shutdown) end, Pids),
- wait_dynamic_children(Child, Pids, Sz, undefined, EStack0);
- Time ->
- maps:foreach(fun(P, _) -> exit(P, shutdown) end, Pids),
- TRef = erlang:start_timer(Time, self(), kill),
- wait_dynamic_children(Child, Pids, Sz, TRef, EStack0)
- end,
+ EStack = wait_dynamic_children(Child, Pids, Sz, TRef, #{}),
%% Unroll stacked errors and report them
maps:foreach(fun(Reason, Ls) ->
?report_error(shutdown_error, Reason,
Child#child{pid=Ls}, State#state.name)
end, EStack).
-monitor_dynamic_children(Child,State) ->
- dyn_fold(fun(P,{Pids, EStack}) when is_pid(P) ->
- case monitor_child(P) of
- ok ->
- {maps:put(P, P, Pids), EStack};
- {error, normal} when not (?is_permanent(Child)) ->
- {Pids, EStack};
- {error, Reason} ->
- {Pids, maps_prepend(Reason, P, EStack)}
- end;
- (?restarting(_), {Pids, EStack}) ->
- {Pids, EStack}
- end, {maps:new(), maps:new()}, State).
-
wait_dynamic_children(_Child, _Pids, 0, undefined, EStack) ->
EStack;
wait_dynamic_children(_Child, _Pids, 0, TRef, EStack) ->
@@ -1041,34 +1032,51 @@ wait_dynamic_children(_Child, _Pids, 0, TRef, EStack) ->
wait_dynamic_children(#child{shutdown=brutal_kill} = Child, Pids, Sz,
TRef, EStack) ->
receive
- {'DOWN', _MRef, process, Pid, killed} ->
- wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1,
- TRef, EStack);
-
- {'DOWN', _MRef, process, Pid, Reason} ->
- wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1,
- TRef, maps_prepend(Reason, Pid, EStack))
+ {'DOWN', Mon, process, Pid, Reason0}
+ when is_map_key({Pid, Mon}, Pids) ->
+ case unlink_flush(Pid, Reason0) of
+ killed ->
+ wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids),
+ Sz-1, TRef, EStack);
+
+ {shutdown, _} when not (?is_permanent(Child)) ->
+ wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids),
+ Sz-1, TRef, EStack);
+
+ normal when not (?is_permanent(Child)) ->
+ wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids),
+ Sz-1, TRef, EStack);
+ Reason ->
+ wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids),
+ Sz-1, TRef, maps_prepend(Reason, Pid,
+ EStack))
+ end
end;
wait_dynamic_children(Child, Pids, Sz, TRef, EStack) ->
receive
- {'DOWN', _MRef, process, Pid, shutdown} ->
- wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1,
- TRef, EStack);
-
- {'DOWN', _MRef, process, Pid, {shutdown, _}} ->
- wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1,
- TRef, EStack);
-
- {'DOWN', _MRef, process, Pid, normal} when not (?is_permanent(Child)) ->
- wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1,
- TRef, EStack);
-
- {'DOWN', _MRef, process, Pid, Reason} ->
- wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1,
- TRef, maps_prepend(Reason, Pid, EStack));
+ {'DOWN', Mon, process, Pid, Reason0}
+ when is_map_key({Pid, Mon}, Pids) ->
+ case unlink_flush(Pid, Reason0) of
+ shutdown ->
+ wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids),
+ Sz-1, TRef, EStack);
+
+ {shutdown, _} when not (?is_permanent(Child)) ->
+ wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids),
+ Sz-1, TRef, EStack);
+
+ normal when not (?is_permanent(Child)) ->
+ wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids),
+ Sz-1, TRef, EStack);
+
+ Reason ->
+ wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids),
+ Sz-1, TRef, maps_prepend(Reason, Pid,
+ EStack))
+ end;
{timeout, TRef, kill} ->
- maps:foreach(fun(P, _) -> exit(P, kill) end, Pids),
+ maps:foreach(fun({P, _}, _) -> exit(P, kill) end, Pids),
wait_dynamic_children(Child, Pids, Sz, undefined, EStack)
end.
diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile
index 5f9510cf16..bda16c263d 100644
--- a/lib/stdlib/test/Makefile
+++ b/lib/stdlib/test/Makefile
@@ -72,6 +72,7 @@ MODULES= \
supervisor_1 \
supervisor_2 \
supervisor_3 \
+ supervisor_4 \
supervisor_deadlock \
naughty_child \
shell_SUITE \
diff --git a/lib/stdlib/test/supervisor_4.erl b/lib/stdlib/test/supervisor_4.erl
new file mode 100644
index 0000000000..038c904c27
--- /dev/null
+++ b/lib/stdlib/test/supervisor_4.erl
@@ -0,0 +1,49 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Description: Simulates the behaviour that a child process may have.
+%% Is used by the supervisor_SUITE test suite.
+-module(supervisor_4).
+
+-export([start_child/0, start_child/1, init/1]).
+
+-export([handle_call/3, handle_info/2, terminate/2]).
+
+start_child() ->
+ gen_server:start_link(?MODULE, undefined, []).
+
+start_child(Reason) ->
+ gen_server:start_link(?MODULE, {Reason}, []).
+
+init(Reason) ->
+ process_flag(trap_exit, true),
+ {ok, Reason}.
+
+handle_call(Req, _From, State) ->
+ {reply, Req, State}.
+
+handle_info(stop, State) ->
+ {stop, normal, State};
+handle_info(_, State) ->
+ {noreply, State}.
+
+terminate(_, {Reason}) ->
+ exit(Reason);
+terminate(_, _) ->
+ ok.
diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl
index 3d05ce5402..5bae4f6e05 100644
--- a/lib/stdlib/test/supervisor_SUITE.erl
+++ b/lib/stdlib/test/supervisor_SUITE.erl
@@ -44,7 +44,9 @@
sup_start_child_returns_error_simple/1,
sup_start_map/1, sup_start_map_simple/1,
sup_start_map_faulty_specs/1,
- sup_stop_infinity/1, sup_stop_timeout/1, sup_stop_brutal_kill/1,
+ sup_stop_infinity/1, sup_stop_timeout/1, sup_stop_timeout_dynamic/1,
+ sup_stop_brutal_kill/1, sup_stop_brutal_kill_dynamic/1,
+ sup_stop_race/1, sup_stop_non_shutdown_exit_dynamic/1,
child_adm/1, child_adm_simple/1, child_specs/1, child_specs_map/1,
extra_return/1, sup_flags/1]).
@@ -133,8 +135,9 @@ groups() ->
{sup_start_map, [],
[sup_start_map, sup_start_map_simple, sup_start_map_faulty_specs]},
{sup_stop, [],
- [sup_stop_infinity, sup_stop_timeout,
- sup_stop_brutal_kill]},
+ [sup_stop_infinity, sup_stop_timeout, sup_stop_timeout_dynamic,
+ sup_stop_brutal_kill, sup_stop_brutal_kill_dynamic,
+ sup_stop_race, sup_stop_non_shutdown_exit_dynamic]},
{normal_termination, [],
[permanent_normal, transient_normal, temporary_normal]},
{shutdown_termination, [],
@@ -498,6 +501,20 @@ sup_stop_timeout(Config) when is_list(Config) ->
check_exit_reason(CPid1, shutdown),
check_exit_reason(CPid2, killed).
+sup_stop_timeout_dynamic(Config) when is_list(Config) ->
+ process_flag(trap_exit, true),
+ Child = {child, {supervisor_1, start_child, []}, temporary, 1000, worker, []},
+ {ok, Pid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}),
+ {ok, CPid1} = supervisor:start_child(Pid, []),
+ link(CPid1),
+ {ok, CPid2} = supervisor:start_child(Pid, []),
+ link(CPid2),
+ CPid2 ! {sleep, 200000},
+ terminate(Pid, shutdown),
+ check_exit_reason(Pid, shutdown),
+ check_exit_reason(CPid1, shutdown),
+ check_exit_reason(CPid2, killed).
+
%%-------------------------------------------------------------------------
%% See sup_stop/1 when Shutdown = brutal_kill
@@ -518,6 +535,116 @@ sup_stop_brutal_kill(Config) when is_list(Config) ->
check_exit_reason(CPid1, shutdown),
check_exit_reason(CPid2, killed).
+sup_stop_brutal_kill_dynamic(Config) when is_list(Config) ->
+ process_flag(trap_exit, true),
+ Child = {child, {supervisor_4, start_child, []}, temporary, brutal_kill, worker, []},
+ {ok, Pid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}),
+ CPids = lists:map(
+ fun (_) ->
+ {ok, CPid} = supervisor:start_child(Pid, []),
+ link(CPid),
+ CPid
+ end,
+ lists:seq(1, 10)),
+ terminate(Pid, shutdown),
+ check_exit_reason(Pid, shutdown),
+ lists:foreach(
+ fun (CPid) ->
+ check_exit_reason(CPid, killed)
+ end,
+ CPids
+ ).
+
+%%-------------------------------------------------------------------------
+%% Tests that a supervisor completes its shutdown properly while children
+%% exit by themselves for reasons other than shutdown
+sup_stop_race(Config) when is_list(Config) ->
+ process_flag(trap_exit, true),
+ {ok, Pid} = start_link({ok, {{one_for_one, 2, 3600}, []}}),
+ {ok, CPidA} = supervisor:start_child(Pid,
+ #{id => child_a,
+ start => {supervisor_1, start_child, []},
+ restart => temporary,
+ shutdown => 1000}),
+ link(CPidA),
+ CPids = lists:foldl(
+ fun ({Restart, Reason}, Acc) ->
+ {ok, CPid} = supervisor:start_child(Pid,
+ #{id => {child, make_ref()},
+ start => {supervisor_4, start_child, [Reason]},
+ restart => Restart,
+ shutdown => 1000
+ }),
+ link(CPid),
+ Acc#{CPid => Reason}
+ end,
+ #{},
+ [
+ {Restart, Reason}
+ || Restart <- [temporary, transient, permanent],
+ Reason <- [normal, shutdown, {shutdown, test}, other]
+ ]
+ ),
+ {ok, CPidZ} = supervisor:start_child(Pid,
+ #{
+ id => child_z,
+ start => {supervisor_2, start_child, [1000]},
+ restart => temporary,
+ shutdown => 2000
+ }),
+ link(CPidZ),
+ Self = self(),
+ spawn_link(
+ fun () ->
+ timer:sleep(200),
+ maps:foreach(
+ fun (CPid, _) ->
+ CPid ! stop
+ end,
+ CPids
+ ),
+ unlink(Self)
+ end
+ ),
+ ok = terminate(Pid, shutdown),
+ check_exit_reason(Pid, shutdown),
+ check_exit_reason(CPidZ, shutdown),
+ maps:foreach(
+ fun (CPid, Reason) ->
+ check_exit_reason(CPid, Reason)
+ end,
+ CPids),
+ check_exit_reason(CPidA, shutdown).
+
+%%-------------------------------------------------------------------------
+%% Tests that a simple_one_for_one supervisor shuts down correctly when
+%% some children exit with a reason other than shutdown
+sup_stop_non_shutdown_exit_dynamic(Config) when is_list(Config) ->
+ process_flag(trap_exit, true),
+ lists:foreach(
+ fun (Restart) ->
+ Child = {child, {supervisor_4, start_child, []}, Restart, 5000, worker, []},
+ {ok, Pid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}),
+ CPids = lists:foldl(
+ fun (Reason, Acc) ->
+ {ok, CPid} = supervisor:start_child(Pid, [Reason]),
+ link(CPid),
+ Acc#{CPid => Reason}
+ end,
+ #{},
+ [normal, shutdown, {shutdown, test}, other]
+ ),
+ ok = terminate(Pid, shutdown),
+ check_exit_reason(Pid, shutdown),
+ maps:foreach(
+ fun (CPid, Reason) ->
+ check_exit_reason(CPid, Reason)
+ end,
+ CPids)
+ end,
+ [temporary, transient, permanent]
+ ).
+
%%-------------------------------------------------------------------------
%% The start function provided to start a child may return {ok, Pid}
%% or {ok, Pid, Info}, if it returns the latter check that the