diff options
author | Rickard Green <rickard@erlang.org> | 2021-09-29 19:48:20 +0200 |
---|---|---|
committer | Rickard Green <rickard@erlang.org> | 2021-09-29 19:48:20 +0200 |
commit | 1f26b218ea612627a3ae10af06680627fff20cfc (patch) | |
tree | 70f376a8b4c73c8ef2872bdfb65ccfc3386bc348 | |
parent | 4267645a6b40f1665ac961708b9c0a58e66bad93 (diff) | |
parent | ffd032a1364b8c976dc52a52230f7f10ba36f92b (diff) | |
download | erlang-1f26b218ea612627a3ae10af06680627fff20cfc.tar.gz |
Merge branch 'Maria-12648430/supervisor_unlink_child_late/GH-5193/PR-5201/OTP-17649' into maint
* Maria-12648430/supervisor_unlink_child_late/GH-5193/PR-5201/OTP-17649:
Keep supervisor children linked during shutdown
-rw-r--r-- | lib/stdlib/src/supervisor.erl | 222 | ||||
-rw-r--r-- | lib/stdlib/test/Makefile | 1 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_4.erl | 49 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_SUITE.erl | 133 |
4 files changed, 295 insertions, 110 deletions
diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index fce18b18b7..eea5b6a513 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -898,11 +898,9 @@ terminate_children(Children, SupName) -> NChildren. do_terminate(Child, SupName) when is_pid(Child#child.pid) -> - case shutdown(Child#child.pid, Child#child.shutdown) of + case shutdown(Child) of ok -> ok; - {error, normal} when not (?is_permanent(Child)) -> - ok; {error, OtherReason} -> ?report_error(shutdown_error, OtherReason, Child, SupName) end, @@ -920,65 +918,61 @@ do_terminate(_Child, _SupName) -> %% supervisor. %% Returns: ok | {error, OtherReason} (this should be reported) %%----------------------------------------------------------------- -shutdown(Pid, brutal_kill) -> - case monitor_child(Pid) of - ok -> - exit(Pid, kill), - receive - {'DOWN', _MRef, process, Pid, killed} -> - ok; - {'DOWN', _MRef, process, Pid, OtherReason} -> - {error, OtherReason} - end; - {error, Reason} -> - {error, Reason} +shutdown(#child{pid=Pid, shutdown=brutal_kill} = Child) -> + Mon = monitor(process, Pid), + exit(Pid, kill), + receive + {'DOWN', Mon, process, Pid, Reason0} -> + case unlink_flush(Pid, Reason0) of + killed -> + ok; + {shutdown, _} when not (?is_permanent(Child)) -> + ok; + normal when not (?is_permanent(Child)) -> + ok; + Reason -> + {error, Reason} + end end; -shutdown(Pid, Time) -> - case monitor_child(Pid) of - ok -> - exit(Pid, shutdown), %% Try to shutdown gracefully - receive - {'DOWN', _MRef, process, Pid, shutdown} -> - ok; - {'DOWN', _MRef, process, Pid, OtherReason} -> - {error, OtherReason} - after Time -> - exit(Pid, kill), %% Force termination. - receive - {'DOWN', _MRef, process, Pid, OtherReason} -> - {error, OtherReason} - end - end; - {error, Reason} -> - {error, Reason} +shutdown(#child{pid=Pid, shutdown=Time} = Child) -> + Mon = monitor(process, Pid), + exit(Pid, shutdown), + receive + {'DOWN', Mon, process, Pid, Reason0} -> + case unlink_flush(Pid, Reason0) of + shutdown -> + ok; + {shutdown, _} when not (?is_permanent(Child)) -> + ok; + normal when not (?is_permanent(Child)) -> + ok; + Reason -> + {error, Reason} + end + after Time -> + exit(Pid, kill), + receive + {'DOWN', Mon, process, Pid, Reason0} -> + case unlink_flush(Pid, Reason0) of + shutdown -> + ok; + {shutdown, _} when not (?is_permanent(Child)) -> + ok; + normal when not (?is_permanent(Child)) -> + ok; + Reason -> + {error, Reason} + end + end end. -%% Help function to shutdown/2 switches from link to monitor approach -monitor_child(Pid) -> - - %% Do the monitor operation first so that if the child dies - %% before the monitoring is done causing a 'DOWN'-message with - %% reason noproc, we will get the real reason in the 'EXIT'-message - %% unless a naughty child has already done unlink... - erlang:monitor(process, Pid), +unlink_flush(Pid, DefaultReason) -> unlink(Pid), - receive - %% If the child dies before the unlik we must empty - %% the mail-box of the 'EXIT'-message and the 'DOWN'-message. - {'EXIT', Pid, Reason} -> - receive - {'DOWN', _, process, Pid, _} -> - {error, Reason} - end - after 0 -> - %% If a naughty child did unlink and the child dies before - %% monitor the result will be that shutdown/2 receives a - %% 'DOWN'-message with reason noproc. - %% If the child should die after the unlink there - %% will be a 'DOWN'-message with a correct reason - %% that will be handled in shutdown/2. - ok + {'EXIT', Pid, Reason} -> + Reason + after 0 -> + DefaultReason end. %%----------------------------------------------------------------- @@ -992,40 +986,37 @@ monitor_child(Pid) -> %%----------------------------------------------------------------- terminate_dynamic_children(State) -> Child = get_dynamic_child(State), - {Pids, EStack0} = monitor_dynamic_children(Child,State), + Pids = dyn_fold( + fun + (P, Acc) when is_pid(P) -> + Mon = monitor(process, P), + case Child#child.shutdown of + brutal_kill -> exit(P, kill); + _ -> exit(P, shutdown) + end, + Acc#{{P, Mon} => true}; + (?restarting(_), Acc) -> + Acc + end, + #{}, + State + ), + TRef = case Child#child.shutdown of + brutal_kill -> + undefined; + infinity -> + undefined; + Time -> + erlang:start_timer(Time, self(), kill) + end, Sz = maps:size(Pids), - EStack = case Child#child.shutdown of - brutal_kill -> - maps:foreach(fun(P, _) -> exit(P, kill) end, Pids), - wait_dynamic_children(Child, Pids, Sz, undefined, EStack0); - infinity -> - maps:foreach(fun(P, _) -> exit(P, shutdown) end, Pids), - wait_dynamic_children(Child, Pids, Sz, undefined, EStack0); - Time -> - maps:foreach(fun(P, _) -> exit(P, shutdown) end, Pids), - TRef = erlang:start_timer(Time, self(), kill), - wait_dynamic_children(Child, Pids, Sz, TRef, EStack0) - end, + EStack = wait_dynamic_children(Child, Pids, Sz, TRef, #{}), %% Unroll stacked errors and report them maps:foreach(fun(Reason, Ls) -> ?report_error(shutdown_error, Reason, Child#child{pid=Ls}, State#state.name) end, EStack). -monitor_dynamic_children(Child,State) -> - dyn_fold(fun(P,{Pids, EStack}) when is_pid(P) -> - case monitor_child(P) of - ok -> - {maps:put(P, P, Pids), EStack}; - {error, normal} when not (?is_permanent(Child)) -> - {Pids, EStack}; - {error, Reason} -> - {Pids, maps_prepend(Reason, P, EStack)} - end; - (?restarting(_), {Pids, EStack}) -> - {Pids, EStack} - end, {maps:new(), maps:new()}, State). - wait_dynamic_children(_Child, _Pids, 0, undefined, EStack) -> EStack; wait_dynamic_children(_Child, _Pids, 0, TRef, EStack) -> @@ -1041,34 +1032,51 @@ wait_dynamic_children(_Child, _Pids, 0, TRef, EStack) -> wait_dynamic_children(#child{shutdown=brutal_kill} = Child, Pids, Sz, TRef, EStack) -> receive - {'DOWN', _MRef, process, Pid, killed} -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, EStack); - - {'DOWN', _MRef, process, Pid, Reason} -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, maps_prepend(Reason, Pid, EStack)) + {'DOWN', Mon, process, Pid, Reason0} + when is_map_key({Pid, Mon}, Pids) -> + case unlink_flush(Pid, Reason0) of + killed -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + + {shutdown, _} when not (?is_permanent(Child)) -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + + normal when not (?is_permanent(Child)) -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + Reason -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, maps_prepend(Reason, Pid, + EStack)) + end end; wait_dynamic_children(Child, Pids, Sz, TRef, EStack) -> receive - {'DOWN', _MRef, process, Pid, shutdown} -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, EStack); - - {'DOWN', _MRef, process, Pid, {shutdown, _}} -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, EStack); - - {'DOWN', _MRef, process, Pid, normal} when not (?is_permanent(Child)) -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, EStack); - - {'DOWN', _MRef, process, Pid, Reason} -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, maps_prepend(Reason, Pid, EStack)); + {'DOWN', Mon, process, Pid, Reason0} + when is_map_key({Pid, Mon}, Pids) -> + case unlink_flush(Pid, Reason0) of + shutdown -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + + {shutdown, _} when not (?is_permanent(Child)) -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + + normal when not (?is_permanent(Child)) -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + + Reason -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, maps_prepend(Reason, Pid, + EStack)) + end; {timeout, TRef, kill} -> - maps:foreach(fun(P, _) -> exit(P, kill) end, Pids), + maps:foreach(fun({P, _}, _) -> exit(P, kill) end, Pids), wait_dynamic_children(Child, Pids, Sz, undefined, EStack) end. diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile index 5f9510cf16..bda16c263d 100644 --- a/lib/stdlib/test/Makefile +++ b/lib/stdlib/test/Makefile @@ -72,6 +72,7 @@ MODULES= \ supervisor_1 \ supervisor_2 \ supervisor_3 \ + supervisor_4 \ supervisor_deadlock \ naughty_child \ shell_SUITE \ diff --git a/lib/stdlib/test/supervisor_4.erl b/lib/stdlib/test/supervisor_4.erl new file mode 100644 index 0000000000..038c904c27 --- /dev/null +++ b/lib/stdlib/test/supervisor_4.erl @@ -0,0 +1,49 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Description: Simulates the behaviour that a child process may have. +%% Is used by the supervisor_SUITE test suite. +-module(supervisor_4). + +-export([start_child/0, start_child/1, init/1]). + +-export([handle_call/3, handle_info/2, terminate/2]). + +start_child() -> + gen_server:start_link(?MODULE, undefined, []). + +start_child(Reason) -> + gen_server:start_link(?MODULE, {Reason}, []). + +init(Reason) -> + process_flag(trap_exit, true), + {ok, Reason}. + +handle_call(Req, _From, State) -> + {reply, Req, State}. + +handle_info(stop, State) -> + {stop, normal, State}; +handle_info(_, State) -> + {noreply, State}. + +terminate(_, {Reason}) -> + exit(Reason); +terminate(_, _) -> + ok. diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index 3d05ce5402..5bae4f6e05 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -44,7 +44,9 @@ sup_start_child_returns_error_simple/1, sup_start_map/1, sup_start_map_simple/1, sup_start_map_faulty_specs/1, - sup_stop_infinity/1, sup_stop_timeout/1, sup_stop_brutal_kill/1, + sup_stop_infinity/1, sup_stop_timeout/1, sup_stop_timeout_dynamic/1, + sup_stop_brutal_kill/1, sup_stop_brutal_kill_dynamic/1, + sup_stop_race/1, sup_stop_non_shutdown_exit_dynamic/1, child_adm/1, child_adm_simple/1, child_specs/1, child_specs_map/1, extra_return/1, sup_flags/1]). @@ -133,8 +135,9 @@ groups() -> {sup_start_map, [], [sup_start_map, sup_start_map_simple, sup_start_map_faulty_specs]}, {sup_stop, [], - [sup_stop_infinity, sup_stop_timeout, - sup_stop_brutal_kill]}, + [sup_stop_infinity, sup_stop_timeout, sup_stop_timeout_dynamic, + sup_stop_brutal_kill, sup_stop_brutal_kill_dynamic, + sup_stop_race, sup_stop_non_shutdown_exit_dynamic]}, {normal_termination, [], [permanent_normal, transient_normal, temporary_normal]}, {shutdown_termination, [], @@ -498,6 +501,20 @@ sup_stop_timeout(Config) when is_list(Config) -> check_exit_reason(CPid1, shutdown), check_exit_reason(CPid2, killed). +sup_stop_timeout_dynamic(Config) when is_list(Config) -> + process_flag(trap_exit, true), + Child = {child, {supervisor_1, start_child, []}, temporary, 1000, worker, []}, + {ok, Pid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + {ok, CPid1} = supervisor:start_child(Pid, []), + link(CPid1), + {ok, CPid2} = supervisor:start_child(Pid, []), + link(CPid2), + CPid2 ! {sleep, 200000}, + terminate(Pid, shutdown), + check_exit_reason(Pid, shutdown), + check_exit_reason(CPid1, shutdown), + check_exit_reason(CPid2, killed). + %%------------------------------------------------------------------------- %% See sup_stop/1 when Shutdown = brutal_kill @@ -518,6 +535,116 @@ sup_stop_brutal_kill(Config) when is_list(Config) -> check_exit_reason(CPid1, shutdown), check_exit_reason(CPid2, killed). +sup_stop_brutal_kill_dynamic(Config) when is_list(Config) -> + process_flag(trap_exit, true), + Child = {child, {supervisor_4, start_child, []}, temporary, brutal_kill, worker, []}, + {ok, Pid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + CPids = lists:map( + fun (_) -> + {ok, CPid} = supervisor:start_child(Pid, []), + link(CPid), + CPid + end, + lists:seq(1, 10)), + terminate(Pid, shutdown), + check_exit_reason(Pid, shutdown), + lists:foreach( + fun (CPid) -> + check_exit_reason(CPid, killed) + end, + CPids + ). + +%%------------------------------------------------------------------------- +%% Tests that a supervisor completes its shutdown properly while children +%% exit by themselves for reasons other than shutdown +sup_stop_race(Config) when is_list(Config) -> + process_flag(trap_exit, true), + {ok, Pid} = start_link({ok, {{one_for_one, 2, 3600}, []}}), + {ok, CPidA} = supervisor:start_child(Pid, + #{id => child_a, + start => {supervisor_1, start_child, []}, + restart => temporary, + shutdown => 1000}), + link(CPidA), + CPids = lists:foldl( + fun ({Restart, Reason}, Acc) -> + {ok, CPid} = supervisor:start_child(Pid, + #{id => {child, make_ref()}, + start => {supervisor_4, start_child, [Reason]}, + restart => Restart, + shutdown => 1000 + }), + link(CPid), + Acc#{CPid => Reason} + end, + #{}, + [ + {Restart, Reason} + || Restart <- [temporary, transient, permanent], + Reason <- [normal, shutdown, {shutdown, test}, other] + ] + ), + {ok, CPidZ} = supervisor:start_child(Pid, + #{ + id => child_z, + start => {supervisor_2, start_child, [1000]}, + restart => temporary, + shutdown => 2000 + }), + link(CPidZ), + Self = self(), + spawn_link( + fun () -> + timer:sleep(200), + maps:foreach( + fun (CPid, _) -> + CPid ! stop + end, + CPids + ), + unlink(Self) + end + ), + ok = terminate(Pid, shutdown), + check_exit_reason(Pid, shutdown), + check_exit_reason(CPidZ, shutdown), + maps:foreach( + fun (CPid, Reason) -> + check_exit_reason(CPid, Reason) + end, + CPids), + check_exit_reason(CPidA, shutdown). + +%%------------------------------------------------------------------------- +%% Tests that a simple_one_for_one supervisor shuts down correctly when +%% some children exit with a reason other than shutdown +sup_stop_non_shutdown_exit_dynamic(Config) when is_list(Config) -> + process_flag(trap_exit, true), + lists:foreach( + fun (Restart) -> + Child = {child, {supervisor_4, start_child, []}, Restart, 5000, worker, []}, + {ok, Pid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + CPids = lists:foldl( + fun (Reason, Acc) -> + {ok, CPid} = supervisor:start_child(Pid, [Reason]), + link(CPid), + Acc#{CPid => Reason} + end, + #{}, + [normal, shutdown, {shutdown, test}, other] + ), + ok = terminate(Pid, shutdown), + check_exit_reason(Pid, shutdown), + maps:foreach( + fun (CPid, Reason) -> + check_exit_reason(CPid, Reason) + end, + CPids) + end, + [temporary, transient, permanent] + ). + %%------------------------------------------------------------------------- %% The start function provided to start a child may return {ok, Pid} %% or {ok, Pid, Info}, if it returns the latter check that the |