diff options
-rw-r--r-- | lib/sasl/test/release_handler_SUITE.erl | 2 | ||||
-rw-r--r-- | lib/stdlib/doc/src/supervisor.xml | 5 | ||||
-rw-r--r-- | lib/stdlib/src/supervisor.erl | 49 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_SUITE.erl | 52 | ||||
-rw-r--r-- | lib/stdlib/test/supervisor_deadlock.erl | 14 |
5 files changed, 90 insertions, 32 deletions
diff --git a/lib/sasl/test/release_handler_SUITE.erl b/lib/sasl/test/release_handler_SUITE.erl index d57de2593a..ee620dcdb4 100644 --- a/lib/sasl/test/release_handler_SUITE.erl +++ b/lib/sasl/test/release_handler_SUITE.erl @@ -1363,7 +1363,7 @@ upgrade_supervisor(Conf) when is_list(Conf) -> %% Check that the restart strategy and child spec is updated {status, _, {module, _}, [_, _, _, _, [_,_,{data,[{"State",State}]}]]} = rpc:call(Node,sys,get_status,[a_sup]), - {state,_,RestartStrategy,[Child],_,_,_,_,_,_} = State, + {state,_,RestartStrategy,[Child],_,_,_,_,_,_,_} = State, one_for_all = RestartStrategy, % changed from one_for_one {child,_,_,_,_,brutal_kill,_,_} = Child, % changed from timeout 2000 diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml index 24ff251ce3..815bf4a489 100644 --- a/lib/stdlib/doc/src/supervisor.xml +++ b/lib/stdlib/doc/src/supervisor.xml @@ -543,7 +543,10 @@ </item> <item> <p><c>active</c> - the count of all actively running child processes - managed by this supervisor.</p> + managed by this supervisor. In the case of <c>simple_one_for_one</c> + supervisors, no check is carried out to ensure that each child process + is still alive, though the result provided here is likely to be very + accurate unless the supervisor is heavily overloaded.</p> </item> <item> <p><c>supervisors</c> - the count of all children marked as diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index 6a5005b4b6..cecdebd0c8 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -116,6 +116,7 @@ intensity :: non_neg_integer(), period :: pos_integer(), restarts = [], + dynamic_restarts = 0 :: non_neg_integer(), module, args}). -type state() :: #state{}. @@ -518,39 +519,26 @@ handle_call(which_children, _From, State) -> handle_call(count_children, _From, #state{children = [#child{restart_type = temporary, child_type = CT}]} = State) when ?is_simple(State) -> - {Active, Count} = - ?SETS:fold(fun(Pid, {Alive, Tot}) -> - case is_pid(Pid) andalso is_process_alive(Pid) of - true ->{Alive+1, Tot +1}; - false -> - {Alive, Tot + 1} - end - end, {0, 0}, dynamics_db(temporary, State#state.dynamics)), + Sz = ?SETS:size(dynamics_db(temporary, State#state.dynamics)), Reply = case CT of - supervisor -> [{specs, 1}, {active, Active}, - {supervisors, Count}, {workers, 0}]; - worker -> [{specs, 1}, {active, Active}, - {supervisors, 0}, {workers, Count}] + supervisor -> [{specs, 1}, {active, Sz}, + {supervisors, Sz}, {workers, 0}]; + worker -> [{specs, 1}, {active, Sz}, + {supervisors, 0}, {workers, Sz}] end, {reply, Reply, State}; -handle_call(count_children, _From, #state{children = [#child{restart_type = RType, +handle_call(count_children, _From, #state{dynamic_restarts = Restarts, + children = [#child{restart_type = RType, child_type = CT}]} = State) when ?is_simple(State) -> - {Active, Count} = - ?DICTS:fold(fun(Pid, _Val, {Alive, Tot}) -> - case is_pid(Pid) andalso is_process_alive(Pid) of - true -> - {Alive+1, Tot +1}; - false -> - {Alive, Tot + 1} - end - end, {0, 0}, dynamics_db(RType, State#state.dynamics)), + Sz = ?DICTS:size(dynamics_db(RType, State#state.dynamics)), + Active = Sz - Restarts, Reply = case CT of supervisor -> [{specs, 1}, {active, Active}, - {supervisors, Count}, {workers, 0}]; + {supervisors, Sz}, {workers, 0}]; worker -> [{specs, 1}, {active, Active}, - {supervisors, 0}, {workers, Count}] + {supervisors, 0}, {workers, Sz}] end, {reply, Reply, State}; @@ -820,8 +808,15 @@ restart(Child, State) -> {shutdown, remove_child(Child, NState)} end. -restart(simple_one_for_one, Child, State) -> +restart(simple_one_for_one, Child, State0) -> #child{pid = OldPid, mfargs = {M, F, A}} = Child, + State = case OldPid of + ?restarting(_) -> + NRes = State0#state.dynamic_restarts - 1, + State0#state{dynamic_restarts = NRes}; + _ -> + State0 + end, Dynamics = ?DICTS:erase(OldPid, dynamics_db(Child#child.restart_type, State#state.dynamics)), case do_start_child_i(M, F, A) of @@ -832,7 +827,9 @@ restart(simple_one_for_one, Child, State) -> NState = State#state{dynamics = ?DICTS:store(Pid, A, Dynamics)}, {ok, NState}; {error, Error} -> - NState = State#state{dynamics = ?DICTS:store(restarting(OldPid), A, + NRestarts = State#state.dynamic_restarts + 1, + NState = State#state{dynamic_restarts = NRestarts, + dynamics = ?DICTS:store(restarting(OldPid), A, Dynamics)}, report_error(start_error, Error, Child, State#state.name), {try_again, NState} diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index 8cb2a5194a..903ca76575 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -67,6 +67,7 @@ %% Misc tests -export([child_unlink/1, tree/1, count_children/1, + count_restarting_children/1, do_not_save_start_parameters_for_temporary_children/1, do_not_save_child_specs_for_temporary_children/1, simple_one_for_one_scale_many_temporary_children/1, @@ -90,7 +91,8 @@ all() -> {group, normal_termination}, {group, shutdown_termination}, {group, abnormal_termination}, child_unlink, tree, - count_children, do_not_save_start_parameters_for_temporary_children, + count_children, count_restarting_children, + do_not_save_start_parameters_for_temporary_children, do_not_save_child_specs_for_temporary_children, simple_one_for_one_scale_many_temporary_children, temporary_bystander, simple_global_supervisor, hanging_restart_loop, hanging_restart_loop_simple, @@ -1459,6 +1461,54 @@ count_children(Config) when is_list(Config) -> [1,0,0,0] = get_child_counts(sup_test). %%------------------------------------------------------------------------- +%% Test count_children when some children are restarting +count_restarting_children(Config) when is_list(Config) -> + process_flag(trap_exit, true), + Child = {child, {supervisor_deadlock, start_child_noreg, []}, + permanent, brutal_kill, worker, []}, + %% 2 sek delay on failing restart (see supervisor_deadlock.erl) -> + %% MaxR=20, MaxT=10 should ensure a restart loop when starting and + %% restarting 3 instances of the child (as below) + {ok, SupPid} = start_link({ok, {{simple_one_for_one, 20, 10}, [Child]}}), + + %% Ets table with state read by supervisor_deadlock.erl + ets:new(supervisor_deadlock,[set,named_table,public]), + ets:insert(supervisor_deadlock,{fail_start,false}), + + [1,0,0,0] = get_child_counts(SupPid), + {ok, Ch1_1} = supervisor:start_child(SupPid, []), + [1,1,0,1] = get_child_counts(SupPid), + {ok, Ch1_2} = supervisor:start_child(SupPid, []), + [1,2,0,2] = get_child_counts(SupPid), + {ok, Ch1_3} = supervisor:start_child(SupPid, []), + [1,3,0,3] = get_child_counts(SupPid), + + supervisor_deadlock:restart_child(Ch1_1), + supervisor_deadlock:restart_child(Ch1_2), + supervisor_deadlock:restart_child(Ch1_3), + test_server:sleep(400), + [1,3,0,3] = get_child_counts(SupPid), + [Ch2_1, Ch2_2, Ch2_3] = [C || {_,C,_,_} <- supervisor:which_children(SupPid)], + + ets:insert(supervisor_deadlock,{fail_start,true}), + supervisor_deadlock:restart_child(Ch2_1), + supervisor_deadlock:restart_child(Ch2_2), + test_server:sleep(4000), % allow restart to happen before proceeding + [1,1,0,3] = get_child_counts(SupPid), + + ets:insert(supervisor_deadlock,{fail_start,false}), + test_server:sleep(4000), % allow restart to happen before proceeding + [1,3,0,3] = get_child_counts(SupPid), + + ok = supervisor:terminate_child(SupPid, Ch2_3), + [1,2,0,2] = get_child_counts(SupPid), + [Ch3_1, Ch3_2] = [C || {_,C,_,_} <- supervisor:which_children(SupPid)], + ok = supervisor:terminate_child(SupPid, Ch3_1), + [1,1,0,1] = get_child_counts(SupPid), + ok = supervisor:terminate_child(SupPid, Ch3_2), + [1,0,0,0] = get_child_counts(SupPid). + +%%------------------------------------------------------------------------- %% Temporary children shall not be restarted so they should not save %% start parameters, as it potentially can take up a huge amount of %% memory for no purpose. diff --git a/lib/stdlib/test/supervisor_deadlock.erl b/lib/stdlib/test/supervisor_deadlock.erl index 288547a972..8d3d1c6f30 100644 --- a/lib/stdlib/test/supervisor_deadlock.erl +++ b/lib/stdlib/test/supervisor_deadlock.erl @@ -11,9 +11,11 @@ init([child]) -> %% terminates immediately {ok, []}; [{fail_start, true}] -> - %% Restart frequency is MaxR=8, MaxT=10, so this will - %% ensure that restart intensity is not reached -> restart - %% loop + %% A restart frequency of MaxR=8, MaxT=10 should ensure + %% that restart intensity is not reached -> restart loop. + %% (Note that if we use simple_one_for_one, and start + %% 'many' child instances, the restart frequency must be + %% ajusted accordingly.) timer:sleep(2000), % NOTE: this could be a gen_server call timeout {stop, error} @@ -41,5 +43,11 @@ code_change(_OldVsn, State, _Extra) -> start_child() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [child], []). +start_child_noreg() -> + gen_server:start_link(?MODULE, [child], []). + restart_child() -> gen_server:cast(supervisor_deadlock, restart). + +restart_child(Pid) -> + gen_server:cast(Pid, restart). |