summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/sasl/test/release_handler_SUITE.erl2
-rw-r--r--lib/stdlib/doc/src/supervisor.xml5
-rw-r--r--lib/stdlib/src/supervisor.erl49
-rw-r--r--lib/stdlib/test/supervisor_SUITE.erl52
-rw-r--r--lib/stdlib/test/supervisor_deadlock.erl14
5 files changed, 90 insertions, 32 deletions
diff --git a/lib/sasl/test/release_handler_SUITE.erl b/lib/sasl/test/release_handler_SUITE.erl
index d57de2593a..ee620dcdb4 100644
--- a/lib/sasl/test/release_handler_SUITE.erl
+++ b/lib/sasl/test/release_handler_SUITE.erl
@@ -1363,7 +1363,7 @@ upgrade_supervisor(Conf) when is_list(Conf) ->
%% Check that the restart strategy and child spec is updated
{status, _, {module, _}, [_, _, _, _, [_,_,{data,[{"State",State}]}]]} =
rpc:call(Node,sys,get_status,[a_sup]),
- {state,_,RestartStrategy,[Child],_,_,_,_,_,_} = State,
+ {state,_,RestartStrategy,[Child],_,_,_,_,_,_,_} = State,
one_for_all = RestartStrategy, % changed from one_for_one
{child,_,_,_,_,brutal_kill,_,_} = Child, % changed from timeout 2000
diff --git a/lib/stdlib/doc/src/supervisor.xml b/lib/stdlib/doc/src/supervisor.xml
index 24ff251ce3..815bf4a489 100644
--- a/lib/stdlib/doc/src/supervisor.xml
+++ b/lib/stdlib/doc/src/supervisor.xml
@@ -543,7 +543,10 @@
</item>
<item>
<p><c>active</c> - the count of all actively running child processes
- managed by this supervisor.</p>
+ managed by this supervisor. In the case of <c>simple_one_for_one</c>
+ supervisors, no check is carried out to ensure that each child process
+ is still alive, though the result provided here is likely to be very
+ accurate unless the supervisor is heavily overloaded.</p>
</item>
<item>
<p><c>supervisors</c> - the count of all children marked as
diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl
index 6a5005b4b6..cecdebd0c8 100644
--- a/lib/stdlib/src/supervisor.erl
+++ b/lib/stdlib/src/supervisor.erl
@@ -116,6 +116,7 @@
intensity :: non_neg_integer(),
period :: pos_integer(),
restarts = [],
+ dynamic_restarts = 0 :: non_neg_integer(),
module,
args}).
-type state() :: #state{}.
@@ -518,39 +519,26 @@ handle_call(which_children, _From, State) ->
handle_call(count_children, _From, #state{children = [#child{restart_type = temporary,
child_type = CT}]} = State)
when ?is_simple(State) ->
- {Active, Count} =
- ?SETS:fold(fun(Pid, {Alive, Tot}) ->
- case is_pid(Pid) andalso is_process_alive(Pid) of
- true ->{Alive+1, Tot +1};
- false ->
- {Alive, Tot + 1}
- end
- end, {0, 0}, dynamics_db(temporary, State#state.dynamics)),
+ Sz = ?SETS:size(dynamics_db(temporary, State#state.dynamics)),
Reply = case CT of
- supervisor -> [{specs, 1}, {active, Active},
- {supervisors, Count}, {workers, 0}];
- worker -> [{specs, 1}, {active, Active},
- {supervisors, 0}, {workers, Count}]
+ supervisor -> [{specs, 1}, {active, Sz},
+ {supervisors, Sz}, {workers, 0}];
+ worker -> [{specs, 1}, {active, Sz},
+ {supervisors, 0}, {workers, Sz}]
end,
{reply, Reply, State};
-handle_call(count_children, _From, #state{children = [#child{restart_type = RType,
+handle_call(count_children, _From, #state{dynamic_restarts = Restarts,
+ children = [#child{restart_type = RType,
child_type = CT}]} = State)
when ?is_simple(State) ->
- {Active, Count} =
- ?DICTS:fold(fun(Pid, _Val, {Alive, Tot}) ->
- case is_pid(Pid) andalso is_process_alive(Pid) of
- true ->
- {Alive+1, Tot +1};
- false ->
- {Alive, Tot + 1}
- end
- end, {0, 0}, dynamics_db(RType, State#state.dynamics)),
+ Sz = ?DICTS:size(dynamics_db(RType, State#state.dynamics)),
+ Active = Sz - Restarts,
Reply = case CT of
supervisor -> [{specs, 1}, {active, Active},
- {supervisors, Count}, {workers, 0}];
+ {supervisors, Sz}, {workers, 0}];
worker -> [{specs, 1}, {active, Active},
- {supervisors, 0}, {workers, Count}]
+ {supervisors, 0}, {workers, Sz}]
end,
{reply, Reply, State};
@@ -820,8 +808,15 @@ restart(Child, State) ->
{shutdown, remove_child(Child, NState)}
end.
-restart(simple_one_for_one, Child, State) ->
+restart(simple_one_for_one, Child, State0) ->
#child{pid = OldPid, mfargs = {M, F, A}} = Child,
+ State = case OldPid of
+ ?restarting(_) ->
+ NRes = State0#state.dynamic_restarts - 1,
+ State0#state{dynamic_restarts = NRes};
+ _ ->
+ State0
+ end,
Dynamics = ?DICTS:erase(OldPid, dynamics_db(Child#child.restart_type,
State#state.dynamics)),
case do_start_child_i(M, F, A) of
@@ -832,7 +827,9 @@ restart(simple_one_for_one, Child, State) ->
NState = State#state{dynamics = ?DICTS:store(Pid, A, Dynamics)},
{ok, NState};
{error, Error} ->
- NState = State#state{dynamics = ?DICTS:store(restarting(OldPid), A,
+ NRestarts = State#state.dynamic_restarts + 1,
+ NState = State#state{dynamic_restarts = NRestarts,
+ dynamics = ?DICTS:store(restarting(OldPid), A,
Dynamics)},
report_error(start_error, Error, Child, State#state.name),
{try_again, NState}
diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl
index 8cb2a5194a..903ca76575 100644
--- a/lib/stdlib/test/supervisor_SUITE.erl
+++ b/lib/stdlib/test/supervisor_SUITE.erl
@@ -67,6 +67,7 @@
%% Misc tests
-export([child_unlink/1, tree/1, count_children/1,
+ count_restarting_children/1,
do_not_save_start_parameters_for_temporary_children/1,
do_not_save_child_specs_for_temporary_children/1,
simple_one_for_one_scale_many_temporary_children/1,
@@ -90,7 +91,8 @@ all() ->
{group, normal_termination},
{group, shutdown_termination},
{group, abnormal_termination}, child_unlink, tree,
- count_children, do_not_save_start_parameters_for_temporary_children,
+ count_children, count_restarting_children,
+ do_not_save_start_parameters_for_temporary_children,
do_not_save_child_specs_for_temporary_children,
simple_one_for_one_scale_many_temporary_children, temporary_bystander,
simple_global_supervisor, hanging_restart_loop, hanging_restart_loop_simple,
@@ -1459,6 +1461,54 @@ count_children(Config) when is_list(Config) ->
[1,0,0,0] = get_child_counts(sup_test).
%%-------------------------------------------------------------------------
+%% Test count_children when some children are restarting
+count_restarting_children(Config) when is_list(Config) ->
+ process_flag(trap_exit, true),
+ Child = {child, {supervisor_deadlock, start_child_noreg, []},
+ permanent, brutal_kill, worker, []},
+ %% 2 sek delay on failing restart (see supervisor_deadlock.erl) ->
+ %% MaxR=20, MaxT=10 should ensure a restart loop when starting and
+ %% restarting 3 instances of the child (as below)
+ {ok, SupPid} = start_link({ok, {{simple_one_for_one, 20, 10}, [Child]}}),
+
+ %% Ets table with state read by supervisor_deadlock.erl
+ ets:new(supervisor_deadlock,[set,named_table,public]),
+ ets:insert(supervisor_deadlock,{fail_start,false}),
+
+ [1,0,0,0] = get_child_counts(SupPid),
+ {ok, Ch1_1} = supervisor:start_child(SupPid, []),
+ [1,1,0,1] = get_child_counts(SupPid),
+ {ok, Ch1_2} = supervisor:start_child(SupPid, []),
+ [1,2,0,2] = get_child_counts(SupPid),
+ {ok, Ch1_3} = supervisor:start_child(SupPid, []),
+ [1,3,0,3] = get_child_counts(SupPid),
+
+ supervisor_deadlock:restart_child(Ch1_1),
+ supervisor_deadlock:restart_child(Ch1_2),
+ supervisor_deadlock:restart_child(Ch1_3),
+ test_server:sleep(400),
+ [1,3,0,3] = get_child_counts(SupPid),
+ [Ch2_1, Ch2_2, Ch2_3] = [C || {_,C,_,_} <- supervisor:which_children(SupPid)],
+
+ ets:insert(supervisor_deadlock,{fail_start,true}),
+ supervisor_deadlock:restart_child(Ch2_1),
+ supervisor_deadlock:restart_child(Ch2_2),
+ test_server:sleep(4000), % allow restart to happen before proceeding
+ [1,1,0,3] = get_child_counts(SupPid),
+
+ ets:insert(supervisor_deadlock,{fail_start,false}),
+ test_server:sleep(4000), % allow restart to happen before proceeding
+ [1,3,0,3] = get_child_counts(SupPid),
+
+ ok = supervisor:terminate_child(SupPid, Ch2_3),
+ [1,2,0,2] = get_child_counts(SupPid),
+ [Ch3_1, Ch3_2] = [C || {_,C,_,_} <- supervisor:which_children(SupPid)],
+ ok = supervisor:terminate_child(SupPid, Ch3_1),
+ [1,1,0,1] = get_child_counts(SupPid),
+ ok = supervisor:terminate_child(SupPid, Ch3_2),
+ [1,0,0,0] = get_child_counts(SupPid).
+
+%%-------------------------------------------------------------------------
%% Temporary children shall not be restarted so they should not save
%% start parameters, as it potentially can take up a huge amount of
%% memory for no purpose.
diff --git a/lib/stdlib/test/supervisor_deadlock.erl b/lib/stdlib/test/supervisor_deadlock.erl
index 288547a972..8d3d1c6f30 100644
--- a/lib/stdlib/test/supervisor_deadlock.erl
+++ b/lib/stdlib/test/supervisor_deadlock.erl
@@ -11,9 +11,11 @@ init([child]) ->
%% terminates immediately
{ok, []};
[{fail_start, true}] ->
- %% Restart frequency is MaxR=8, MaxT=10, so this will
- %% ensure that restart intensity is not reached -> restart
- %% loop
+ %% A restart frequency of MaxR=8, MaxT=10 should ensure
+ %% that restart intensity is not reached -> restart loop.
+ %% (Note that if we use simple_one_for_one, and start
+ %% 'many' child instances, the restart frequency must be
+ %% ajusted accordingly.)
timer:sleep(2000), % NOTE: this could be a gen_server call timeout
{stop, error}
@@ -41,5 +43,11 @@ code_change(_OldVsn, State, _Extra) ->
start_child() ->
gen_server:start_link({local, ?MODULE}, ?MODULE, [child], []).
+start_child_noreg() ->
+ gen_server:start_link(?MODULE, [child], []).
+
restart_child() ->
gen_server:cast(supervisor_deadlock, restart).
+
+restart_child(Pid) ->
+ gen_server:cast(Pid, restart).