From bdbef7872740322c248b97310d0371a081f2aa6f Mon Sep 17 00:00:00 2001 From: Micael Karlberg Date: Fri, 27 Jan 2023 18:32:18 +0100 Subject: [snmp] Tweaking agent start sequence to reduce error reporting Tweaked the agent start sequence in order to minimize error reporting during a failed start (due to net-if issues). Also added a new net-if option in order to change what otherwise will be an error report to an info report. OTP-18422 (ERIERL-873) --- lib/snmp/src/agent/snmpa_agent.erl | 75 +++++++++++++++++++++------------ lib/snmp/src/agent/snmpa_agent_sup.erl | 12 ++++-- lib/snmp/src/agent/snmpa_net_if.erl | 40 +++++++++++++----- lib/snmp/src/agent/snmpa_supervisor.erl | 43 +++++++++++++++++-- lib/snmp/src/app/snmp_app.erl | 11 +++-- lib/snmp/src/app/snmp_app_sup.erl | 16 +++++-- 6 files changed, 146 insertions(+), 51 deletions(-) diff --git a/lib/snmp/src/agent/snmpa_agent.erl b/lib/snmp/src/agent/snmpa_agent.erl index 5039b08391..b70bf310d3 100644 --- a/lib/snmp/src/agent/snmpa_agent.erl +++ b/lib/snmp/src/agent/snmpa_agent.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2020. All Rights Reserved. +%% Copyright Ericsson AB 1996-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -349,10 +349,16 @@ init([Prio, Parent, Ref, Options]) -> "~n Options: ~p", [Prio, Parent, Ref, Options]), case (catch do_init(Prio, Parent, Ref, Options)) of {ok, State} -> - ?vdebug("started",[]), + ?vdebug("started"), {ok, State}; + {error, {net_if, info, Reason}} -> + info_msg("Failed starting agent: " + "~n Net If error: ~p", [Reason]), + %% {shutdown, Reason}; + exit(Reason); {error, Reason} -> - config_err("failed starting agent: ~n~p", [Reason]), + config_err("Failed starting agent: " + "~n ~p", [Reason]), {stop, Reason} end. 
@@ -419,15 +425,18 @@ start_note_store(Prio, Ref, Options) -> {ok, Pid} -> ?vdebug("start_note_store -> Pid: ~p", [Pid]), Pid; - {error, Reason} -> - ?vinfo("error starting note store: ~n~p",[Reason]), - throw({error, {note_store_error, Reason}}); - {'EXIT', Reason} -> - ?vinfo("exit starting note store: ~n~p",[Reason]), - throw({error, {note_store_exit, Reason}}); + {error, {Reason, _ChildSpec}} -> + ?vinfo("error starting note store: " + "~n ~p", [Reason]), + throw({error, {note_store, error, Reason}}); + {'EXIT', {Reason, _ChildSpec}} -> + ?vinfo("exit starting note store: " + "~n ~p", [Reason]), + throw({error, {note_store, exit, Reason}}); Error -> - ?vinfo("failed starting note store: ~n~p",[Error]), - throw({error, {note_store_failed, Error}}) + ?vinfo("failed starting note store: " + "~n ~p", [Error]), + throw({error, {note_store, failed, Error}}) end. @@ -446,18 +455,25 @@ start_net_if(none, Prio, Ref, Vsns, NoteStore, Options) -> case (catch snmpa_misc_sup:start_net_if(Prio, NoteStore, Ref, self(), Mod, NiOpts)) of - {ok, Pid} -> + {ok, Pid} -> ?vdebug("start_net_if -> Pid: ~p", [Pid]), {master_agent, Pid, Mod}; - {error, Reason} -> - ?vinfo("error starting net if: ~n~p",[Reason]), - throw({error, {net_if_error, Reason}}); + {error, {{Class, udp_open, PortNo, Reason}, _ChildSpec}} -> + ?vinfo("error starting net if: " + "~n ~p", [Reason]), + throw({error, {net_if, Class, {udp_open, PortNo, Reason}}}); + {error, {Reason, _ChildSpec}} -> + ?vinfo("error starting net if: " + "~n ~p", [Reason]), + throw({error, {net_if, error, Reason}}); {'EXIT', Reason} -> - ?vinfo("exit starting net if: ~n~p",[Reason]), - throw({error, {net_if_exit, Reason}}); + ?vinfo("exit starting net if: " + "~n ~p", [Reason]), + throw({error, {net_if, exit, Reason}}); Error -> - ?vinfo("failed starting net if: ~n~p",[Error]), - throw({error, {net_if_failed, Error}}) + ?vinfo("failed starting net if: " + "~n ~p", [Error]), + throw({error, {net_if, failed, Error}}) end; 
start_net_if(Parent, _Prio, _Ref, _Vsns, _NoteStore, _Options) when is_pid(Parent) -> @@ -479,15 +495,18 @@ start_mib_server(Prio, Ref, Mibs, Options) -> {ok, Pid} -> ?vdebug("start_mib_server -> Pid: ~p", [Pid]), Pid; - {error, Reason} -> - ?vinfo("error starting mib server: ~n~p",[Reason]), - throw({error, {mib_server_error, Reason}}); + {error, {Reason, _ChildSpec}} -> + ?vinfo("error starting mib server: " + "~n ~p", [Reason]), + throw({error, {mib_server, error, Reason}}); {'EXIT', Reason} -> - ?vinfo("exit starting mib server: ~n~p",[Reason]), - throw({error, {mib_server_exit, Reason}}); + ?vinfo("exit starting mib server: " + "~n ~p", [Reason]), + throw({error, {mib_server, exit, Reason}}); Error -> - ?vinfo("failed starting mib server: ~n~p",[Error]), - throw({error, {mib_server_failed, Error}}) + ?vinfo("failed starting mib server: " + "~n ~p", [Error]), + throw({error, {mib_server, failed, Error}}) end. @@ -3462,8 +3481,8 @@ get_stats_counters([Counter|Counters], Acc) -> %% --------------------------------------------------------------------- -%% info_msg(F, A) -> -%% ?snmpa_info(F, A). +info_msg(F, A) -> + ?snmpa_info(F, A). warning_msg(F, A) -> ?snmpa_warning(F, A). diff --git a/lib/snmp/src/agent/snmpa_agent_sup.erl b/lib/snmp/src/agent/snmpa_agent_sup.erl index 0a5116b2d0..b1eeaba988 100644 --- a/lib/snmp/src/agent/snmpa_agent_sup.erl +++ b/lib/snmp/src/agent/snmpa_agent_sup.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2016. All Rights Reserved. +%% Copyright Ericsson AB 1997-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -24,7 +24,9 @@ -behaviour(supervisor). %% External exports --export([start_link/0, start_link/1, start_subagent/3, stop_subagent/1]). +-export([start_link/0, start_link/1, + start_master_agent/1, + start_subagent/3, stop_subagent/1]). %% Internal exports -export([init/1]). 
@@ -40,8 +42,7 @@ %%%----------------------------------------------------------------- -%%% This is a supervisor for the mib processes. Each agent has one -%%% mib process. +%%% This is a supervisor for the agent processes (master and sub). %%%----------------------------------------------------------------- start_link() -> ?d("start_link -> entry", []), @@ -52,6 +53,9 @@ start_link(AgentSpec) -> "~n AgentSpec: ~p", [AgentSpec]), supervisor:start_link({local, ?SERVER}, ?MODULE, [[AgentSpec]]). +start_master_agent(MasterAgentSpec) -> + supervisor:start_child(snmpa_agent_sup, MasterAgentSpec). + start_subagent(ParentAgent, Subtree, Mibs) -> ?d("start_subagent -> entry with" "~n ParentAgent: ~p" diff --git a/lib/snmp/src/agent/snmpa_net_if.erl b/lib/snmp/src/agent/snmpa_net_if.erl index bd19a32159..2c726eedad 100644 --- a/lib/snmp/src/agent/snmpa_net_if.erl +++ b/lib/snmp/src/agent/snmpa_net_if.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2004-2022. All Rights Reserved. +%% Copyright Ericsson AB 2004-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -235,11 +235,21 @@ init(Prio, NoteStore, MasterAgent, Parent, Opts) -> end, erlang:raise(C, E, S) end; - {error, Reason} -> - config_err("failed starting net-if: ~n~p", [Reason]), - proc_lib:init_ack({error, Reason}); + {error, {udp_open, {open, PortNo, Reason}}} -> + OEFilters = get_open_err_filters(Opts), + Class = + case lists:member(Reason, OEFilters) of + false -> + error; + true -> + info + end, + proc_lib:init_ack({error, {Class, udp_open, PortNo, Reason}}); + {error, Reason} -> + %% config_err("failed starting net-if: ~n~p", [Reason]), + proc_lib:init_ack({error, Reason}); Error -> - config_err("failed starting net-if: ~n~p", [Error]), + %% config_err("failed starting net-if: ~n~p", [Error]), proc_lib:init_ack({error, Error}) end. 
@@ -256,7 +266,7 @@ do_init(Prio, NoteStore, MasterAgent, Parent, Opts) -> Vsns = get_vsns(Opts), ?vdebug("vsns: ~w",[Vsns]), - %% Flow control -- + %% -- Flow control -- Limit = get_req_limit(Opts), ?vdebug("Limit: ~w", [Limit]), FilterOpts = get_filter_opts(Opts), @@ -475,7 +485,7 @@ gen_udp_open(system, Opts) -> throw({udp_open, {port, PReason}}) end; {error, OReason} -> - throw({udp_open, {open, OReason}}) + throw({udp_open, {open, 0, OReason}}) end; %% This is for "future compat" since we cannot actually config '0'... gen_udp_open(IpPort, Opts) when (IpPort =:= 0) -> @@ -533,7 +543,7 @@ gen_udp_range_open(Min, Max, Opts) -> gen_udp_range_open(Min+1, Max, Opts); {error, Reason} -> ?vdebug("gen_udp_range_open(~w,~w) -> ~w", [Reason]), - throw({udp_open, {open, Reason}}) + throw({udp_open, {open, Min, Reason}}) catch C:E:S -> ?vinfo("gen_udp_range_open(~w,~w) -> failed open socket: " @@ -2104,6 +2114,16 @@ get_filter_opts(O) -> get_filter_module(O) -> snmp_misc:get_option(module, O, ?DEFAULT_FILTER_MODULE). +get_open_err_filters(O) -> + case snmp_misc:get_option(open_err_filters, O, []) of + Filters when is_list(Filters) -> + Filters; + Filter when is_atom(Filter) -> + [Filter]; + _ -> + [] + end. + get_recbuf(Opts, DefaultOpts) -> get_socket_opt(recbuf, Opts, DefaultOpts, use_default). @@ -2156,8 +2176,8 @@ info_msg(F,A) -> user_err(F, A) -> snmpa_error:user_err(F, A). -config_err(F, A) -> - snmpa_error:config_err(F, A). +%% config_err(F, A) -> +%% snmpa_error:config_err(F, A). %% ---------------------------------------------------------------- diff --git a/lib/snmp/src/agent/snmpa_supervisor.erl b/lib/snmp/src/agent/snmpa_supervisor.erl index 7d5c6da2c8..3261411ec3 100644 --- a/lib/snmp/src/agent/snmpa_supervisor.erl +++ b/lib/snmp/src/agent/snmpa_supervisor.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2019. All Rights Reserved. +%% Copyright Ericsson AB 1996-2023. All Rights Reserved. 
%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ %% Internal exports -export([init/1, config/2]). +-compile({no_auto_import,[erase/1]}). -define(SERVER, ?MODULE). @@ -41,6 +42,10 @@ %% Process structure %% ================= %% +%% "application" +%% | +%% app-sup +%% | %% ___________________ supervisor __________________ %% / | | \ \ %% ___misc_sup___ target_cache symbolic_store local_db agent_sup @@ -176,7 +181,23 @@ start_master_sup(Opts) -> do_start_master_sup(Opts) -> verify_mandatory([db_dir], Opts), - supervisor:start_link({local, ?SERVER}, ?MODULE, [master, Opts]). + case supervisor:start_link({local, ?SERVER}, ?MODULE, [master, Opts]) of + {ok, Pid} = OK -> + %% + Key = master_agent_child_spec, + MasterAgentSpec = lookup(Key), + case snmpa_agent_sup:start_master_agent(MasterAgentSpec) of + {ok, MPid} when is_pid(MPid) -> + erase(Key), + OK; + {error, {Reason, _ChildSpec}} -> + stop(Pid, 0), + {error, Reason} + end; + %% + Else -> + Else + end. verify_mandatory([], _) -> ok; @@ -503,9 +524,18 @@ init([AgentType, Opts]) -> worker_spec(snmpa_agent, [Prio, snmp_master_agent, none, Ref, AgentOpts], Restart, 15000), + %% + %% The point is to make start failure more quiet + %% Often the failure happens in the master agent, + %% so we move the start of that out of this function + %% and into the 'do_start_master_sup' function. + %% At some point we should rewrite this. Maybe start all + %% children the same way (explicitly). + store(master_agent_child_spec, AgentSpec), AgentSupSpec = - sup_spec(snmpa_agent_sup, [AgentSpec], + sup_spec(snmpa_agent_sup, [], Restart, infinity), + %% [ConfigSpec, AgentSupSpec]; _ -> ?vdebug("[sub agent] spec for the agent supervisor",[]), @@ -521,6 +551,13 @@ init([AgentType, Opts]) -> store(Key, Value) -> ets:insert(snmp_agent_table, {Key, Value}). +lookup(Key) -> + [{Key, Value}] = ets:lookup(snmp_agent_table, Key), + Value. 
+ +erase(Key) -> + ets:delete(snmp_agent_table, Key). + get_mibs(Mibs, Vsns) -> MibDir = filename:join(code:priv_dir(snmp), "mibs"), StdMib = diff --git a/lib/snmp/src/app/snmp_app.erl b/lib/snmp/src/app/snmp_app.erl index 486b276383..7ba3c659f0 100644 --- a/lib/snmp/src/app/snmp_app.erl +++ b/lib/snmp/src/app/snmp_app.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2021. All Rights Reserved. +%% Copyright Ericsson AB 2003-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -38,8 +38,13 @@ start(Type, []) -> %% First start the (new) central supervisor, {ok, Pid} = snmp_app_sup:start_link(), Entities = entities(), - ok = start_entities(Type, Entities), - {ok, Pid}. + case start_entities(Type, Entities) of + ok -> + {ok, Pid}; + Error -> + snmp_app_sup:stop(), + Error + end. entities() -> entities([agent, manager], []). diff --git a/lib/snmp/src/app/snmp_app_sup.erl b/lib/snmp/src/app/snmp_app_sup.erl index eb89cc5b6d..bb6faa18cb 100644 --- a/lib/snmp/src/app/snmp_app_sup.erl +++ b/lib/snmp/src/app/snmp_app_sup.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. +%% Copyright Ericsson AB 2003-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -64,7 +64,12 @@ start_agent(Type, Opts) -> "~n Type: ~p" "~n Opts: ~p", [Type, Opts]), Restart = get_restart(Opts, permanent), - start_sup_child(snmpa_supervisor, Restart, [Type, Opts]). + case start_sup_child(snmpa_supervisor, Restart, [Type, Opts]) of + {ok, Pid} = OK when is_pid(Pid) -> + OK; + {error, {Reason, _ChildSpec}} -> + {error, Reason} + end. 
start_manager(Type, Opts) -> @@ -72,7 +77,12 @@ start_manager(Type, Opts) -> "~n Type: ~p" "~n Opts: ~p", [Type, Opts]), Restart = get_restart(Opts, transient), - start_sup_child(snmpm_supervisor, Restart, [Type, Opts]). + case start_sup_child(snmpm_supervisor, Restart, [Type, Opts]) of + {ok, Pid} = OK when is_pid(Pid) -> + OK; + {error, {Reason, _ChildSpec}} -> + {error, Reason} + end. %%%------------------------------------------------------------------- -- cgit v1.2.1