diff options
author | Riccardo Brognara <brognara@us.ibm.com> | 2014-08-22 17:53:19 -0400 |
---|---|---|
committer | Jay Doane <jaydoane@apache.org> | 2021-04-19 00:35:19 -0700 |
commit | d09b5026ff625ff517a99b97d6112279b3151d99 (patch) | |
tree | 008bb298f67480acfa66842f8d80ddef44878717 | |
parent | b3272c7fa96cedac0b89b20ced1800789a68b08c (diff) | |
download | couchdb-d09b5026ff625ff517a99b97d6112279b3151d99.tar.gz |
Check mean node statistics over one second
Check the absolute statistics obtained by recon:node_stats/4
over a one second period. The values are sampled ten times and
the mean is returned.
For run_queue and process_count the mean is compared to
hard-coded thresholds which determine whether a warning or
info message is returned. For all other statistics an info
message is always returned.
BugzID: 32877
-rw-r--r-- | src/weatherreport/src/weatherreport_check_node_stats.erl | 66 |
1 files changed, 66 insertions, 0 deletions
%% diff --git a/src/weatherreport/src/weatherreport_check_node_stats.erl b/src/weatherreport/src/weatherreport_check_node_stats.erl new file mode 100644 index 000000000..27b77cefd --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_node_stats.erl @@ -0,0 +1,66 @@

%% -------------------------------------------------------------------
%%
%% weatherreport - automated diagnostic tools for CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Diagnostic that samples the absolute statistics reported by
%% recon:node_stats/4 and reports the mean of each one. The means of
%% run_queue and process_count yield a warning when they exceed fixed
%% thresholds; every other result is reported as an info message.
-module(weatherreport_check_node_stats).
-behaviour(weatherreport_check).

-export([description/0, valid/0, check/1, format/1]).

%% Number of samples folded into each mean.
-define(SAMPLES, 10).
%% Interval argument passed to recon:node_stats/4; ?SAMPLES samples at
%% this interval make up the "one second" named in format/1.
-define(SAMPLE_INTERVAL, 100).
%% A mean run_queue strictly above this value is a warning.
-define(T_RUN_QUEUE, 40).
%% A mean process_count strictly above this value is a warning.
-define(T_PROCESS_COUNT, 100000).

%% @doc One-line, human-readable summary of this diagnostic.
-spec description() -> string().
description() ->
    "Check useful erlang statistics for diagnostics".

%% @doc Whether the check can run; delegates entirely to
%% weatherreport_node:can_connect/0.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% @doc Fold step handed to recon:node_stats/4. Adds every `{Name, Value}'
%% pair of the current sample to the running total stored under the same
%% name in `Totals' (a missing name counts as 0). The second element of
%% the sample tuple is ignored.
-spec sum_absolute_stats({list(), list()}, list()) -> list().
sum_absolute_stats({Sample, _Ignored}, Totals) ->
    [
        {Name, Value + proplists:get_value(Name, Totals, 0)}
     || {Name, Value} <- Sample
    ].

%% @doc Tags a `{Statistic, Mean}' pair with its severity: `warning' when
%% run_queue or process_count is above its threshold, `info' otherwise.
-spec mean_to_message({atom(), integer()}) -> {atom(), {atom(), integer()}}.
mean_to_message({run_queue, Mean} = Stat) when Mean > ?T_RUN_QUEUE ->
    {warning, Stat};
mean_to_message({process_count, Mean} = Stat) when Mean > ?T_PROCESS_COUNT ->
    {warning, Stat};
mean_to_message({_Statistic, _Mean} = Stat) ->
    {info, Stat}.

%% @doc Runs the diagnostic: sums ?SAMPLES samples through
%% sum_absolute_stats/2, divides each total by ?SAMPLES, rounds it to an
%% integer and turns every mean into a tagged message. The options
%% argument is not used.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    Totals = recon:node_stats(
        ?SAMPLES, ?SAMPLE_INTERVAL, fun sum_absolute_stats/2, []
    ),
    [
        mean_to_message({Name, erlang:round(Total / ?SAMPLES)})
     || {Name, Total} <- Totals
    ].

%% @doc Builds the format string and argument list used to print one
%% `{Statistic, Mean}' result produced by check/1.
-spec format(term()) -> {io:format(), [term()]}.
format({Name, Mean}) ->
    {"Mean ~w over one second is ~w", [Name, Mean]}.