summaryrefslogtreecommitdiff
path: root/ctdb/utils
diff options
context:
space:
mode:
authorMathieu Parent <math.parent@gmail.com>2011-11-05 16:39:55 +0100
committerRonnie Sahlberg <ronniesahlberg@gmail.com>2011-11-08 16:34:15 +1100
commit68d81587dca9e7c2039dc4397ec20a28af4556d8 (patch)
tree76bc7648ba088a9e2f0ce2dff0ff20a42a900625 /ctdb/utils
parentfa197073cde30ffefb79163c5a0a7b94fb3485bd (diff)
downloadsamba-68d81587dca9e7c2039dc4397ec20a28af4556d8.tar.gz
Nagios plugin for CTDB
This plugin is GPL2 or greater as generally found in Nagios. (this is obviously compatible with GPL3 or greater). (This used to be ctdb commit df1ac1cfd65f32743ca2588edfdad46ce5a4f03f)
Diffstat (limited to 'ctdb/utils')
-rw-r--r--ctdb/utils/nagios/README56
-rw-r--r--ctdb/utils/nagios/check_ctdb259
2 files changed, 315 insertions, 0 deletions
diff --git a/ctdb/utils/nagios/README b/ctdb/utils/nagios/README
new file mode 100644
index 00000000000..99fa6dc328d
--- /dev/null
+++ b/ctdb/utils/nagios/README
@@ -0,0 +1,56 @@
+check_ctdb 0.3
+
+This nagios plugin is free software, and comes with ABSOLUTELY NO WARRANTY.
+It may be used, redistributed and/or modified under the terms of the GNU
+General Public Licence (see http://www.fsf.org/licensing/licenses/gpl.txt).
+
+CTDB plugin
+
+Usage: check_ctdb -i <info>
+ [ -t <timeout> ] [ -w <warn_range> ] [ -c <crit_range> ]
+ [ -H <host> ] [-s] [ -l <login_name> ]
+ [ -V ] [ -h ]
+
+ -?, --usage
+ Print usage information
+ -h, --help
+ Print detailed help screen
+ -V, --version
+ Print version information
+ --extra-opts=[section][@file]
+ Read options from an ini file. See http://nagiosplugins.org/extra-opts for usage
+ -i, --info=<info>
+ Information: One of scriptstatus or ping.
+ -H, --hostname=<login_name>
+ Host name or IP Address.
+ -s, --sudo
+ Use sudo.
+ -l, --login=<host>
+ The user to log in as on the remote machine.
+ -w, --warning=THRESHOLD
+ Warning threshold. See
+ http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
+ for the threshold format.
+ -c, --critical=THRESHOLD
+ Critical threshold. See
+ http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
+ for the threshold format.
+ -t, --timeout=INTEGER
+ Seconds before plugin times out (default: 30)
+ -v, --verbose
+ Show details for command-line debugging (can repeat up to 3 times)
+Supported commands:
+ * scriptstatus :
+ check the ctdb scriptstatus command and return CRITICAL if one of the
+ scripts fails.
+ Perfdata count the number of scripts by state (ok, disabled, error,
+ total).
+ * ping :
+ check the ctdb ping command.
+ Perfdata count the number of nodes, the total ping time and the number
+ of clients.
+ Thresholds are checked against the number of nodes.
+
+
+Copyright (c) 2011 Nantes Metropole
+
diff --git a/ctdb/utils/nagios/check_ctdb b/ctdb/utils/nagios/check_ctdb
new file mode 100644
index 00000000000..943033378c1
--- /dev/null
+++ b/ctdb/utils/nagios/check_ctdb
@@ -0,0 +1,259 @@
+#!/usr/bin/perl -w
+# Nagios plugin to monitor CTDB (Clustered Trivial Database)
+#
+# License: GPL
+# Copyright (c) 2011 Nantes Metropole
+# Author: Mathieu Parent <math.parent@gmail.com>
+# Contributor(s): -
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+use strict;
+use warnings;
+use vars qw($PROGNAME $VERSION $output $values $result);
+use Nagios::Plugin;
+use File::Basename;
+
+$PROGNAME = basename($0);
+$VERSION = '0.3';
+
+my $np = Nagios::Plugin->new(
+ usage => "Usage: %s -i <info>\n"
+ . " [ -t <timeout> ] [ -w <warn_range> ] [ -c <crit_range> ]\n"
+ . " [ -H <host> ] [-s] [ -l <login_name> ]\n"
+ . ' [ -V ] [ -h ]',
+ version => $VERSION,
+ plugin => $PROGNAME,
+ shortname => uc($PROGNAME),
+ blurb => 'CTDB plugin',
+ extra => "Supported commands:\n"
+ . " * scriptstatus :\n"
+ . " check the ctdb scriptstatus command and return CRITICAL if one of the\n"
+ . " scripts fails.\n"
+ . " Perfdata count the number of scripts by state (ok, disabled, error,\n"
+ . " total).\n"
+ . " * ping :\n"
+ . " check the ctdb ping command.\n"
+ . " Perfdata count the number of nodes, the total ping time and the number\n"
+ . " of clients.\n"
+ . " Thresholds are checked against the number of nodes.\n"
+ . "\n\nCopyright (c) 2011 Nantes Metropole",
+ timeout => 30,
+);
+
+$np->add_arg(
+ spec => 'info|i=s',
+ help => "-i, --info=<info>\n"
+ . ' Information: One of scriptstatus or ping.',
+ required => 1,
+);
+
+$np->add_arg(
+ spec => 'hostname|H=s',
+ help => "-H, --hostname=<login_name>\n"
+ . ' Host name or IP Address.',
+ required => 0,
+);
+
+$np->add_arg(
+ spec => 'sudo|s',
+ help => "-s, --sudo\n"
+ . ' Use sudo.',
+ required => 0,
+);
+
+$np->add_arg(
+ spec => 'login|l=s',
+ help => "-l, --login=<host>\n"
+ . ' The user to log in as on the remote machine.',
+ required => 0,
+);
+
+$np->add_arg(
+ spec => 'warning|w=s',
+ help => "-w, --warning=THRESHOLD\n"
+ . " Warning threshold. See\n"
+ . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
+ . ' for the threshold format.',
+ required => 0,
+);
+
+$np->add_arg(
+ spec => 'critical|c=s',
+ help => "-c, --critical=THRESHOLD\n"
+ . " Critical threshold. See\n"
+ . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
+ . ' for the threshold format.',
+ required => 0,
+);
+
+$np->getopts;
+
+my $info = $np->opts->info;
+my $hostname = $np->opts->hostname;
+my $login = $np->opts->login;
+my $sudo = $np->opts->sudo;
+my $warning = $np->opts->warning;
+my $critical = $np->opts->critical;
+my $percw;
+my $percc;
+
+$output = "";
+$result = OK;
+
+if (defined($critical))
+{
+ ($percc, $critical) = check_percantage($critical);
+ $critical = undef if ($critical eq '');
+}
+
+if (defined($warning))
+{
+ ($percw, $warning) = check_percantage($warning);
+ $warning = undef if ($warning eq '');
+}
+
+$np->set_thresholds(critical => $critical, warning => $warning);
+
+my $stderr;
+
+sub safe_open_command {
+ unshift @_, "sudo" if $sudo;
+ if ($hostname) {
+ unshift @_, $hostname;
+ unshift @_, "-l", $login if $login;
+ unshift @_, "ssh";
+ }
+ open(OLDERR, ">&", \*STDERR) or die "Can't dup STDERR: $!";
+ $stderr = "";
+ close STDERR;
+ open(STDERR, ">>", \$stderr) or die "Can't open STDERR: $!";
+ if (!open(PIPE, '-|', @_)) {
+ $result = CRITICAL;
+ $output = "Cannot open command '@_': $! ($stderr)";
+ # restore STDERR
+ open(STDERR, ">", \*OLDERR) or die "Can't dup OLDERR: $!";
+ }
+}
+
+sub safe_close_command {
+ close(PIPE);
+
+ if ($? == -1) {
+ $result = CRITICAL;
+ $output = "failed to execute: $!";
+ } elsif ($? & 127) {
+ $result = CRITICAL;
+ $output = sprintf("child died with signal %d, %s coredump",
+ ($? & 127), ($? & 128) ? 'with' : 'without');
+ } elsif ($? >> 8) {
+ $result = CRITICAL;
+ $output = sprintf("child exited with value %d", $? >> 8);
+ }
+ # restore STDERR
+ open(STDERR, ">&OLDERR") or die "Can't dup OLDERR: $!";
+}
+
+# main :
+
+if ($info eq "scriptstatus") {
+ safe_open_command('ctdb', '-Y', 'scriptstatus');
+ if ($result == OK) {
+ my $script_count = 0;
+ my $ok_script_count = 0;
+ my $disabled_script_count = 0;
+ my $error_script_count = 0;
+ while (<PIPE>) {
+ next if $. == 1; # Header
+ $script_count++;
+ chop;
+ my ($type, $name, $code, $status, $start, $end, @error) = split(":");
+ my $error = join(':', @error);
+ if ($error ne "") {
+ $output = "$output ;; " if $output;
+ $output = "$output$name ($status=$code): $error";
+ if ($result != CRITICAL) {
+ $result = WARNING;
+ }
+ }
+ if ($status eq "OK") {
+ $ok_script_count++;
+ next;
+ }
+ if ($status eq "DISABLED") {
+ $disabled_script_count++;
+ next;
+ }
+ $error_script_count++;
+ $result = WARNING;
+ }
+ safe_close_command();
+ $np->add_perfdata(label => "ok", value => $ok_script_count, uom => '',
+ min => 0, max => $script_count);
+ $np->add_perfdata(label => "disabled", value => $disabled_script_count, uom => '',
+ min => 0, max => $script_count);
+ $np->add_perfdata(label => "error", value => $error_script_count, uom => '',
+ min => 0, max => $script_count, warning => '0', critical => '0');
+ $np->add_perfdata(label => "total", value => $script_count, uom => '',
+ min => 0, max => $script_count);
+ if ($result == OK) {
+ $result = $np->check_threshold(check => $error_script_count, warning => '0', critical => '0');
+ }
+ }
+ $np->nagios_exit($result, $output);
+} elsif ($info eq "ping") {
+ safe_open_command('ctdb', '-n', 'all', 'ping');
+ if ($result == OK) {
+ my $nodes_count = 0;
+ my $time_total = 0.0;
+ my $clients_count = 0;
+ open(CTDB_NODES, "/etc/ctdb/nodes");
+ 1 while( <CTDB_NODES> );
+ my $max_nodes_count = $.;
+
+ while (<PIPE>) {
+ chop;
+ if ($_ =~ /^response from (\d+) time=([0-9.]+) sec \((\d+) clients\)$/) {
+ my ($node_id, $time, $clients) = ($1,$2,$3);
+ $nodes_count += 1;
+ $time_total += $time;
+ $clients_count += $clients;
+ } else {
+ $result = CRITICAL;
+ $output = "'$_' doesn't match regexp."
+ }
+ }
+ safe_close_command();
+ $np->add_perfdata(label => "nodes", value => $nodes_count, uom => '',
+ min => 0, max => $max_nodes_count, warning => $warning, critical => $critical);
+ $np->add_perfdata(label => "ping_time", value => $time_total, uom => 's',
+ min => 0, max => undef);
+ $np->add_perfdata(label => "clients", value => $clients_count, uom => '',
+ min => 0, max => undef);
+ if ($result == OK) {
+ $result = $np->check_threshold(check => $nodes_count);
+ }
+ }
+ $np->nagios_exit($result, $output);
+} else {
+ $np->nagios_exit(UNKNOWN, "Unknown command: '$info'");
+}
+
+sub check_percantage
+{
+ my ($number) = shift(@_);
+ my $perc = $number =~ s/\%//;
+ return ($perc, $number);
+}
+