summary | refs | log | tree | commit | diff
path: root/ctdb/tools/ctdb_diagnostics
diff options
context:
space:
mode:
author Martin Schwenke <martin@meltin.net> 2011-10-07 15:00:42 +1100
committer Ronnie Sahlberg <ronniesahlberg@gmail.com> 2011-10-13 14:27:34 +1100
commit 71b8015ccfb7f9e257159bbac338ffcd1e5c8172 (patch)
tree 8dd8a5d160a7d4ad32b4cd533850cb6be29b8f96 /ctdb/tools/ctdb_diagnostics
parent a19ec048caf00cdbdeeafd23ab9968f77ccdc43f (diff)
download samba-71b8015ccfb7f9e257159bbac338ffcd1e5c8172.tar.gz
Make ctdb_diagnostics more resilient to uncontactable nodes.
Current behaviour is for onnode to time out (after about 20s) on each attempted ssh to a down node. With 40 or 50 invocations of onnode this takes a long time. Two changes work around this:

* If EXTRA_SSH_OPTS (which is passed to ssh by onnode) does not contain a ConnectTimeout= setting then add a setting for a 5 second timeout.

* Filter the nodes before starting any diagnosis, taking out any "bad nodes" that are uncontactable via onnode.

In the nodes summary at the beginning of the output, print information about any "bad nodes".

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit 8c3b6427dbaade87e1a0f5590f0894c2e69b31a3)
Diffstat (limited to 'ctdb/tools/ctdb_diagnostics')
-rwxr-xr-x ctdb/tools/ctdb_diagnostics 34
1 files changed, 33 insertions, 1 deletions
diff --git a/ctdb/tools/ctdb_diagnostics b/ctdb/tools/ctdb_diagnostics
index cf166ec09ad..117def8f6f1 100755
--- a/ctdb/tools/ctdb_diagnostics
+++ b/ctdb/tools/ctdb_diagnostics
@@ -18,6 +18,7 @@ EOF
}
nodes=$(ctdb listnodes -Y | cut -d: -f2)
+bad_nodes=""
diff_opts=
no_ads=false
@@ -45,6 +46,25 @@ parse_options ()
parse_options "$@"
+# Use 5s ssh timeout if EXTRA_SSH_OPTS doesn't set a timeout.
+case "$EXTRA_SSH_OPTS" in
+ *ConnectTimeout=*) : ;;
+ *)
+ export EXTRA_SSH_OPTS="${EXTRA_SSH_OPTS} -o ConnectTimeout=5"
+esac
+
+# Filter nodes. Remove any nodes we can't contact from $nodes and add
+# them to $bad_nodes.
+_nodes=""
+for _i in $nodes ; do
+ if onnode $_i true >/dev/null 2>&1 ; then
+ _nodes="${_nodes}${_nodes:+ }${_i}"
+ else
+ bad_nodes="${bad_nodes}${bad_nodes:+,}${_i}"
+ fi
+done
+nodes="$_nodes"
+
nodes_comma=$(echo $nodes | sed -e 's@[[:space:]]@,@g')
PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"
@@ -138,11 +158,23 @@ NUM_ERRORS=0
cat <<EOF
Diagnosis started on these nodes:
$nodes_comma
+EOF
+
+if [ -n "$bad_nodes" ] ; then
+ cat <<EOF
+
+NOT RUNNING DIAGNOSTICS on these uncontactable nodes:
+$bad_nodes
+EOF
+
+fi
+
+cat <<EOF
For reference, here is the nodes file on the current node...
EOF
-show_file /etc/ctdb/nodes
+show_file /etc/ctdb/nodes
cat <<EOF
--------------------------------------------------------------------