summary | refs | log | tree | commit | diff
path: root/ctdb
diff options
context:
space:
mode:
author Martin Schwenke <martin@meltin.net> 2019-03-29 11:19:55 +1100
committer Amitay Isaacs <amitay@samba.org> 2019-05-07 05:45:34 +0000
commit b80967f5dcc6b58db0c38ec3e5cf0cbe46dbeb4b (patch)
tree 66c0171b773cb11e555ffcd1248ee112017a952b /ctdb
parent 8108b3134c017c22d245fc5b2207a88d44ab0dd2 (diff)
download samba-b80967f5dcc6b58db0c38ec3e5cf0cbe46dbeb4b.tar.gz
ctdb-scripts: Drop script configuration variable CTDB_MONITOR_SWAP_USAGE
CTDB's system memory monitoring in 05.system.script monitors both main memory and swap. The swap monitoring was originally based on the (possibly incorrect, see below) idea that swap space stacks on top of main memory, so that when a system starts filling swap space then this is supposed to be a good sign that the system is running out of memory. Additionally, performance on a Linux system tends to be destroyed by the I/O associated with a lot of swapping to spinning disks.

However, some platforms default to creating only 4GB of swap space even when there is 128GB of main memory. With such a small swap to main memory ratio, memory pressure can force swap to be nearly full even when a significant amount of main memory is still available and the system is performing well. This suggests that checking swap utilisation might be less than useful in many circumstances.

So, remove the separate swap space checking and change the memory check to cover the total of main memory and swap space.

Test function set_mem_usage() still takes an argument for each of main memory and swap space utilisation. For simplicity, the same number is now passed twice to make the intended results comprehensible. This could be changed later.

A couple of tests are cleaned up to no longer use hard-coded /proc/meminfo and ps output.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
Diffstat (limited to 'ctdb')
-rwxr-xr-x ctdb/config/events/legacy/05.system.script 17
-rw-r--r-- ctdb/doc/ctdb-script.options.5.xml 21
-rwxr-xr-x ctdb/doc/examples/config_migrate.sh 2
-rwxr-xr-x ctdb/tests/eventscripts/05.system.monitor.011.sh 3
-rwxr-xr-x ctdb/tests/eventscripts/05.system.monitor.012.sh 3
-rwxr-xr-x ctdb/tests/eventscripts/05.system.monitor.013.sh 21
-rwxr-xr-x ctdb/tests/eventscripts/05.system.monitor.014.sh 4
-rwxr-xr-x ctdb/tests/eventscripts/05.system.monitor.015.sh 4
-rwxr-xr-x ctdb/tests/eventscripts/05.system.monitor.016.sh 19
-rwxr-xr-x ctdb/tests/eventscripts/05.system.monitor.017.sh 30
-rwxr-xr-x ctdb/tests/eventscripts/05.system.monitor.018.sh 81
11 files changed, 35 insertions, 170 deletions
diff --git a/ctdb/config/events/legacy/05.system.script b/ctdb/config/events/legacy/05.system.script
index e2ffeac715a..08e401a9e73 100755
--- a/ctdb/config/events/legacy/05.system.script
+++ b/ctdb/config/events/legacy/05.system.script
@@ -132,9 +132,6 @@ monitor_memory_usage ()
if [ -z "$CTDB_MONITOR_MEMORY_USAGE" ] ; then
CTDB_MONITOR_MEMORY_USAGE=80
fi
- if [ -z "$CTDB_MONITOR_SWAP_USAGE" ] ; then
- CTDB_MONITOR_SWAP_USAGE=25
- fi
_meminfo=$(get_proc "meminfo")
# Intentional word splitting here
@@ -149,21 +146,19 @@ $1 == "SwapFree:" { swapfree = $2 }
$1 == "SwapTotal:" { swaptotal = $2 }
END {
if (memavail != 0) { memfree = memavail ; }
- if (memtotal != 0) { print int((memtotal - memfree) / memtotal * 100) ; } else { print 0 ; }
- if (swaptotal != 0) { print int((swaptotal - swapfree) / swaptotal * 100) ; } else { print 0 ; }
+ if (memtotal + swaptotal != 0) {
+ usedtotal = memtotal - memfree + swaptotal - swapfree
+ print int(usedtotal / (memtotal + swaptotal) * 100)
+ } else {
+ print 0
+ }
}')
_mem_usage="$1"
- _swap_usage="$2"
check_thresholds "System memory" \
"$CTDB_MONITOR_MEMORY_USAGE" \
"$_mem_usage" \
dump_memory_info
-
- check_thresholds "System swap" \
- "$CTDB_MONITOR_SWAP_USAGE" \
- "$_swap_usage" \
- dump_memory_info
}
diff --git a/ctdb/doc/ctdb-script.options.5.xml b/ctdb/doc/ctdb-script.options.5.xml
index 9d545b5cc0d..6b2efb27ac2 100644
--- a/ctdb/doc/ctdb-script.options.5.xml
+++ b/ctdb/doc/ctdb-script.options.5.xml
@@ -964,27 +964,6 @@ CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=9000
</listitem>
</varlistentry>
- <varlistentry>
- <term>
- CTDB_MONITOR_SWAP_USAGE=<parameter>SWAP-LIMITS</parameter>
- </term>
- <listitem>
- <para>
- SWAP-LIMITS takes the form
- <parameter>WARN_LIMIT</parameter><optional>:<parameter>UNHEALTHY_LIMIT</parameter></optional>
- indicating that warnings should be logged if
- swap usage reaches WARN_LIMIT%. If usage reaches
- UNHEALTHY_LIMIT then the node should be flagged
- unhealthy. Either WARN_LIMIT or UNHEALTHY_LIMIT may be
- left blank, meaning that check will be omitted.
- </para>
- <para>
- Default is 25, so warnings will be logged when swap
- usage reaches 25%.
- </para>
- </listitem>
- </varlistentry>
-
</variablelist>
</refsect2>
diff --git a/ctdb/doc/examples/config_migrate.sh b/ctdb/doc/examples/config_migrate.sh
index 8479aeb39f3..e0d01e77057 100755
--- a/ctdb/doc/examples/config_migrate.sh
+++ b/ctdb/doc/examples/config_migrate.sh
@@ -209,6 +209,7 @@ CTDB_NOTIFY_SCRIPT
CTDB_PUBLIC_INTERFACE
CTDB_MAX_PERSISTENT_CHECK_ERRORS
CTDB_SHUTDOWN_TIMEOUT
+CTDB_MONITOR_SWAP_USAGE
EOF
}
@@ -262,7 +263,6 @@ CTDB_MAX_CORRUPT_DB_BACKUPS
# 05.system
CTDB_MONITOR_FILESYSTEM_USAGE
CTDB_MONITOR_MEMORY_USAGE
-CTDB_MONITOR_SWAP_USAGE
# debug_hung_scripts.sh
CTDB_DEBUG_HUNG_SCRIPT_STACKPAT
EOF
diff --git a/ctdb/tests/eventscripts/05.system.monitor.011.sh b/ctdb/tests/eventscripts/05.system.monitor.011.sh
index a7d2e99c2b7..6cd1dabbb37 100755
--- a/ctdb/tests/eventscripts/05.system.monitor.011.sh
+++ b/ctdb/tests/eventscripts/05.system.monitor.011.sh
@@ -2,13 +2,12 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
-define_test "Memory check, bad situation, default checks enabled"
+define_test "Memory check (default), warning situation"
setup
set_mem_usage 100 100
ok <<EOF
WARNING: System memory utilization 100% >= threshold 80%
-WARNING: System swap utilization 100% >= threshold 25%
EOF
simple_test
diff --git a/ctdb/tests/eventscripts/05.system.monitor.012.sh b/ctdb/tests/eventscripts/05.system.monitor.012.sh
index bc517081e42..9e840564f49 100755
--- a/ctdb/tests/eventscripts/05.system.monitor.012.sh
+++ b/ctdb/tests/eventscripts/05.system.monitor.012.sh
@@ -2,13 +2,12 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
-define_test "Memory check, good situation, all memory checks enabled"
+define_test "Memory check (custom, both), good situation"
setup
setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE="80:90"
-CTDB_MONITOR_SWAP_USAGE="1:50"
EOF
ok_null
diff --git a/ctdb/tests/eventscripts/05.system.monitor.013.sh b/ctdb/tests/eventscripts/05.system.monitor.013.sh
deleted file mode 100755
index f4ea7ded6d0..00000000000
--- a/ctdb/tests/eventscripts/05.system.monitor.013.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/sh
-
-. "${TEST_SCRIPTS_DIR}/unit.sh"
-
-define_test "Memory check, bad situation, custom swap critical"
-
-setup
-
-setup_script_options <<EOF
-CTDB_MONITOR_SWAP_USAGE=":50"
-EOF
-
-set_mem_usage 100 90
-required_result 1 <<EOF
-WARNING: System memory utilization 100% >= threshold 80%
-ERROR: System swap utilization 90% >= threshold 50%
-$FAKE_PROC_MEMINFO
-$(ps foobar)
-EOF
-
-simple_test
diff --git a/ctdb/tests/eventscripts/05.system.monitor.014.sh b/ctdb/tests/eventscripts/05.system.monitor.014.sh
index 1b6d2155272..9e2b21c9822 100755
--- a/ctdb/tests/eventscripts/05.system.monitor.014.sh
+++ b/ctdb/tests/eventscripts/05.system.monitor.014.sh
@@ -2,7 +2,7 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
-define_test "Memory check, bad memory situation, custom memory warning"
+define_test "Memory check (custom, warning only), warning situation"
setup
@@ -10,7 +10,7 @@ setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE="85:"
EOF
-set_mem_usage 90 10
+set_mem_usage 90 90
ok <<EOF
WARNING: System memory utilization 90% >= threshold 85%
EOF
diff --git a/ctdb/tests/eventscripts/05.system.monitor.015.sh b/ctdb/tests/eventscripts/05.system.monitor.015.sh
index 3f1fe9bfc46..0091c429ac1 100755
--- a/ctdb/tests/eventscripts/05.system.monitor.015.sh
+++ b/ctdb/tests/eventscripts/05.system.monitor.015.sh
@@ -2,7 +2,7 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
-define_test "Memory check, bad situation, custom memory critical"
+define_test "Memory check (custom, error only), error situation"
setup
@@ -10,7 +10,7 @@ setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE=":85"
EOF
-set_mem_usage 90 0
+set_mem_usage 90 90
required_result 1 <<EOF
ERROR: System memory utilization 90% >= threshold 85%
$FAKE_PROC_MEMINFO
diff --git a/ctdb/tests/eventscripts/05.system.monitor.016.sh b/ctdb/tests/eventscripts/05.system.monitor.016.sh
deleted file mode 100755
index 459b8ba76b4..00000000000
--- a/ctdb/tests/eventscripts/05.system.monitor.016.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/sh
-
-. "${TEST_SCRIPTS_DIR}/unit.sh"
-
-define_test "Memory check, bad situation, both memory checks, causes warning"
-
-setup
-
-setup_script_options <<EOF
-CTDB_MONITOR_MEMORY_USAGE="80:90"
-CTDB_MONITOR_SWAP_USAGE=""
-EOF
-
-set_mem_usage 87 0
-ok <<EOF
-WARNING: System memory utilization 87% >= threshold 80%
-EOF
-
-simple_test
diff --git a/ctdb/tests/eventscripts/05.system.monitor.017.sh b/ctdb/tests/eventscripts/05.system.monitor.017.sh
index 7f7480aed93..8eef4afc85d 100755
--- a/ctdb/tests/eventscripts/05.system.monitor.017.sh
+++ b/ctdb/tests/eventscripts/05.system.monitor.017.sh
@@ -2,7 +2,7 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
-define_test "Memory check, bad situation, both custom memory checks, causes unhealthy"
+define_test "Memory check (custom, both), error situation"
setup
@@ -10,33 +10,11 @@ setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE="70:80"
EOF
-set_mem_usage 87 0
+set_mem_usage 87 87
required_result 1 <<EOF
ERROR: System memory utilization 87% >= threshold 80%
-MemTotal: 3940712 kB
-MemFree: 225268 kB
-Buffers: 146120 kB
-Cached: 140904 kB
-SwapCached: 56016 kB
-Active: 2422104 kB
-Inactive: 1019928 kB
-Active(anon): 1917580 kB
-Inactive(anon): 523080 kB
-Active(file): 504524 kB
-Inactive(file): 496848 kB
-Unevictable: 4844 kB
-Mlocked: 4844 kB
-SwapTotal: 5857276 kB
-SwapFree: 5857276 kB
-...
-USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
-root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
-root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
-...
-root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
-root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
-...
-[MORE FAKE ps OUTPUT]
+$FAKE_PROC_MEMINFO
+$(ps foobar)
EOF
simple_test
diff --git a/ctdb/tests/eventscripts/05.system.monitor.018.sh b/ctdb/tests/eventscripts/05.system.monitor.018.sh
index 37578d8bbed..cd9305f8826 100755
--- a/ctdb/tests/eventscripts/05.system.monitor.018.sh
+++ b/ctdb/tests/eventscripts/05.system.monitor.018.sh
@@ -2,126 +2,81 @@
. "${TEST_SCRIPTS_DIR}/unit.sh"
-define_test "Check throttling of warnings"
+define_test "Memory check (custom, both), check throttling of warnings"
setup
setup_script_options <<EOF
CTDB_MONITOR_MEMORY_USAGE="70:80"
-CTDB_MONITOR_SWAP_USAGE=""
EOF
# Below threshold, nothing logged
-set_mem_usage 67 0
+set_mem_usage 67 67
ok_null
simple_test
-set_mem_usage 71 0
+set_mem_usage 71 71
ok "WARNING: System memory utilization 71% >= threshold 70%"
simple_test
# 2nd time at same level, nothing logged
-set_mem_usage 71 0
+set_mem_usage 71 71
ok_null
simple_test
-set_mem_usage 73 0
+set_mem_usage 73 73
ok "WARNING: System memory utilization 73% >= threshold 70%"
simple_test
# 2nd time at same level, nothing logged
-set_mem_usage 73 0
+set_mem_usage 73 73
ok_null
simple_test
-set_mem_usage 79 0
+set_mem_usage 79 79
ok "WARNING: System memory utilization 79% >= threshold 70%"
simple_test
-set_mem_usage 80 0
+set_mem_usage 80 80
required_result 1 <<EOF
ERROR: System memory utilization 80% >= threshold 80%
-MemTotal: 3940712 kB
-MemFree: 225268 kB
-Buffers: 146120 kB
-Cached: 416754 kB
-SwapCached: 56016 kB
-Active: 2422104 kB
-Inactive: 1019928 kB
-Active(anon): 1917580 kB
-Inactive(anon): 523080 kB
-Active(file): 504524 kB
-Inactive(file): 496848 kB
-Unevictable: 4844 kB
-Mlocked: 4844 kB
-SwapTotal: 5857276 kB
-SwapFree: 5857276 kB
-...
-USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
-root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
-root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
-...
-root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
-root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
-...
-[MORE FAKE ps OUTPUT]
+$FAKE_PROC_MEMINFO
+$(ps foobar)
EOF
simple_test
# Fall back into warning at same level as last warning... should log
-set_mem_usage 79 0
+set_mem_usage 79 79
ok "WARNING: System memory utilization 79% >= threshold 70%"
simple_test
# Below threshold, notice
-set_mem_usage 69 0
+set_mem_usage 69 69
ok <<EOF
NOTICE: System memory utilization 69% < threshold 70%
EOF
simple_test
# Further reduction, nothing logged
-set_mem_usage 68 0
+set_mem_usage 68 68
ok_null
simple_test
# Back up into warning at same level as last warning... should log
-set_mem_usage 79 0
+set_mem_usage 79 79
ok "WARNING: System memory utilization 79% >= threshold 70%"
simple_test
# Back up above critical threshold... unhealthy
-set_mem_usage 81 0
+set_mem_usage 81 81
required_result 1 <<EOF
ERROR: System memory utilization 81% >= threshold 80%
-MemTotal: 3940712 kB
-MemFree: 225268 kB
-Buffers: 146120 kB
-Cached: 377347 kB
-SwapCached: 56016 kB
-Active: 2422104 kB
-Inactive: 1019928 kB
-Active(anon): 1917580 kB
-Inactive(anon): 523080 kB
-Active(file): 504524 kB
-Inactive(file): 496848 kB
-Unevictable: 4844 kB
-Mlocked: 4844 kB
-SwapTotal: 5857276 kB
-SwapFree: 5857276 kB
-...
-USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
-root 2 0.0 0.0 0 0 ? S Aug28 0:00 [kthreadd]
-root 3 0.0 0.0 0 0 ? S Aug28 0:43 \_ [ksoftirqd/0]
-...
-root 1 0.0 0.0 2976 624 ? Ss Aug28 0:07 init [2]
-root 495 0.0 0.0 3888 1640 ? Ss Aug28 0:00 udevd --daemon
-...
-[MORE FAKE ps OUTPUT]
+$FAKE_PROC_MEMINFO
+$(ps foobar)
EOF
simple_test
# Straight back down to a good level... notice
-set_mem_usage 65 0
+set_mem_usage 65 65
ok "NOTICE: System memory utilization 65% < threshold 70%"
simple_test