summaryrefslogtreecommitdiff
path: root/ctdb/config/events/legacy/60.nfs.script
blob: 2eb90b421c86d69253a115cf9a9af3c8809908dc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
#!/bin/sh
# script to manage nfs in a clustered environment

# Work out the CTDB base directory unless the caller already set it:
# take the physical directory holding this script and use its parent.
if [ -z "$CTDB_BASE" ] ; then
    CTDB_BASE=$(cd -P "$(dirname "$0")" ; dirname "$PWD")
fi

# Helpers called below (load_system_config, nfs_callout, ...) are not
# defined in this file - presumably they are provided by this include.
. "${CTDB_BASE}/functions"

service_name="nfs"

load_system_config "nfs"
load_script_options

ctdb_setup_state_dir "service" "$service_name"

######################################################################

service_reconfigure ()
{
    # Nothing to do unless an executable statd-callout is installed
    [ -x "${CTDB_BASE}/statd-callout" ] || return 0

    # Restart lock manager, notify clients.  The callout is started in
    # the background with all of its output discarded.
    "${CTDB_BASE}/statd-callout" notify >/dev/null 2>&1 &
}

######################################################################

######################################################
# Run the health check for every configured NFS service
#
# Checks are described by .check files in $CTDB_NFS_CHECKS_DIR
# (default: "${CTDB_BASE}/nfs-checks.d").  A file named
# NN.<progname>.check is fed on stdin to nfs_check_service <progname>.
######################################################
nfs_check_services ()
{
    _dir="${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}"

    # Only NN.<name>.check files are used - this skips editor backups,
    # packaging leftovers, ...
    for _f in "$_dir"/[0-9][0-9].*.check ; do
	# Unreadable files are skipped; this also covers the case where
	# nothing matched and the pattern was left unexpanded
	if [ ! -r "$_f" ] ; then
	    continue
	fi

	# Reduce the path to the bare program name: drop the directory,
	# then the NN. prefix, then the .check suffix
	_progname="${_f##*/}"
	_progname="${_progname#[0-9][0-9].}"
	_progname="${_progname%.check}"

	nfs_check_service "$_progname" <"$_f"
    done
}

######################################################
# Check the health of an NFS service
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
#
# Reads variables from stdin
#
# Variables are:
#
# * family             - "tcp" or "udp" or space separated list
#                        default: tcp, not used with "service_check_cmd"
# * version            - optional, RPC service version number
#                        default is to omit to check for any version,
#                        not used with "service_check_cmd"
# * unhealthy_after    - number of check fails before unhealthy
#                        default: 1
# * restart_every      - number of check fails before restart
#                        default: 0, meaning no restart
# * service_stop_cmd   - command to stop service
#                        default: no default, must be provided if
#                                 restart_every > 0
# * service_start_cmd  - command to start service
#                        default: no default, must be provided if
#                                 restart_every > 0
# * service_check_cmd  - command to check health of service
#                        default is to check RPC service using rpcinfo
# * service_debug_cmd  - command to debug a service after trying to stop it;
#                        for example, it can be useful to print stack
#                        traces of threads that have not exited, since
#                        they may be stuck doing I/O;
#                        no default, see also function program_stack_traces()
#
# Each line is read verbatim (backslashes are kept) and then passed to
# eval, so the outer quotes around a value are consumed.  Quotes that
# must survive into a command value need to be backslash-escaped.
#
# Result: returns 0 if the service is healthy or has not yet failed
# often enough to count as unhealthy.  On an unknown variable, or once
# the service is unhealthy, an ERROR line is printed and the calling
# shell exits with status 1.
######################################################
nfs_check_service ()
{
    _progname="$1"

    # This sub-shell is created to intentionally limit the scope of
    # variable values read from the .check files.
    # shellcheck disable=SC2030
    (
	# Defaults
	family="tcp"
	version=""
	unhealthy_after=1
	restart_every=0
	service_stop_cmd=""
	service_start_cmd=""
	service_check_cmd=""
	service_debug_cmd=""

	# Eval line-by-line.  Expands variable references in values.
	# Also allows variable name checking, which seems useful.
	#
	# -r keeps backslashes intact for eval; the extra -n test
	# processes a final line that has no trailing newline.
	#
	# NOTE: eval runs whatever the .check file contains, so these
	# files have to be trusted configuration.
	while read -r _line || [ -n "$_line" ] ; do
	    case "$_line" in
		\#*|"") : ;; # Ignore comments, blank lines

		family=*|version=*|\
		unhealthy_after=*|restart_every=*|\
		service_stop_cmd=*|service_start_cmd=*|\
		service_check_cmd=*|service_debug_cmd=*)

		    eval "$_line"
		    ;;
		*)
		    echo "ERROR: Unknown variable for ${_progname}: ${_line}"
		    exit 1
		    ;;
	    esac
	done

	_ok=false
	if [ -n "$service_check_cmd" ] ; then
	    # Using eval means variables can contain semicolon separated commands
	    if eval "$service_check_cmd" ; then
		_ok=true
	    else
		_err="monitoring service \"${_progname}\" failed"
	    fi
	else
	    if nfs_check_rpcinfo \
		   "$_progname" "$version" "$family" >/dev/null ; then
		_ok=true
	    else
		# Set by the rpcinfo check when it fails
		_err="$ctdb_check_rpc_out"
	    fi
	fi

	if $_ok ; then
	    # With the default thresholds every failure is reported
	    # immediately, so the counter only needs initialising when
	    # a non-default threshold is in use.
	    if [ "$unhealthy_after" -ne 1 ] || [ "$restart_every" -ne 0 ] ; then
		ctdb_counter_init "$_progname"
	    fi
	    exit 0
	fi

	ctdb_counter_incr "$_progname"
	_failcount=$(ctdb_counter_get "$_progname")

	# Unhealthy once the fail count reaches the threshold;
	# unhealthy_after=0 means never unhealthy
	_unhealthy=false
	if [ "$unhealthy_after" -gt 0 ] ; then
	    if [ "$_failcount" -ge "$unhealthy_after" ] ; then
		_unhealthy=true
		echo "ERROR: $_err"
	    fi
	fi

	# Restart on every restart_every-th failure.  Only warn here if
	# the failure has not already been reported as an error above.
	if [ "$restart_every" -gt 0 ] ; then
	    if [ $((_failcount % restart_every)) -eq 0 ] ; then
		if ! $_unhealthy ; then
		    echo "WARNING: $_err"
		fi
		nfs_restart_service
	    fi
	fi

	if $_unhealthy ; then
	    exit 1
	fi

	# Leave the sub-shell successfully
	exit 0
    ) || exit 1
}

# Restart the service named by $_progname: run the stop command, then
# the optional debug command, then hand the start command to
# background_with_logging (defined elsewhere).
#
# Uses: _progname, service_stop_cmd, service_start_cmd, service_debug_cmd
# This function is called within the sub-shell that shellcheck thinks
# loses the above variable values.
# shellcheck disable=SC2031
nfs_restart_service ()
{
    # Two separate tests rather than the obsolescent "-o" operator,
    # which can be ambiguous for unusual operand values
    if [ -z "$service_stop_cmd" ] || [ -z "$service_start_cmd" ] ; then
	die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
    fi

    echo "Trying to restart service \"${_progname}\"..."
    # Using eval means variables can contain semicolon separated commands
    eval "$service_stop_cmd"
    if [ -n "$service_debug_cmd" ] ; then
	eval "$service_debug_cmd"
    fi
    background_with_logging eval "$service_start_cmd"
}

######################################################
# Check an RPC service with rpcinfo
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
# $2 - version, optional, not passed to rpcinfo if empty/unset
# $3 - family, optional, default is "tcp"
#
# The rpcinfo output is always left in $ctdb_check_rpc_out.  On
# failure it is prefixed with a "<progname> failed RPC check:" line,
# printed to stdout, and 1 is returned.  On success nothing is
# printed and 0 is returned.
######################################################
ctdb_check_rpc ()
{
    _progname="$1"
    _version="$2"
    _family="${3:-tcp}"

    # The IPv6 families are checked against the IPv6 loopback address,
    # everything else against the IPv4 one; both can be overridden
    if [ "$_family" = "tcp6" ] || [ "$_family" = "udp6" ] ; then
	_localhost="${CTDB_RPCINFO_LOCALHOST6:-::1}"
    else
	_localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
    fi

    # $_version is not quoted because it is optional
    # shellcheck disable=SC2086
    if ctdb_check_rpc_out=$(rpcinfo -T "$_family" "$_localhost" \
				    "$_progname" $_version 2>&1) ; then
	return 0
    fi

    ctdb_check_rpc_out="$_progname failed RPC check:
$ctdb_check_rpc_out"
    echo "$ctdb_check_rpc_out"
    return 1
}

# Run ctdb_check_rpc for every requested family/version combination
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
# $2 - versions, optional, space separated; if empty a single check
#      without a version is made per family
# $3 - families, optional, space separated, default is "tcp"
#
# Stops at the first failing check and returns its status
nfs_check_rpcinfo ()
{
    _progname="$1"
    _versions="$2"
    _families="${3:-tcp}"

    for _family in $_families ; do
	# No versions given: one version-less check for this family
	if [ -z "$_versions" ] ; then
	    ctdb_check_rpc "$_progname" "" "$_family" || return $?
	    continue
	fi

	for _version in $_versions ; do
	    ctdb_check_rpc "$_progname" "$_version" "$_family" || return $?
	done
    done
}

##################################################################
# use statd-callout to update NFS lock info
#
# Does nothing, successfully, if there is no executable
# statd-callout; otherwise returns the status of "statd-callout update"
##################################################################
nfs_update_lock_info ()
{
    [ -x "$CTDB_BASE/statd-callout" ] || return 0

    "$CTDB_BASE/statd-callout" update
}

######################################################################

# script_state_dir set by ctdb_setup_state_dir()
# shellcheck disable=SC2154
nfs_callout_init "$script_state_dir"

# Dispatch on the event name in $1.  Events not listed here fall
# through to the successful exit at the bottom.
case "$1" in
startup|shutdown)
	# Handled entirely by the callout, which receives all arguments
	nfs_callout "$@" || exit $?
	;;

takeip|releaseip)
	nfs_callout "$@" || exit $?
	# Presumably flags the reconfigure that "ipreallocated" checks for
	ctdb_service_set_reconfigure
	;;

ipreallocated)
	if ctdb_service_needs_reconfigure ; then
		ctdb_service_reconfigure
	fi
	;;

monitor)
	nfs_callout "monitor-pre" || exit $?

	# Check that directories for shares actually exist,
	# unless this check has been switched off
	if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then
	    nfs_callout "monitor-list-shares" | ctdb_check_directories || \
		exit $?
	fi

	update_tickles 2049
	nfs_update_lock_info

	# Exits the script with status 1 if a service is unhealthy
	nfs_check_services

	nfs_callout "monitor-post" || exit $?
	;;
esac

exit 0