summaryrefslogtreecommitdiff
path: root/src/ceph-kdump-copy.in
blob: 1f12f86565d403a379029b501d20bf64d6f56139 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/bin/bash -norc

# Copyright (C) 2012 Alex Elder <elder@dreamhost.com>
#
# This is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 2.1, as published by the Free Software
# Foundation.  See file COPYING.

# Name this script was invoked as, with any leading directories
# removed; used to prefix diagnostics.  "$0" is quoted so that a path
# containing whitespace is not split into several words.
PROGNAME=$(basename -- "$0")

# Issue a message to stderr and exit with status 1.
#
# Arguments:
#   $@ - words of the message; they are joined with single spaces
#
# The message is prefixed with "${PROGNAME}: ".  This function does
# not return; it terminates the shell it is called in.
function err() {
	# "$*" joins the arguments using the first character of IFS;
	# pin it to a space so the output does not depend on the caller.
	local IFS=' '

	printf '%s: %s\n' "${PROGNAME}" "$*" >&2
	exit 1
}

######################

# This script is normally called by @sysconfdir@/init.d/ceph-kdump-copy,
# which will set up these variables based on its config file, found
# in @sysconfdir@/default/ceph-kdump-copy.
#
# The dump host and the user name used to reach it have no defaults,
# so refuse to run unless both are set and non-empty.

[ -n "${KDUMP_HOST}" ] ||
	err "KDUMP_HOST must be specified"
[ -n "${KDUMP_HOST_USER}" ] ||
	err "KDUMP_HOST_USER must be specified"

# Fill in a default for every optional setting that is unset or empty.

# Local directory in which crash dumps are saved.
: "${KDUMP_COREDIR:=/var/crash}"

# Name of the subdirectory on the dump host under which my dumps are
# collected; defaults to the output of hostname.
: "${KDUMP_HOST_MY_ID:=$(hostname)}"

# Directory on the dump host into which dumps are copied.
: "${KDUMP_HOST_COREDIR:=/var/crash/remote}"

# Full path on the dump host of the directory holding my dumps.
KDUMP_HOST_MY_COREDIR="${KDUMP_HOST_COREDIR}/${KDUMP_HOST_MY_ID}"

#####################################################################


# Print usage information on stderr and exit.
#
# Arguments:
#   $@ - optional words of an error message, joined with single spaces
#
# With no arguments this is a simple usage message and the exit status
# is 0.  Otherwise the message is displayed (prefixed with
# "${PROGNAME}: ") before the usage information and the exit status
# is 1.  This function does not return.
#
# NOTE(review): the text mentions crash_file arguments, while the
# "Usage:" line shows none -- confirm which one is intended.
function usage () {
	local status=0
	# "$*" joins the arguments using the first character of IFS;
	# pin it to a space so the output does not depend on the caller.
	local IFS=' '

	# Everything printed here is a diagnostic, so redirect the whole
	# group once.  A brace group runs in the current shell, which
	# keeps the assignment to status visible afterwards.
	{
		echo ""
		if [ $# -gt 0 ]; then
			status=1
			printf '%s: %s\n' "${PROGNAME}" "$*"
			echo ""
		fi
		echo "Usage: ${PROGNAME}"
		echo ""
		echo "    each crash_file is the name of a crash file in "
		echo "    ${KDUMP_COREDIR} generated by kernel_crashdump"
		echo ""
	} >&2

	exit "${status}"
}

# Execute a command (or several, separated by semicolons) on the dump
# host over ssh, logging in as ${KDUMP_HOST_USER} at ${KDUMP_HOST}.
# All arguments are handed to ssh unchanged, and the exit status is
# that of ssh.
function on_dump_host() {
	local remote_login="${KDUMP_HOST_USER}@${KDUMP_HOST}"

	ssh -T "${remote_login}" "$@"
}

# Print a plain-text crash summary on standard output.
#
# Must be run from a directory containing the files Uname, Date,
# Package and DistroRelease; their content is copied into the summary.
# The report also includes the local host name, architecture, the
# current time, and the value of the global ${CRASH_DIR}.
function summarize() {
	local -a report

	# One array element per output line
	report=(
		"Crash Summary"
		"-------------"
		"hostname:        $(hostname)"
		"host arch:       $(arch)"
		"time collected:  $(date)"
		"crash_dir:       ${CRASH_DIR}"
		""
		"crash uname:     $(cat Uname)"
		"crash timestamp: $(cat Date)"
		"kernel package:  $(cat Package)"
		"distribution:    $(cat DistroRelease)"
	)

	printf '%s\n' "${report[@]}"
}

# Collect files related to a dump so that the saved crash directory
# is self-contained.
#
# Arguments:
#   $1 - release id of the kernel that crashed; used as the suffix
#        of each file name that is looked up
#   $2 - directory into which the files are copied
#
# For each of the debug vmlinux, System.map, vmcoreinfo, config and
# abi files, "<file>-<release>" is copied into the crash directory and
# the copy is compressed.  A file that cannot be copied is skipped
# (cp reports the error) and the remaining files are still tried.
#
# Exits with status 99 if called with the wrong number of arguments.
function collect_dump_info() {
	[ $# -eq 2 ] || exit 99
	local crash_release="$1"
	local crash_dir="$2"
	local i original copy

	# We need the debug version of vmlinux matching the dump.
	# Grab a few other useful files from /boot as well.
	for i in @libdir@/debug/boot/vmlinux \
		/boot/System.map /boot/vmcoreinfo \
		/boot/config /boot/abi
	do
		original="${i}-${crash_release}"
		copy="${crash_dir}/$(basename "${original}")"

		# Only compress what was actually copied; running gzip on
		# a file that was never created just adds a second error.
		cp "${original}" "${copy}" &&
			gzip "${copy}"	# Compressing could be optional
	done
}

# Copy a directory containing a kdump and associated files to the
# dump host, then remove the local copy.
#
# Arguments:
#   $1 - name of the crash directory, relative to the current directory
#
# The directory is streamed as a tar archive to on_dump_host, which
# unpacks it under ${KDUMP_HOST_MY_COREDIR}.
#
# Returns non-zero, leaving the local directory in place, if either
# the local tar or the remote command failed.  Exits with status 99
# if called with the wrong number of arguments.
function move_crash_to_repository() {
	[ $# -eq 1 ] || exit 99
	local crash_dir="$1"
	local -a stage_status

	tar cf - "./${crash_dir}" |
	on_dump_host "tar -C '${KDUMP_HOST_MY_COREDIR}' -xf -"
	# A pipeline's own status is only that of its last command, so
	# look at every stage.  PIPESTATUS has to be saved right away
	# because the next command overwrites it.
	stage_status=( "${PIPESTATUS[@]}" )

	# Never delete the local copy unless both the sending and the
	# receiving side reported success.
	if [ "${stage_status[0]}" -ne 0 ] || [ "${stage_status[1]}" -ne 0 ]
	then
		return 1
	fi

	# Removing it should be the default, but optionally skipped
	rm -rf "./${crash_dir}"
}

# Process a single apport-generated crash file.
#
# Arguments:
#   $1 - name of the crash file, relative to the current directory
#
# The crash file is unpacked into a scratch directory and then removed.
# A date-stamped directory is created to hold a summary, the compressed
# core, copies of the matching kernel files and a README, and that
# directory is finally handed to move_crash_to_repository.
#
# Returns non-zero if a working directory cannot be created, if the
# crash file cannot be unpacked (it is then left in place), if the core
# cannot be saved, or if the final transfer fails.  Exits with status
# 99 if called with the wrong number of arguments.
function process_crash_file() {
	[ $# -eq 1 ] || exit 99
	local crash_file="$1"
	local apport_dir crash_release crash_dir

	apport_dir="${crash_file}-apport_dir"
	mkdir "${apport_dir}" || return 1

	# Unpack the crash file.  Only discard the original once its
	# content has actually been extracted; otherwise a failed unpack
	# would destroy the only copy of the dump.  The scratch directory
	# is removed on failure so that a later run can start afresh.
	if ! apport-unpack "${crash_file}" "${apport_dir}"; then
		rm -rf "${apport_dir}"
		return 1
	fi
	rm -f "${crash_file}"

	# Grab the release id from the kernel that crashed
	crash_release=$(awk '{print $2}' "${apport_dir}/Uname")

	# Create a date-stamped directory in which to hold this crash
	crash_dir=$(date '+%F-%T%z')
	mkdir "${crash_dir}" || return 1

	# Produce a summary.  summarize reads the unpacked files from its
	# working directory and reports ${CRASH_DIR}, so hand it the name
	# of our directory unless the environment already provides one.
	(
		cd "${apport_dir}" &&
		CRASH_DIR="${CRASH_DIR:-${crash_dir}}" summarize
	) > "${crash_dir}/summary.txt"
	gzip "${crash_dir}/summary.txt"

	# Save and compress the actual core file.  Give up before the
	# unpacked data is deleted if the core could not be moved.
	mv "${apport_dir}/VmCore" "${crash_dir}/vmcore-${crash_release}" ||
		return 1
	gzip "${crash_dir}/vmcore-${crash_release}"

	# We've got what we need from the crash file
	rm -rf "${apport_dir}"

	# Collect the other related files
	collect_dump_info "${crash_release}" "${crash_dir}"

	# Create a little README file
	(
		echo "To analyze the kernel core dump here:"
		echo "    gunzip 'vmcore-${crash_release}'"
		echo "    crash 'vmlinux-${crash_release}.gz' \\"
		echo "        'vmcore-${crash_release}'"
		echo ""
		echo "Other files provide additional context."
	) > "${crash_dir}/README"

	# Remove other files we don't have any need for
	rm config_link kernel_link system.map_link

	# Finally, copy the crash directory over to the repository
	move_crash_to_repository "${crash_dir}"
}

######### Start #########

# Crash files and working directories are all named relative to the
# local dump directory, so failing to get there is fatal.
cd "${KDUMP_COREDIR}" ||
	err "unable to change directory to '${KDUMP_COREDIR}'"

# Gather the crash files with a glob rather than by parsing the output
# of ls.  With nullglob set, a pattern that matches nothing expands to
# an empty list instead of to the pattern itself.
shopt -s nullglob
CRASH_FILES=( linux-image-*.crash )
shopt -u nullglob

# Quit if there's nothing to do
[ ${#CRASH_FILES[@]} -eq 0 ] && exit 0

# Make sure the directory to contain our dumps is there on the dump host
on_dump_host "mkdir -p '${KDUMP_HOST_MY_COREDIR}'" ||
	err "unable to create '${KDUMP_HOST_MY_COREDIR}' on host '${KDUMP_HOST}'"

# Now process each crash file; exit on the first error.
for crash_file in "${CRASH_FILES[@]}"; do
	process_crash_file "${crash_file}" ||
		err "unable to process '${crash_file}'"
done

exit 0