#!/bin/bash -norc

# Copyright (C) 2012 Alex Elder <elder@dreamhost.com>
#
# This is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 2.1, as published by the Free Software
# Foundation.  See file COPYING.

# Name this script was invoked as; used to prefix diagnostic messages.
# "$0" is quoted so a path containing whitespace is not word-split, and
# "--" keeps a name starting with "-" from being read as an option.
PROGNAME=$(basename -- "$0")

# Issue a message to stderr, prefixed with the program name, and exit
# the script with status 1.
#   Globals:   PROGNAME (read)
#   Arguments: the words of the message
function err() {
	# "$*" joins all arguments into a single word (separated by the
	# first character of IFS, a space by default) so that the prefix
	# and the message form one string.
	printf '%s\n' "${PROGNAME}: $*" >&2
	exit 1
}

######################

# This script is normally called by @sysconfdir@/init.d/ceph-kdump-copy,
# which will set up these variables based on its config file, found
# in @sysconfdir@/default/ceph-kdump-copy.

# The dump host and the account used to reach it have no sensible
# default, so refuse to run without them.
[ -z "${KDUMP_HOST}" ] &&
	err "KDUMP_HOST must be specified"
[ -z "${KDUMP_HOST_USER}" ] &&
	err "KDUMP_HOST_USER must be specified"

# Each of the settings below keeps the value supplied by the caller and
# is given a default only when it is unset or empty.

# Local directory in which crash dumps are saved.
: "${KDUMP_COREDIR:=/var/crash}"

# Name of the subdirectory on the dump host that holds this machine's
# dumps; the local host name unless overridden.
: "${KDUMP_HOST_MY_ID:=$(hostname)}"

# Directory on the dump host beneath which dumps are collected.
: "${KDUMP_HOST_COREDIR:=/var/crash/remote}"

# Full path on the dump host that receives this machine's dumps.
KDUMP_HOST_MY_COREDIR="${KDUMP_HOST_COREDIR}/${KDUMP_HOST_MY_ID}"

#####################################################################


# Print usage information on stderr and exit.
#
# With no arguments this is a plain usage request and the exit status
# is 0.  Otherwise the arguments are printed as an error message ahead
# of the usage text and the exit status is 1.
#   Globals:   PROGNAME, KDUMP_COREDIR (read)
#   Arguments: optional words of an error message
function usage () {
	local status=0

	printf '\n' >&2
	if [ $# -gt 0 ]; then
		status=1
		# "$*" joins the arguments into the single message string
		printf '%s\n\n' "${PROGNAME}: $*" >&2
	fi
	# One output line per argument
	printf '%s\n' \
		"Usage: ${PROGNAME}" \
		"" \
		"    each crash_file is the name of a crash file in " \
		"    ${KDUMP_COREDIR} generated by kernel_crashdump" \
		"" >&2

	exit "${status}"
}

# Execute the given command line on the dump host over ssh, logging in
# as ${KDUMP_HOST_USER}.  All arguments are handed to ssh unchanged, so
# several commands may be supplied separated by semicolons.  The exit
# status is that of ssh.
function on_dump_host() {
	local destination="${KDUMP_HOST_USER}@${KDUMP_HOST}"

	# -T: do not allocate a pseudo-terminal
	ssh -T "${destination}" "$@"
}

# Write a plain-text summary of a crash to stdout, based on apport
# crash file content.
#
# Must be run with the unpacked apport directory as the current
# directory: the Uname, Date, Package and DistroRelease files are read
# from there.
#   Arguments: $1 - (optional) name of the crash directory to report
#   Globals:   when $1 is omitted the name is taken from ${CRASH_DIR}
#              or, failing that, from a ${crash_dir} variable visible
#              in the calling function
function summarize() {
	local reported_dir="${1:-${CRASH_DIR:-${crash_dir:-}}}"

	echo "Crash Summary"
	echo "-------------"
	echo "hostname:        $(hostname)"
	echo "host arch:       $(arch)"
	echo "time collected:  $(date)"
	echo "crash_dir:       ${reported_dir}"
	echo ""
	echo "crash uname:     $(cat Uname)"
	echo "crash timestamp: $(cat Date)"
	echo "kernel package:  $(cat Package)"
	echo "distribution:    $(cat DistroRelease)"
}

# Collect files related to a dump so that the saved crash directory is
# self-contained: the debug version of vmlinux matching the crashed
# kernel plus a few other useful files from /boot.  Each file is copied
# into the crash directory and compressed there.
#
# Collection is best-effort: a file that cannot be copied is skipped
# (cp reports the problem) and the remaining files are still tried.
#   Arguments: $1 - release id of the kernel that crashed
#              $2 - directory in which to place the copies
#   Returns:   0 if every file was copied and compressed, 1 otherwise
#   Exits with status 99 if called with the wrong number of arguments.
function collect_dump_info() {
	[ $# -eq 2 ] || exit 99
	local crash_release="$1"
	local crash_dir="$2"
	local prefix original copy
	local status=0

	# We need the debug version of vmlinux matching the dump.
	# Grab a few other useful files from /boot as well.
	for prefix in @libdir@/debug/boot/vmlinux \
		/boot/System.map /boot/vmcoreinfo \
		/boot/config /boot/abi
	do
		original="${prefix}-${crash_release}"
		copy="${crash_dir}/${original##*/}"

		# Only compress what was actually copied
		if cp "${original}" "${copy}"; then
			gzip "${copy}" || status=1	# Compressing could be optional
		else
			status=1
		fi
	done

	return "${status}"
}

# Copy a directory containing a kdump and associated files to this
# machine's directory on the dump host, then remove the local copy.
#
# The directory is streamed as a tar archive to a tar process run on
# the dump host.  The local copy is removed only if BOTH ends of that
# pipeline succeed; otherwise it is left in place.
#   Arguments: $1 - crash directory, relative to the current directory
#   Globals:   KDUMP_HOST_MY_COREDIR (read)
#   Returns:   0 on success, non-zero if the transfer or removal failed
#   Exits with status 99 if called with the wrong number of arguments.
function move_crash_to_repository() {
	[ $# -eq 1 ] || exit 99
	local crash_dir="$1"
	local -a stage_status

	tar cf - "./${crash_dir}" |
		on_dump_host "tar -C '${KDUMP_HOST_MY_COREDIR}' -xf -"
	# A pipeline reports only its last command's status, so a failure
	# of the local tar has to be looked up explicitly.
	stage_status=("${PIPESTATUS[@]}")
	if [ "${stage_status[0]}" -ne 0 ] || [ "${stage_status[1]}" -ne 0 ]; then
		return 1
	fi

	# Removing it should be the default, but optionally skipped
	# (:? aborts rather than expanding to "./" if the name is empty)
	rm -rf "./${crash_dir:?}"
}

# Process a single apport-generated crash file.
#
# The crash file is unpacked and its contents re-bundled into a
# date-stamped directory holding a summary, the compressed core file,
# the matching kernel files and a README.  That directory is then
# moved to the dump host.
#   Arguments: $1 - crash file, relative to the current directory
#   Returns:   non-zero if the crash file could not be unpacked, the
#              crash directory or core file could not be saved, or the
#              transfer to the dump host failed; 0 otherwise
#   Exits with status 99 if called with the wrong number of arguments.
function process_crash_file() {
	[ $# -eq 1 ] || exit 99
	local crash_file="$1"
	local apport_dir crash_release crash_dir

	apport_dir="${crash_file}-apport_dir"
	mkdir -p "${apport_dir}" || return 1

	# Unpack the crash file.  The original is removed only once it has
	# been unpacked successfully, so a failure here loses nothing and
	# leaves no partial directory in the way of a later attempt.
	if ! apport-unpack "${crash_file}" "${apport_dir}"; then
		rm -rf "${apport_dir}"
		return 1
	fi
	rm -f "${crash_file}"

	# Grab the release id from the kernel that crashed
	crash_release=$(awk '{print $2}' "${apport_dir}/Uname")

	# Create a date-stamped directory in which to hold this crash
	crash_dir=$(date '+%F-%T%z')
	mkdir "${crash_dir}" || return 1

	# Produce a compressed summary.  CRASH_DIR tells summarize which
	# directory name to report; summarize runs only if the cd worked.
	( cd "${apport_dir}" && CRASH_DIR="${crash_dir}" summarize ) \
		> "${crash_dir}/summary.txt"
	gzip "${crash_dir}/summary.txt"

	# Save and compress the actual core file
	mv "${apport_dir}/VmCore" "${crash_dir}/vmcore-${crash_release}" ||
		return 1
	gzip "${crash_dir}/vmcore-${crash_release}" || return 1

	# We've got what we need from the crash file
	rm -rf "${apport_dir}"

	# Collect the other related files.  A missing file is not a reason
	# to hold back the dump itself, so the status is not checked.
	collect_dump_info "${crash_release}" "${crash_dir}"

	# Create a little README file
	(
		echo "To analyze the kernel core dump here:"
		echo "    gunzip 'vmcore-${crash_release}'"
		echo "    crash 'vmlinux-${crash_release}.gz' \\"
		echo "        'vmcore-${crash_release}'"
		echo ""
		echo "Other files provide additional context."
	) > "${crash_dir}/README"

	# Remove other files we don't have any need for (-f: they may
	# already be gone)
	rm -f config_link kernel_link system.map_link

	# Finally, copy the crash directory over to the repository; its
	# status is the status of this function
	move_crash_to_repository "${crash_dir}"
}

######### Start #########

# Everything below works on names relative to the dump directory, and
# removes files there, so do not carry on from some other directory.
cd "${KDUMP_COREDIR}" ||
	err "unable to change directory to '${KDUMP_COREDIR}'"

# Let the shell expand the pattern rather than parsing ls output; with
# nullglob set, a pattern that matches nothing yields an empty list.
shopt -s nullglob
CRASH_FILES=( linux-image-*.crash )
shopt -u nullglob
if [ ${#CRASH_FILES[@]} -eq 0 ]; then
	exit 0	# Quit if there's nothing to do
fi

# Make sure the directory to contain our dumps is there on the dump host
on_dump_host "mkdir -p '${KDUMP_HOST_MY_COREDIR}'" ||
	err "unable to create '${KDUMP_HOST_MY_COREDIR}' on host '${KDUMP_HOST}'"

# Now process each crash file; exit on the first error.
for crash_file in "${CRASH_FILES[@]}"; do
	process_crash_file "${crash_file}" ||
		err "unable to process '${crash_file}'"
done

exit 0