summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam Thursfield <sam.thursfield@codethink.co.uk>2015-04-13 12:31:46 +0000
committerBaserock Gerrit <gerrit@baserock.org>2015-04-21 18:25:12 +0000
commit985d512ad9969b9216720a7dc9274b41bb2802eb (patch)
tree6d45125cf8aed806f91be5b542b073329e6c5dc8
parent64465445f2a95d74cb4a5bae3ab0d1783d6de68e (diff)
downloaddefinitions-985d512ad9969b9216720a7dc9274b41bb2802eb.tar.gz
Add distbuild-trove-nfsboot.write
The nfsboot.write deployment extension has been deprecated for a while because it's not generally useful. It's only used for deploying distbuild nodes to a Trove, as far as I know. We still need to support setting up a bunch of machines that boot over NFS from a Trove. But we can do this in a special-purpose .write extension. The new distbuild-trove-nfsboot.write is much more efficient than the more generic nfsboot.write: instead of treating each system individually (thus copying an almost identical ~2GB rootfs to the Trove once per node) it copies the system image to the Trove once, and /then/ sets up a rootfs per node. Upgrades are now supported, although the code assumes distbuild nodes are stateless (as they should be) so nothing special is done for upgrades, other than checking that there is already a version of the given system in existence. The new extension does not create an orig/ and run/ version of each system, because there is no need when the deployed system is stateless. There could be further gains in efficiency, but I don't have time to do them right now. This write extension is full of compromises; its goal is to better support the existing users who have a Trove and a distbuild network deployed via NFS. It is specifically not intended to be useful for other purposes. Change-Id: I9a50c58b714ed272212d1d6c55b289aaa96051b1
-rwxr-xr-xdistbuild-trove-nfsboot.check150
-rw-r--r--distbuild-trove-nfsboot.help49
-rwxr-xr-xdistbuild-trove-nfsboot.write283
3 files changed, 482 insertions, 0 deletions
diff --git a/distbuild-trove-nfsboot.check b/distbuild-trove-nfsboot.check
new file mode 100755
index 00000000..38c491e5
--- /dev/null
+++ b/distbuild-trove-nfsboot.check
@@ -0,0 +1,150 @@
+#!/usr/bin/python
+# Copyright (C) 2014-2015 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+
+'''Preparatory checks for Morph 'distbuild-trove-nfsboot' write extension'''
+
+import cliapp
+import logging
+import os
+
+import morphlib.writeexts
+
+
+class DistbuildTroveNFSBootCheckExtension(morphlib.writeexts.WriteExtension):
+
+ nfsboot_root = '/srv/nfsboot'
+ remote_user = 'root'
+
+ required_vars = [
+ 'DISTBUILD_CONTROLLER',
+ 'DISTBUILD_GIT_SERVER',
+ 'DISTBUILD_SHARED_ARTIFACT_CACHE',
+ 'DISTBUILD_TROVE_ID',
+ 'DISTBUILD_WORKERS',
+ 'DISTBUILD_WORKER_SSH_KEY',
+ ]
+
+ def system_path(self, system_name, version_label=None):
+ if version_label:
+ return os.path.join(self.nfsboot_root, system_name, 'systems',
+ version_label, 'run')
+ else:
+ return os.path.join(self.nfsboot_root, system_name)
+
+ def process_args(self, args):
+ if len(args) != 1:
+ raise cliapp.AppException('Wrong number of command line args')
+
+ nfs_host = args[0]
+ nfs_netloc = '%s@%s' % (self.remote_user, nfs_host)
+
+ version_label = os.getenv('VERSION_LABEL', 'factory')
+
+ missing_vars = [var for var in self.required_vars
+ if not var in os.environ]
+ if missing_vars:
+ raise cliapp.AppException(
+ 'Please set: %s' % ', '.join(missing_vars))
+
+ controllers = os.getenv('DISTBUILD_CONTROLLER').split()
+ workers = os.getenv('DISTBUILD_WORKERS').split()
+
+ if len(controllers) != 1:
+ raise cliapp.AppException('Please specify exactly one controller.')
+
+ if len(workers) == 0:
+ raise cliapp.AppException('Please specify at least one worker.')
+
+ upgrade = self.get_environment_boolean('UPGRADE')
+
+ self.check_good_server(nfs_netloc)
+
+ system_names = set(controllers + workers)
+ for system_name in system_names:
+ if upgrade:
+ self.check_upgradeable(nfs_netloc, system_name, version_label)
+ else:
+ system_path = self.system_path(system_name)
+
+ if self.remote_directory_exists(nfs_netloc, system_path):
+ if self.get_environment_boolean('OVERWRITE') == False:
+ raise cliapp.AppException(
+ 'System %s already exists at %s:%s. Try `morph '
+ 'upgrade` instead of `morph deploy`.' % (
+ system_name, nfs_netloc, system_path))
+
+ def check_good_server(self, netloc):
+ # FIXME: assumes root
+ self.check_ssh_connectivity(netloc.split('@')[-1])
+
+ # Is an NFS server
+ try:
+ cliapp.ssh_runcmd(
+ netloc, ['test', '-e', '/etc/exports'])
+ except cliapp.AppException:
+ raise cliapp.AppException('server %s is not an nfs server'
+ % netloc)
+ try:
+ cliapp.ssh_runcmd(
+ netloc, ['systemctl', 'is-enabled', 'nfs-server.service'])
+
+ except cliapp.AppException:
+ raise cliapp.AppException('server %s does not control its '
+ 'nfs server by systemd' % netloc)
+
+ # TFTP server exports /srv/nfsboot/tftp
+ tftp_root = os.path.join(self.nfsboot_root, 'tftp')
+ try:
+ cliapp.ssh_runcmd(
+ netloc, ['test' , '-d', tftp_root])
+ except cliapp.AppException:
+ raise cliapp.AppException('server %s does not export %s' %
+ (netloc, tftp_root))
+
+ def check_upgradeable(self, nfs_netloc, system_name, version_label):
+ '''Check that there is already a version of the system present.
+
+ Distbuild nodes are stateless, so an upgrade is actually pretty much
+ the same as an initial deployment. This test is just a sanity check.
+
+ '''
+ system_path = self.system_path(system_name)
+ system_version_path = self.system_path(system_name, version_label)
+
+ if not self.remote_directory_exists(nfs_netloc, system_path):
+ raise cliapp.AppException(
+ 'System %s not found at %s:%s, cannot deploy an upgrade.' % (
+ system_name, nfs_netloc, system_path))
+
+ if self.remote_directory_exists(nfs_netloc, system_version_path):
+ if self.get_environment_boolean('OVERWRITE'):
+ pass
+ else:
+ raise cliapp.AppException(
+ 'System %s version %s already exists at %s:%s.' % (
+ system_name, version_label, nfs_netloc,
+ system_version_path))
+
+ def remote_directory_exists(self, nfs_netloc, path):
+ try:
+ cliapp.ssh_runcmd(nfs_netloc, ['test', '-d', path])
+ except cliapp.AppException as e:
+ logging.debug('SSH exception: %s', e)
+ return False
+
+ return True
+
+
+DistbuildTroveNFSBootCheckExtension().run()
diff --git a/distbuild-trove-nfsboot.help b/distbuild-trove-nfsboot.help
new file mode 100644
index 00000000..62f1455c
--- /dev/null
+++ b/distbuild-trove-nfsboot.help
@@ -0,0 +1,49 @@
+# Copyright (C) 2014, 2015 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, see <http://www.gnu.org/licenses/>.
+
+help: |
+ Deploy a distbuild network, using a Trove to serve the kernel and rootfs.
+
+ The `location` argument is the hostname of the Trove system.
+
+ The following configuration values must be specified:
+
+ - DISTBUILD_CONTROLLER: hostname of controller system
+ - DISTBUILD_WORKERS: hostnames of each worker system
+ - DISTBUILD_GIT_SERVER: Trove hostname
+ - DISTBUILD_SHARED_ARTIFACT_CACHE: Trove hostname
+ - DISTBUILD_TROVE_ID: Trove ID
+ - DISTBUILD_WORKER_SSH_KEY: SSH key to be used for ssh:// repos
+
+ A note on DISTBUILD_TROVE_ID: the current distbuild-setup service requires
+ that a single 'Trove ID' is specified. This is used in Morph for expanding
+ keyed URLs. If you set DISTBUILD_TROVE_ID=foo for example, foo:bar will be
+ expanded to git://$DISTBUILD_GIT_SERVER/foo/bar, in addition to the standard
+ baserock: and upstream: prefixes that you can use.
+
+ The WORKER_SSH_KEY must be provided, even if you don't need it. The
+ distbuild-setup service could be changed to make it optional.
+
+ The following configuration values are optional:
+
+ - HOST_MAP: a list of key=value pairs mapping hostnames to IP addresses,
+ or fully-qualified domain names. Useful if you
+ cannot rely on hostname resolution working for your deployment.
+
+ The extension will connect to root@location via ssh to copy the kernel and
+ rootfs, and configure the nfs server. It will duplicate the kernel and
+ rootfs once for each node in the distbuild network.
+
+ The deployment mechanism makes assumptions about the bootloader
+ configuration of the target machines.
diff --git a/distbuild-trove-nfsboot.write b/distbuild-trove-nfsboot.write
new file mode 100755
index 00000000..a5a5b094
--- /dev/null
+++ b/distbuild-trove-nfsboot.write
@@ -0,0 +1,283 @@
+#!/usr/bin/python
+# Copyright (C) 2013-2015 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+'''Morph .write extension for a distbuild network booting off a Trove with NFS.
+
+'''
+
+
+import os
+import sys
+import tempfile
+
+import cliapp
+import morphlib.writeexts
+
+
+class DistbuildTroveNFSBootWriteExtension(morphlib.writeexts.WriteExtension):
+
+ '''Create an NFS root and kernel on TFTP during Morph's deployment.
+
+ See distbuild-trove-nfsboot.help for documentation.
+
+ '''
+
+ nfsboot_root = '/srv/nfsboot'
+ remote_user = 'root'
+
+ def system_path(self, system_name, version_label=None):
+ if version_label:
+ # The 'run' directory is kind of a historical artifact. Baserock
+ # systems that have Btrfs root disks maintain an orig/ and a run/
+ # subvolume, so that one can find changes that have been made at
+ # runtime. For distbuild systems, this isn't necessary because the
+ # root filesystems of the nodes are effectively stateless. However,
+ # existing systems have bootloaders configured to look for the
+ # 'run' directory, so we need to keep creating it.
+ return os.path.join(self.nfsboot_root, system_name, 'systems',
+ version_label, 'run')
+ else:
+ return os.path.join(self.nfsboot_root, system_name)
+
+ def process_args(self, args):
+ if len(args) != 2:
+ raise cliapp.AppException('Wrong number of command line args')
+
+ local_system_path, nfs_host = args
+
+ nfs_netloc = '%s@%s' % (self.remote_user, nfs_host)
+
+ version_label = os.getenv('VERSION_LABEL', 'factory')
+
+ controller_name = os.getenv('DISTBUILD_CONTROLLER')
+ worker_names = os.getenv('DISTBUILD_WORKERS').split()
+ system_names = set([controller_name] + worker_names)
+
+ git_server = os.getenv('DISTBUILD_GIT_SERVER')
+ shared_artifact_cache = os.getenv('DISTBUILD_SHARED_ARTIFACT_CACHE')
+ trove_id = os.getenv('DISTBUILD_TROVE_ID')
+ worker_ssh_key_path = os.getenv('DISTBUILD_WORKER_SSH_KEY')
+
+ host_map = self.parse_host_map_string(os.getenv('HOST_MAP', ''))
+
+ kernel_relpath = self.find_kernel(local_system_path)
+
+ copied_rootfs = None
+ for system_name in system_names:
+ remote_system_path = self.system_path(system_name, version_label)
+ if copied_rootfs is None:
+ self.transfer_system(
+ nfs_netloc, local_system_path, remote_system_path)
+ copied_rootfs = remote_system_path
+ else:
+ self.duplicate_remote_system(
+ nfs_netloc, copied_rootfs, remote_system_path)
+
+ for system_name in system_names:
+ remote_system_path = self.system_path(system_name, version_label)
+ self.link_kernel_to_tftpboot_path(
+ nfs_netloc, system_name, version_label, kernel_relpath)
+ self.set_hostname(
+ nfs_netloc, system_name, remote_system_path)
+ self.write_distbuild_config(
+ nfs_netloc, system_name, remote_system_path, git_server,
+ shared_artifact_cache, trove_id, worker_ssh_key_path,
+ controller_name, worker_names, host_map=host_map)
+
+ self.configure_nfs_exports(nfs_netloc, system_names)
+
+ for system_name in system_names:
+ self.update_default_version(nfs_netloc, system_name, version_label)
+
+ def parse_host_map_string(self, host_map_string):
+ '''Parse the HOST_MAP variable
+
+ Returns a dict mapping hostname to value (where value is an IP
+ address, a fully-qualified domain name, an alternate hostname, or
+ whatever).
+
+ '''
+ pairs = host_map_string.split(' ')
+ return morphlib.util.parse_environment_pairs({}, pairs)
+
+ def transfer_system(self, nfs_netloc, local_system_path,
+ remote_system_path):
+ self.status(msg='Copying rootfs to %(nfs_netloc)s',
+ nfs_netloc=nfs_netloc)
+ cliapp.ssh_runcmd(
+ nfs_netloc, ['mkdir', '-p', remote_system_path])
+ # The deployed rootfs may have been created by OSTree, so definitely
+ # don't pass --hard-links to `rsync`.
+ cliapp.runcmd(
+ ['rsync', '--archive', '--delete', '--info=progress2',
+ '--protect-args', '--partial', '--sparse', '--xattrs',
+ local_system_path + '/',
+ '%s:%s' % (nfs_netloc, remote_system_path)], stdout=sys.stdout)
+
+ def duplicate_remote_system(self, nfs_netloc, source_system_path,
+ target_system_path):
+ self.status(msg='Duplicating rootfs to %(target_system_path)s',
+ target_system_path=target_system_path)
+ cliapp.ssh_runcmd(nfs_netloc,
+ ['mkdir', '-p', target_system_path])
+ # We can't pass --info=progress2 here, because it may not be available
+ # in the remote 'rsync'. The --info setting was added in RSync 3.1.0,
+ # old versions of Baserock have RSync 3.0.9. So the user doesn't get
+ # any progress info on stdout for the 'duplicate' stage.
+ cliapp.ssh_runcmd(nfs_netloc,
+ ['rsync', '--archive', '--delete', '--protect-args', '--partial',
+ '--sparse', '--xattrs', source_system_path + '/',
+ target_system_path], stdout=sys.stdout)
+
+ def find_kernel(self, local_system_path):
+ bootdir = os.path.join(local_system_path, 'boot')
+ image_names = ['vmlinuz', 'zImage', 'uImage']
+
+ for name in image_names:
+ try_path = os.path.join(bootdir, name)
+ if os.path.exists(try_path):
+ kernel_path = os.path.relpath(try_path, local_system_path)
+ break
+ else:
+ raise cliapp.AppException(
+ 'Could not find a kernel in the system: none of '
+ '%s found' % ', '.join(image_names))
+ return kernel_path
+
+ def link_kernel_to_tftpboot_path(self, nfs_netloc, system_name,
+ version_label, kernel_relpath):
+ '''Create links for TFTP server for a system's kernel.'''
+
+ remote_system_path = self.system_path(system_name, version_label)
+ kernel_dest = os.path.join(remote_system_path, kernel_relpath)
+
+ self.status(msg='Creating links to %(name)s kernel in tftp directory',
+ name=system_name)
+ tftp_dir = os.path.join(self.nfsboot_root , 'tftp')
+
+ versioned_kernel_name = "%s-%s" % (system_name, version_label)
+ kernel_name = system_name
+
+ cliapp.ssh_runcmd(nfs_netloc,
+ ['ln', '-f', kernel_dest,
+ os.path.join(tftp_dir, versioned_kernel_name)])
+
+ cliapp.ssh_runcmd(nfs_netloc,
+ ['ln', '-sf', versioned_kernel_name,
+ os.path.join(tftp_dir, kernel_name)])
+
+ def set_remote_file_contents(self, nfs_netloc, path, text):
+ with tempfile.NamedTemporaryFile() as f:
+ f.write(text)
+ f.flush()
+ cliapp.runcmd(
+ ['scp', f.name, '%s:%s' % (nfs_netloc, path)])
+
+ def set_hostname(self, nfs_netloc, system_name, system_path):
+ hostname_path = os.path.join(system_path, 'etc', 'hostname')
+ self.set_remote_file_contents(
+ nfs_netloc, hostname_path, system_name + '\n')
+
+ def write_distbuild_config(self, nfs_netloc, system_name, system_path,
+ git_server, shared_artifact_cache, trove_id,
+ worker_ssh_key_path, controller_name,
+ worker_names, host_map = {}):
+ '''Write /etc/distbuild/distbuild.conf on the node.
+
+ This .write extension takes advantage of the 'generic' mode of
+ distbuild.configure. Each node is not configured until first-boot,
+ when distbuild-setup.service runs and configures the node based on the
+ contents of /etc/distbuild/distbuild.conf.
+
+ '''
+ def host(hostname):
+ return host_map.get(hostname, hostname)
+
+ config = {
+ 'ARTIFACT_CACHE_SERVER': host(shared_artifact_cache),
+ 'CONTROLLERHOST': host(controller_name),
+ 'TROVE_HOST': host(git_server),
+ 'TROVE_ID': trove_id,
+ 'DISTBUILD_CONTROLLER': system_name == controller_name,
+ 'DISTBUILD_WORKER': system_name in worker_names,
+ 'WORKERS': ', '.join(map(host, worker_names)),
+ 'WORKER_SSH_KEY': '/etc/distbuild/worker.key',
+ }
+
+ config_text = '\n'.join(
+ '%s: %s' % (key, value) for key, value in config.iteritems())
+ config_text = \
+ '# Generated by distbuild-trove-nfsboot.write\n' + \
+ config_text + '\n'
+ path = os.path.join(system_path, 'etc', 'distbuild')
+ cliapp.ssh_runcmd(
+ nfs_netloc, ['mkdir', '-p', path])
+ cliapp.runcmd(
+ ['scp', worker_ssh_key_path, '%s:%s' % (nfs_netloc, path)])
+ self.set_remote_file_contents(
+ nfs_netloc, os.path.join(path, 'distbuild.conf'), config_text)
+
+ def configure_nfs_exports(self, nfs_netloc, system_names):
+ '''Ensure the Trove is set up to export the NFS roots we need.
+
+ This doesn't handle setting up the TFTP daemon. We assume that is
+ already running.
+
+ '''
+ for system_name in system_names:
+ exported_path = self.system_path(system_name)
+ exports_path = '/etc/exports'
+
+ # Rather ugly SSH hackery follows to ensure each system path is
+ # listed in /etc/exports.
+ try:
+ cliapp.ssh_runcmd(
+ nfs_netloc, ['grep', '-q', exported_path, exports_path])
+ except cliapp.AppException:
+ ip_mask = '*'
+ options = 'rw,no_subtree_check,no_root_squash,async'
+ exports_string = '%s %s(%s)\n' % (exported_path, ip_mask,
+ options)
+ exports_append_sh = '''\
+ set -eu
+ target="$1"
+ temp=$(mktemp)
+ cat "$target" > "$temp"
+ cat >> "$temp"
+ mv "$temp" "$target"
+ '''
+ cliapp.ssh_runcmd(
+ nfs_netloc,
+ ['sh', '-c', exports_append_sh, '--', exports_path],
+ feed_stdin=exports_string)
+
+ cliapp.ssh_runcmd(nfs_netloc,
+ ['systemctl', 'restart', 'nfs-server.service'])
+
+ def update_default_version(self, remote_netloc, system_name,
+ version_label):
+ self.status(msg='Linking \'default\' to %(version)s for %(system)s',
+ version=version_label, system=system_name)
+ system_path = self.system_path(system_name)
+ system_version_path = os.path.join(system_path, 'systems',
+ version_label)
+ default_path = os.path.join(system_path, 'systems', 'default')
+
+ cliapp.ssh_runcmd(remote_netloc,
+ ['ln', '-sfn', system_version_path, default_path])
+
+
+DistbuildTroveNFSBootWriteExtension().run()