diff options
-rwxr-xr-x | morphlib/exts/distbuild-trove-nfsboot.check | 150 | ||||
-rw-r--r-- | morphlib/exts/distbuild-trove-nfsboot.help | 49 | ||||
-rwxr-xr-x | morphlib/exts/distbuild-trove-nfsboot.write | 283 |
3 files changed, 482 insertions, 0 deletions
diff --git a/morphlib/exts/distbuild-trove-nfsboot.check b/morphlib/exts/distbuild-trove-nfsboot.check new file mode 100755 index 00000000..38c491e5 --- /dev/null +++ b/morphlib/exts/distbuild-trove-nfsboot.check @@ -0,0 +1,150 @@ +#!/usr/bin/python +# Copyright (C) 2014-2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. + +'''Preparatory checks for Morph 'distbuild-trove-nfsboot' write extension''' + +import cliapp +import logging +import os + +import morphlib.writeexts + + +class DistbuildTroveNFSBootCheckExtension(morphlib.writeexts.WriteExtension): + + nfsboot_root = '/srv/nfsboot' + remote_user = 'root' + + required_vars = [ + 'DISTBUILD_CONTROLLER', + 'DISTBUILD_GIT_SERVER', + 'DISTBUILD_SHARED_ARTIFACT_CACHE', + 'DISTBUILD_TROVE_ID', + 'DISTBUILD_WORKERS', + 'DISTBUILD_WORKER_SSH_KEY', + ] + + def system_path(self, system_name, version_label=None): + if version_label: + return os.path.join(self.nfsboot_root, system_name, 'systems', + version_label, 'run') + else: + return os.path.join(self.nfsboot_root, system_name) + + def process_args(self, args): + if len(args) != 1: + raise cliapp.AppException('Wrong number of command line args') + + nfs_host = args[0] + nfs_netloc = '%s@%s' % (self.remote_user, nfs_host) + + version_label = os.getenv('VERSION_LABEL', 'factory') + + missing_vars = [var for var in self.required_vars + if not var in os.environ] + if missing_vars: + raise cliapp.AppException( + 'Please set: %s' % ', '.join(missing_vars)) + + controllers = os.getenv('DISTBUILD_CONTROLLER').split() + workers = os.getenv('DISTBUILD_WORKERS').split() + + if len(controllers) != 1: + raise cliapp.AppException('Please specify exactly one controller.') + + if len(workers) == 0: + raise cliapp.AppException('Please specify at least one worker.') + + upgrade = self.get_environment_boolean('UPGRADE') + + self.check_good_server(nfs_netloc) + + system_names = set(controllers + workers) + for system_name in system_names: + if upgrade: + self.check_upgradeable(nfs_netloc, system_name, version_label) + else: + system_path = self.system_path(system_name) + + if self.remote_directory_exists(nfs_netloc, system_path): + if self.get_environment_boolean('OVERWRITE') == False: + raise cliapp.AppException( + 'System %s already exists at %s:%s. Try `morph ' + 'upgrade` instead of `morph deploy`.' % ( + system_name, nfs_netloc, system_path)) + + def check_good_server(self, netloc): + # FIXME: assumes root + self.check_ssh_connectivity(netloc.split('@')[-1]) + + # Is an NFS server + try: + cliapp.ssh_runcmd( + netloc, ['test', '-e', '/etc/exports']) + except cliapp.AppException: + raise cliapp.AppException('server %s is not an nfs server' + % netloc) + try: + cliapp.ssh_runcmd( + netloc, ['systemctl', 'is-enabled', 'nfs-server.service']) + + except cliapp.AppException: + raise cliapp.AppException('server %s does not control its ' + 'nfs server by systemd' % netloc) + + # TFTP server exports /srv/nfsboot/tftp + tftp_root = os.path.join(self.nfsboot_root, 'tftp') + try: + cliapp.ssh_runcmd( + netloc, ['test' , '-d', tftp_root]) + except cliapp.AppException: + raise cliapp.AppException('server %s does not export %s' % + (netloc, tftp_root)) + + def check_upgradeable(self, nfs_netloc, system_name, version_label): + '''Check that there is already a version of the system present. + + Distbuild nodes are stateless, so an upgrade is actually pretty much + the same as an initial deployment. This test is just a sanity check. + + ''' + system_path = self.system_path(system_name) + system_version_path = self.system_path(system_name, version_label) + + if not self.remote_directory_exists(nfs_netloc, system_path): + raise cliapp.AppException( + 'System %s not found at %s:%s, cannot deploy an upgrade.' % ( + system_name, nfs_netloc, system_path)) + + if self.remote_directory_exists(nfs_netloc, system_version_path): + if self.get_environment_boolean('OVERWRITE'): + pass + else: + raise cliapp.AppException( + 'System %s version %s already exists at %s:%s.' % ( + system_name, version_label, nfs_netloc, + system_version_path)) + + def remote_directory_exists(self, nfs_netloc, path): + try: + cliapp.ssh_runcmd(nfs_netloc, ['test', '-d', path]) + except cliapp.AppException as e: + logging.debug('SSH exception: %s', e) + return False + + return True + + +DistbuildTroveNFSBootCheckExtension().run() diff --git a/morphlib/exts/distbuild-trove-nfsboot.help b/morphlib/exts/distbuild-trove-nfsboot.help new file mode 100644 index 00000000..62f1455c --- /dev/null +++ b/morphlib/exts/distbuild-trove-nfsboot.help @@ -0,0 +1,49 @@ +# Copyright (C) 2014, 2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, see <http://www.gnu.org/licenses/>. + +help: | + Deploy a distbuild network, using a Trove to serve the kernel and rootfs. + + The `location` argument is the hostname of the Trove system. + + The following configuration values must be specified: + + - DISTBUILD_CONTROLLER: hostname of controller system + - DISTBUILD_WORKERS: hostnames of each worker system + - DISTBUILD_GIT_SERVER: Trove hostname + - DISTBUILD_SHARED_ARTIFACT_CACHE: Trove hostname + - DISTBUILD_TROVE_ID: Trove ID + - DISTBUILD_WORKER_SSH_KEY: SSH key to be used for ssh:// repos + + A note on TROVE_ID: the current distbuild-setup service requires that + a single 'Trove ID' is specified. This is used in Morph for expanding + keyed URLs. If you set TROVE_ID=foo for example, foo:bar will be expanded + to git://$GIT_SERVER/foo, in addition to the standard baserock: and + upstream: prefixes that you can use. + + The WORKER_SSH_KEY must be provided, even if you don't need it. The + distbuild-setup service could be changed to make it optional. + + The following configuration values are optional: + + - HOST_MAP: a list of key=value pairs mapping hostnames to IP addresses, + or fully-qualified domain names. Useful if you + cannot rely on hostname resolution working for your deploment. + + The extension will connect to root@location via ssh to copy the kernel and + rootfs, and configure the nfs server. It will duplicate the kernel and + rootfs once for each node in the distbuild network. + + The deployment mechanism makes assumptions about the bootloader + configuration of the target machines. diff --git a/morphlib/exts/distbuild-trove-nfsboot.write b/morphlib/exts/distbuild-trove-nfsboot.write new file mode 100755 index 00000000..a5a5b094 --- /dev/null +++ b/morphlib/exts/distbuild-trove-nfsboot.write @@ -0,0 +1,283 @@ +#!/usr/bin/python +# Copyright (C) 2013-2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. + + +'''Morph .write extension for a distbuild network booting off a Trove with NFS. + +''' + + +import os +import sys +import tempfile + +import cliapp +import morphlib.writeexts + + +class DistbuildTroveNFSBootWriteExtension(morphlib.writeexts.WriteExtension): + + '''Create an NFS root and kernel on TFTP during Morph's deployment. + + See distbuild-trove-nfsboot.help for documentation. + + ''' + + nfsboot_root = '/srv/nfsboot' + remote_user = 'root' + + def system_path(self, system_name, version_label=None): + if version_label: + # The 'run' directory is kind of a historical artifact. Baserock + # systems that have Btrfs root disks maintain an orig/ and a run/ + # subvolume, so that one can find changes that have been made at + # runtime. For distbuild systems, this isn't necessary because the + # root filesystems of the nodes are effectively stateless. However, + # existing systems have bootloaders configured to look for the + # 'run' directory, so we need to keep creating it. + return os.path.join(self.nfsboot_root, system_name, 'systems', + version_label, 'run') + else: + return os.path.join(self.nfsboot_root, system_name) + + def process_args(self, args): + if len(args) != 2: + raise cliapp.AppException('Wrong number of command line args') + + local_system_path, nfs_host = args + + nfs_netloc = '%s@%s' % (self.remote_user, nfs_host) + + version_label = os.getenv('VERSION_LABEL', 'factory') + + controller_name = os.getenv('DISTBUILD_CONTROLLER') + worker_names = os.getenv('DISTBUILD_WORKERS').split() + system_names = set([controller_name] + worker_names) + + git_server = os.getenv('DISTBUILD_GIT_SERVER') + shared_artifact_cache = os.getenv('DISTBUILD_SHARED_ARTIFACT_CACHE') + trove_id = os.getenv('DISTBUILD_TROVE_ID') + worker_ssh_key_path = os.getenv('DISTBUILD_WORKER_SSH_KEY') + + host_map = self.parse_host_map_string(os.getenv('HOST_MAP', '')) + + kernel_relpath = self.find_kernel(local_system_path) + + copied_rootfs = None + for system_name in system_names: + remote_system_path = self.system_path(system_name, version_label) + if copied_rootfs is None: + self.transfer_system( + nfs_netloc, local_system_path, remote_system_path) + copied_rootfs = remote_system_path + else: + self.duplicate_remote_system( + nfs_netloc, copied_rootfs, remote_system_path) + + for system_name in system_names: + remote_system_path = self.system_path(system_name, version_label) + self.link_kernel_to_tftpboot_path( + nfs_netloc, system_name, version_label, kernel_relpath) + self.set_hostname( + nfs_netloc, system_name, remote_system_path) + self.write_distbuild_config( + nfs_netloc, system_name, remote_system_path, git_server, + shared_artifact_cache, trove_id, worker_ssh_key_path, + controller_name, worker_names, host_map=host_map) + + self.configure_nfs_exports(nfs_netloc, system_names) + + for system_name in system_names: + self.update_default_version(nfs_netloc, system_name, version_label) + + def parse_host_map_string(self, host_map_string): + '''Parse the HOST_MAP variable + + Returns a dict mapping hostname to value (where value is an IP + address, a fully-qualified domain name, an alternate hostname, or + whatever). + + ''' + pairs = host_map_string.split(' ') + return morphlib.util.parse_environment_pairs({}, pairs) + + def transfer_system(self, nfs_netloc, local_system_path, + remote_system_path): + self.status(msg='Copying rootfs to %(nfs_netloc)s', + nfs_netloc=nfs_netloc) + cliapp.ssh_runcmd( + nfs_netloc, ['mkdir', '-p', remote_system_path]) + # The deployed rootfs may have been created by OSTree, so definitely + # don't pass --hard-links to `rsync`. + cliapp.runcmd( + ['rsync', '--archive', '--delete', '--info=progress2', + '--protect-args', '--partial', '--sparse', '--xattrs', + local_system_path + '/', + '%s:%s' % (nfs_netloc, remote_system_path)], stdout=sys.stdout) + + def duplicate_remote_system(self, nfs_netloc, source_system_path, + target_system_path): + self.status(msg='Duplicating rootfs to %(target_system_path)s', + target_system_path=target_system_path) + cliapp.ssh_runcmd(nfs_netloc, + ['mkdir', '-p', target_system_path]) + # We can't pass --info=progress2 here, because it may not be available + # in the remote 'rsync'. The --info setting was added in RSync 3.1.0, + # old versions of Baserock have RSync 3.0.9. So the user doesn't get + # any progress info on stdout for the 'duplicate' stage. + cliapp.ssh_runcmd(nfs_netloc, + ['rsync', '--archive', '--delete', '--protect-args', '--partial', + '--sparse', '--xattrs', source_system_path + '/', + target_system_path], stdout=sys.stdout) + + def find_kernel(self, local_system_path): + bootdir = os.path.join(local_system_path, 'boot') + image_names = ['vmlinuz', 'zImage', 'uImage'] + + for name in image_names: + try_path = os.path.join(bootdir, name) + if os.path.exists(try_path): + kernel_path = os.path.relpath(try_path, local_system_path) + break + else: + raise cliapp.AppException( + 'Could not find a kernel in the system: none of ' + '%s found' % ', '.join(image_names)) + return kernel_path + + def link_kernel_to_tftpboot_path(self, nfs_netloc, system_name, + version_label, kernel_relpath): + '''Create links for TFTP server for a system's kernel.''' + + remote_system_path = self.system_path(system_name, version_label) + kernel_dest = os.path.join(remote_system_path, kernel_relpath) + + self.status(msg='Creating links to %(name)s kernel in tftp directory', + name=system_name) + tftp_dir = os.path.join(self.nfsboot_root , 'tftp') + + versioned_kernel_name = "%s-%s" % (system_name, version_label) + kernel_name = system_name + + cliapp.ssh_runcmd(nfs_netloc, + ['ln', '-f', kernel_dest, + os.path.join(tftp_dir, versioned_kernel_name)]) + + cliapp.ssh_runcmd(nfs_netloc, + ['ln', '-sf', versioned_kernel_name, + os.path.join(tftp_dir, kernel_name)]) + + def set_remote_file_contents(self, nfs_netloc, path, text): + with tempfile.NamedTemporaryFile() as f: + f.write(text) + f.flush() + cliapp.runcmd( + ['scp', f.name, '%s:%s' % (nfs_netloc, path)]) + + def set_hostname(self, nfs_netloc, system_name, system_path): + hostname_path = os.path.join(system_path, 'etc', 'hostname') + self.set_remote_file_contents( + nfs_netloc, hostname_path, system_name + '\n') + + def write_distbuild_config(self, nfs_netloc, system_name, system_path, + git_server, shared_artifact_cache, trove_id, + worker_ssh_key_path, controller_name, + worker_names, host_map = {}): + '''Write /etc/distbuild/distbuild.conf on the node. + + This .write extension takes advantage of the 'generic' mode of + distbuild.configure. Each node is not configured until first-boot, + when distbuild-setup.service runs and configures the node based on the + contents of /etc/distbuild/distbuild.conf. + + ''' + def host(hostname): + return host_map.get(hostname, hostname) + + config = { + 'ARTIFACT_CACHE_SERVER': host(shared_artifact_cache), + 'CONTROLLERHOST': host(controller_name), + 'TROVE_HOST': host(git_server), + 'TROVE_ID': trove_id, + 'DISTBUILD_CONTROLLER': system_name == controller_name, + 'DISTBUILD_WORKER': system_name in worker_names, + 'WORKERS': ', '.join(map(host, worker_names)), + 'WORKER_SSH_KEY': '/etc/distbuild/worker.key', + } + + config_text = '\n'.join( + '%s: %s' % (key, value) for key, value in config.iteritems()) + config_text = \ + '# Generated by distbuild-trove-nfsboot.write\n' + \ + config_text + '\n' + path = os.path.join(system_path, 'etc', 'distbuild') + cliapp.ssh_runcmd( + nfs_netloc, ['mkdir', '-p', path]) + cliapp.runcmd( + ['scp', worker_ssh_key_path, '%s:%s' % (nfs_netloc, path)]) + self.set_remote_file_contents( + nfs_netloc, os.path.join(path, 'distbuild.conf'), config_text) + + def configure_nfs_exports(self, nfs_netloc, system_names): + '''Ensure the Trove is set up to export the NFS roots we need. + + This doesn't handle setting up the TFTP daemon. We assume that is + already running. + + ''' + for system_name in system_names: + exported_path = self.system_path(system_name) + exports_path = '/etc/exports' + + # Rather ugly SSH hackery follows to ensure each system path is + # listed in /etc/exports. + try: + cliapp.ssh_runcmd( + nfs_netloc, ['grep', '-q', exported_path, exports_path]) + except cliapp.AppException: + ip_mask = '*' + options = 'rw,no_subtree_check,no_root_squash,async' + exports_string = '%s %s(%s)\n' % (exported_path, ip_mask, + options) + exports_append_sh = '''\ + set -eu + target="$1" + temp=$(mktemp) + cat "$target" > "$temp" + cat >> "$temp" + mv "$temp" "$target" + ''' + cliapp.ssh_runcmd( + nfs_netloc, + ['sh', '-c', exports_append_sh, '--', exports_path], + feed_stdin=exports_string) + + cliapp.ssh_runcmd(nfs_netloc, + ['systemctl', 'restart', 'nfs-server.service']) + + def update_default_version(self, remote_netloc, system_name, + version_label): + self.status(msg='Linking \'default\' to %(version)s for %(system)s', + version=version_label, system=system_name) + system_path = self.system_path(system_name) + system_version_path = os.path.join(system_path, 'systems', + version_label) + default_path = os.path.join(system_path, 'systems', 'default') + + cliapp.ssh_runcmd(remote_netloc, + ['ln', '-sfn', system_version_path, default_path]) + + +DistbuildTroveNFSBootWriteExtension().run() |