From a24db5b6746dad1a489aad8de65c1539ef8aff00 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 8 Aug 2014 13:17:01 +0000 Subject: Transfer sparse files faster for kvm, vbox deployment The KVM and VirtualBox deployments use sparse files for raw disk images. This means they can store a large disk (say, tens or hundreds of gigabytes) without using more disk space than is required for the actual content (e.g., a gigabyte or so for the files in the root filesystem). The kernel and filesystem make the unwritten parts of the disk image look as if they are filled with zero bytes. This is good. However, during deployment those sparse files get transferred as if there really are a lot of zeroes. Those zeroes take a lot of time to transfer. rsync, for example, does not handle large holes efficiently. This change introduces a couple of helper tools (morphlib/xfer-hole and morphlib/recv-hole), which transfer the holes more efficiently. The xfer-hole program reads a file and outputs records like these: DATA 123 binary data (exactly 123 bytes and no newline at the end) HOLE 3245 xfer-hole can do this efficiently, without having to read through all the zeroes in the holes, using the SEEK_DATA and SEEK_HOLE arguments to lseek. Using this, the holes take only a few bytes each, making it possible to transfer a disk image faster. In my benchmarks, transferring a 100 gigabyte disk image took about 100 seconds for KVM, and 220 seconds for VirtualBox (which needs to do more work at the receiver to convert the raw disk to a VDI). Both benchmarks were from a VM on my laptop to the laptop itself. The interesting bit here is that the receiver (recv-hole) is simple enough that it can be implemented in a bit of shell script, and the text of the shell script can be run on the remote end by giving it to ssh as a command line argument. This means there is no need to install any special tools on the receiver, which makes using this improvement much simpler. 
--- morphlib/exts/kvm.write | 13 +++- morphlib/exts/virtualbox-ssh.write | 16 +++-- morphlib/recv-hole | 134 +++++++++++++++++++++++++++++++++++++ morphlib/util.py | 24 +++++++ morphlib/xfer-hole | 132 ++++++++++++++++++++++++++++++++++++ 5 files changed, 311 insertions(+), 8 deletions(-) create mode 100755 morphlib/recv-hole create mode 100755 morphlib/xfer-hole diff --git a/morphlib/exts/kvm.write b/morphlib/exts/kvm.write index 94a55daa..3e3b3eb1 100755 --- a/morphlib/exts/kvm.write +++ b/morphlib/exts/kvm.write @@ -90,9 +90,16 @@ class KvmPlusSshWriteExtension(morphlib.writeexts.WriteExtension): '''Transfer raw disk image to libvirt host.''' self.status(msg='Transferring disk image') - target = '%s:%s' % (ssh_host, vm_path) - with open(raw_disk, 'rb') as f: - cliapp.runcmd(['rsync', '-szS', raw_disk, target]) + + xfer_hole_path = morphlib.util.get_data_path('xfer-hole') + recv_hole = morphlib.util.get_data('recv-hole') + + cliapp.runcmd( + ['python', xfer_hole_path, raw_disk], + ['ssh', ssh_host, + 'sh', '-c', cliapp.shell_quote(recv_hole), + 'dummy-argv0', 'file', vm_path], + stdout=None, stderr=None) def create_libvirt_guest(self, ssh_host, vm_name, vm_path, autostart): '''Create the libvirt virtual machine.''' diff --git a/morphlib/exts/virtualbox-ssh.write b/morphlib/exts/virtualbox-ssh.write index 42585f5e..1aebe490 100755 --- a/morphlib/exts/virtualbox-ssh.write +++ b/morphlib/exts/virtualbox-ssh.write @@ -102,11 +102,17 @@ class VirtualBoxPlusSshWriteExtension(morphlib.writeexts.WriteExtension): '''Transfer raw disk image to VirtualBox host, and convert to VDI.''' self.status(msg='Transfer disk and convert to VDI') - with open(raw_disk, 'rb') as f: - cliapp.ssh_runcmd(ssh_host, - ['VBoxManage', 'convertfromraw', 'stdin', vdi_path, - str(os.path.getsize(raw_disk))], - stdin=f) + + st = os.lstat(raw_disk) + xfer_hole_path = morphlib.util.get_data_path('xfer-hole') + recv_hole = morphlib.util.get_data('recv-hole') + + cliapp.runcmd( + ['python', 
xfer_hole_path, raw_disk], + ['ssh', ssh_host, + 'sh', '-c', cliapp.shell_quote(recv_hole), + 'dummy-argv0', 'vbox', vdi_path, str(st.st_size)], + stdout=None, stderr=None) def virtualbox_version(self, ssh_host): 'Get the version number of the VirtualBox running on the remote host.' diff --git a/morphlib/recv-hole b/morphlib/recv-hole new file mode 100755 index 00000000..75f80a6a --- /dev/null +++ b/morphlib/recv-hole @@ -0,0 +1,134 @@ +#!/bin/sh +# +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# =*= License: GPL-2 =*= + + +# Receive a data stream describing a sparse file, and reproduce it, +# either to a named file or stdout. +# +# The data stream is simple: it's a sequence of DATA or HOLE records: +# +# DATA +# 123 +# <123 bytes of binary data, NOT including newline at the end> +# +# HOLE +# 123 +# +# This shell script can be executed over ssh (given to ssh as an arguemnt, +# with suitable escaping) on a different computer. This allows a large +# sparse file (e.g., disk image) be transferred quickly. 
+ + +set -eu + + +die() +{ + echo "$@" 1>&2 + exit 1 +} + + +recv_hole_to_file() +{ + local n + + read n + truncate --size "+$n" "$1" +} + + +recv_data_to_file() +{ + local n + read n + + local blocksize=1048576 + local blocks="$(echo "$n" / "$blocksize" | bc)" + local extra="$(echo "$n" % "$blocksize" | bc)" + + xfer_data_to_stdout "$blocksize" "$blocks" >> "$1" + xfer_data_to_stdout 1 "$extra" >> "$1" +} + + +recv_hole_to_stdout() +{ + local n + read n + (echo "$n"; cat /dev/zero) | recv_data_to_stdout +} + + +recv_data_to_stdout() +{ + local n + read n + + local blocksize=1048576 + local blocks="$(echo "$n" / "$blocksize" | bc)" + local extra="$(echo "$n" % "$blocksize" | bc)" + + xfer_data_to_stdout "$blocksize" "$blocks" + xfer_data_to_stdout 1 "$extra" +} + + +xfer_data_to_stdout() +{ + local log="$(mktemp)" + if ! dd "bs=$1" count="$2" iflag=fullblock status=noxfer 2> "$log" + then + cat "$log" 1>&2 + rm -f "$log" + exit 1 + else + rm -f "$log" + fi +} + + +type="$1" +case "$type" in + file) + output="$2" + truncate --size=0 "$output" + while read what + do + case "$what" in + DATA) recv_data_to_file "$output" ;; + HOLE) recv_hole_to_file "$output" ;; + *) die "Unknown instruction: $what" ;; + esac + done + ;; + vbox) + output="$2" + disk_size="$3" + while read what + do + case "$what" in + DATA) recv_data_to_stdout ;; + HOLE) recv_hole_to_stdout ;; + *) die "Unknown instruction: $what" ;; + esac + done | + VBoxManage convertfromraw stdin "$output" "$disk_size" + ;; +esac diff --git a/morphlib/util.py b/morphlib/util.py index 0c551296..36ab4e21 100644 --- a/morphlib/util.py +++ b/morphlib/util.py @@ -463,3 +463,27 @@ def iter_trickle(iterable, limit): if len(buf) == 0: break yield buf + + +def get_data_path(relative_path): # pragma: no cover + '''Return path to a data file in the morphlib Python package. + + ``relative_path`` is the name of the data file, relative to the + location in morphlib where the data files are. 
+ + ''' + + morphlib_dir = os.path.dirname(morphlib.__file__) + return os.path.join(morphlib_dir, relative_path) + + +def get_data(relative_path): # pragma: no cover + '''Return contents of a data file from the morphlib Python package. + + ``relative_path`` is the name of the data file, relative to the + location in morphlib where the data files are. + + ''' + + with open(get_data_path(relative_path)) as f: + return f.read() diff --git a/morphlib/xfer-hole b/morphlib/xfer-hole new file mode 100755 index 00000000..0d4cee7a --- /dev/null +++ b/morphlib/xfer-hole @@ -0,0 +1,132 @@ +#!/usr/bin/env python +# +# Send a sparse file more space-efficiently. +# See recv-hole for a description of the protocol. +# +# Note that xfer-hole requires a version of Linux with support for +# SEEK_DATA and SEEK_HOLE. +# +# +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# =*= License: GPL-2 =*= + + + +import errno +import os +import sys + + +SEEK_DATA = 3 +SEEK_HOLE = 4 + + +filename = sys.argv[1] +fd = os.open(filename, os.O_RDONLY) +pos = 0 + + +DATA = 'data' +HOLE = 'hole' +EOF = 'eof' + + +def safe_lseek(fd, pos, whence): + try: + return os.lseek(fd, pos, whence) + except OSError as e: + if e.errno == errno.ENXIO: + return -1 + raise + + +def current_data_or_pos(fd, pos): + length = safe_lseek(fd, 0, os.SEEK_END) + next_data = safe_lseek(fd, pos, SEEK_DATA) + next_hole = safe_lseek(fd, pos, SEEK_HOLE) + + if pos == length: + return EOF, pos + elif pos == next_data: + return DATA, pos + elif pos == next_hole: + return HOLE, pos + else: + assert False, \ + ("Do not understand: pos=%d next_data=%d next_hole=%d" % + (pos, next_data, next_hole)) + + +def next_data_or_hole(fd, pos): + length = safe_lseek(fd, 0, os.SEEK_END) + next_data = safe_lseek(fd, pos, SEEK_DATA) + next_hole = safe_lseek(fd, pos, SEEK_HOLE) + + if pos == length: + return EOF, pos + elif pos == next_data: + # We are at data. + if next_hole == -1 or next_hole == length: + return EOF, length + else: + return HOLE, next_hole + elif pos == next_hole: + # We are at a hole. 
+ if next_data == -1 or next_data == length: + return EOF, length + else: + return DATA, next_data + else: + assert False, \ + ("Do not understand: pos=%d next_data=%d next_hole=%d" % + (pos, next_data, next_hole)) + + +def find_data_and_holes(fd): + pos = safe_lseek(fd, 0, os.SEEK_CUR) + + kind, pos = current_data_or_pos(fd, pos) + while kind != EOF: + yield kind, pos + kind, pos = next_data_or_hole(fd, pos) + yield kind, pos + + +def make_xfer_instructions(fd): + prev_kind = None + prev_pos = None + for kind, pos in find_data_and_holes(fd): + if prev_kind == DATA: + yield (DATA, prev_pos, pos) + elif prev_kind == HOLE: + yield (HOLE, prev_pos, pos) + prev_kind = kind + prev_pos = pos + + +def copy_slice_from_file(to, fd, start, end): + safe_lseek(fd, start, os.SEEK_SET) + data = os.read(fd, end - start) + to.write(data) + + +for kind, start, end in make_xfer_instructions(fd): + if kind == HOLE: + sys.stdout.write('HOLE\n%d\n' % (end - start)) + elif kind == DATA: + sys.stdout.write('DATA\n%d\n' % (end - start)) + copy_slice_from_file(sys.stdout, fd, start, end) -- cgit v1.2.1 From 45601a522620bb9e8a4e1b3ddd5de700f733421e Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 5 Sep 2014 13:18:05 +0000 Subject: Document why VBoxManage is run in recv-hole instead of caller --- morphlib/recv-hole | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/morphlib/recv-hole b/morphlib/recv-hole index 75f80a6a..a406cc98 100755 --- a/morphlib/recv-hole +++ b/morphlib/recv-hole @@ -33,6 +33,31 @@ # This shell script can be executed over ssh (given to ssh as an arguemnt, # with suitable escaping) on a different computer. This allows a large # sparse file (e.g., disk image) be transferred quickly. +# +# This script should be called in one of the following ways: +# +# recv-hole file FILENAME +# recv-hole vbox FILENAME DISKSIZE +# +# In both cases, FILENAME is the pathname of the disk image on the +# receiving end. 
DISKSIZE is the size of the disk image in bytes. The +# first form is used when transferring a disk image to become an +# identical file on the receiving end. +# +# The second form is used when the disk image should be converted for +# use by VirtualBox. In this case, we want to avoid writing a +# temporary file on disk, and then calling the VirtualBox VBoxManage +# tool to do the conversion, since that would involve large amounts of +# unnecessary I/O and disk usage. Instead we pipe the file directly to +# VBoxManage, avoiding those issues. The piping is done here in this +# script, instead of in the caller, to make it easier to run things +# over ssh. +# +# However, since it's not possible to seek in a Unix pipe, we have to +# explicitly write the zeroes into the pipe. This is not +# super-efficient, but the way to avoid that would be to avoid sending +# a sparse file, and do the conversion to a VDI on the sending end. +# That is out of scope for xfer-hole and recv-hole. set -eu -- cgit v1.2.1 From e062415af49f2439eff2a7702f83f1a9f6a8afe6 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 5 Sep 2014 13:27:36 +0000 Subject: Use $((...)) instead of $(... 
| bc) --- morphlib/recv-hole | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/morphlib/recv-hole b/morphlib/recv-hole index a406cc98..d6504bf6 100755 --- a/morphlib/recv-hole +++ b/morphlib/recv-hole @@ -85,8 +85,8 @@ recv_data_to_file() read n local blocksize=1048576 - local blocks="$(echo "$n" / "$blocksize" | bc)" - local extra="$(echo "$n" % "$blocksize" | bc)" + local blocks=$(($n / $blocksize)) + local extra=$(($n % $blocksize)) xfer_data_to_stdout "$blocksize" "$blocks" >> "$1" xfer_data_to_stdout 1 "$extra" >> "$1" @@ -107,8 +107,8 @@ recv_data_to_stdout() read n local blocksize=1048576 - local blocks="$(echo "$n" / "$blocksize" | bc)" - local extra="$(echo "$n" % "$blocksize" | bc)" + local blocks=$(($n / $blocksize)) + local extra=$(($n % $blocksize)) xfer_data_to_stdout "$blocksize" "$blocks" xfer_data_to_stdout 1 "$extra" -- cgit v1.2.1 From 19ceccea15e712616b1c5a48339be10fcadba41b Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 5 Sep 2014 14:17:53 +0000 Subject: Fix shell quoting for ssh remote command lines Found by Richard Maw. 
--- morphlib/exts/kvm.write | 8 +++++--- morphlib/exts/virtualbox-ssh.write | 9 ++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/morphlib/exts/kvm.write b/morphlib/exts/kvm.write index 3e3b3eb1..16f188b5 100755 --- a/morphlib/exts/kvm.write +++ b/morphlib/exts/kvm.write @@ -94,11 +94,13 @@ class KvmPlusSshWriteExtension(morphlib.writeexts.WriteExtension): xfer_hole_path = morphlib.util.get_data_path('xfer-hole') recv_hole = morphlib.util.get_data('recv-hole') + ssh_remote_cmd = [ + 'sh', '-c', recv_hole, 'dummy-argv0', 'file', vm_path + ] + cliapp.runcmd( ['python', xfer_hole_path, raw_disk], - ['ssh', ssh_host, - 'sh', '-c', cliapp.shell_quote(recv_hole), - 'dummy-argv0', 'file', vm_path], + ['ssh', ssh_host] + map(cliapp.shell_quote, ssh_remote_cmd), stdout=None, stderr=None) def create_libvirt_guest(self, ssh_host, vm_name, vm_path, autostart): diff --git a/morphlib/exts/virtualbox-ssh.write b/morphlib/exts/virtualbox-ssh.write index 1aebe490..39ea8f86 100755 --- a/morphlib/exts/virtualbox-ssh.write +++ b/morphlib/exts/virtualbox-ssh.write @@ -107,11 +107,14 @@ class VirtualBoxPlusSshWriteExtension(morphlib.writeexts.WriteExtension): xfer_hole_path = morphlib.util.get_data_path('xfer-hole') recv_hole = morphlib.util.get_data('recv-hole') + ssh_remote_cmd = [ + 'sh', '-c', recv_hole, + 'dummy-argv0', 'vbox', vdi_path, str(st.st_size), + ] + cliapp.runcmd( ['python', xfer_hole_path, raw_disk], - ['ssh', ssh_host, - 'sh', '-c', cliapp.shell_quote(recv_hole), - 'dummy-argv0', 'vbox', vdi_path, str(st.st_size)], + ['ssh', ssh_host] + map(cliapp.shell_quote, ssh_remote_cmd), stdout=None, stderr=None) def virtualbox_version(self, ssh_host): -- cgit v1.2.1