summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <lars.wirzenius@codethink.co.uk>2014-09-05 15:55:28 +0000
committerLars Wirzenius <lars.wirzenius@codethink.co.uk>2014-09-05 15:55:28 +0000
commitef6a4743aaaada781685ed6988917f299dbcfcda (patch)
treeefb4ebd14cf8123bc6a20e5c7b2c3fc7351ca035
parentaea1029044b7e0d4578f3896bf85898f33791c89 (diff)
parent19ceccea15e712616b1c5a48339be10fcadba41b (diff)
downloadmorph-ef6a4743aaaada781685ed6988917f299dbcfcda.tar.gz
Merge branch 'baserock/liw/xfer-hole'
Reviewed by various people.
-rwxr-xr-xmorphlib/exts/kvm.write15
-rwxr-xr-xmorphlib/exts/virtualbox-ssh.write19
-rwxr-xr-xmorphlib/recv-hole159
-rw-r--r--morphlib/util.py24
-rwxr-xr-xmorphlib/xfer-hole132
5 files changed, 341 insertions, 8 deletions
diff --git a/morphlib/exts/kvm.write b/morphlib/exts/kvm.write
index 94a55daa..16f188b5 100755
--- a/morphlib/exts/kvm.write
+++ b/morphlib/exts/kvm.write
@@ -90,9 +90,18 @@ class KvmPlusSshWriteExtension(morphlib.writeexts.WriteExtension):
'''Transfer raw disk image to libvirt host.'''
self.status(msg='Transferring disk image')
- target = '%s:%s' % (ssh_host, vm_path)
- with open(raw_disk, 'rb') as f:
- cliapp.runcmd(['rsync', '-szS', raw_disk, target])
+
+ xfer_hole_path = morphlib.util.get_data_path('xfer-hole')
+ recv_hole = morphlib.util.get_data('recv-hole')
+
+ ssh_remote_cmd = [
+ 'sh', '-c', recv_hole, 'dummy-argv0', 'file', vm_path
+ ]
+
+ cliapp.runcmd(
+ ['python', xfer_hole_path, raw_disk],
+ ['ssh', ssh_host] + map(cliapp.shell_quote, ssh_remote_cmd),
+ stdout=None, stderr=None)
def create_libvirt_guest(self, ssh_host, vm_name, vm_path, autostart):
'''Create the libvirt virtual machine.'''
diff --git a/morphlib/exts/virtualbox-ssh.write b/morphlib/exts/virtualbox-ssh.write
index 42585f5e..39ea8f86 100755
--- a/morphlib/exts/virtualbox-ssh.write
+++ b/morphlib/exts/virtualbox-ssh.write
@@ -102,11 +102,20 @@ class VirtualBoxPlusSshWriteExtension(morphlib.writeexts.WriteExtension):
'''Transfer raw disk image to VirtualBox host, and convert to VDI.'''
self.status(msg='Transfer disk and convert to VDI')
- with open(raw_disk, 'rb') as f:
- cliapp.ssh_runcmd(ssh_host,
- ['VBoxManage', 'convertfromraw', 'stdin', vdi_path,
- str(os.path.getsize(raw_disk))],
- stdin=f)
+
+ st = os.lstat(raw_disk)
+ xfer_hole_path = morphlib.util.get_data_path('xfer-hole')
+ recv_hole = morphlib.util.get_data('recv-hole')
+
+ ssh_remote_cmd = [
+ 'sh', '-c', recv_hole,
+ 'dummy-argv0', 'vbox', vdi_path, str(st.st_size),
+ ]
+
+ cliapp.runcmd(
+ ['python', xfer_hole_path, raw_disk],
+ ['ssh', ssh_host] + map(cliapp.shell_quote, ssh_remote_cmd),
+ stdout=None, stderr=None)
def virtualbox_version(self, ssh_host):
'Get the version number of the VirtualBox running on the remote host.'
diff --git a/morphlib/recv-hole b/morphlib/recv-hole
new file mode 100755
index 00000000..d6504bf6
--- /dev/null
+++ b/morphlib/recv-hole
@@ -0,0 +1,159 @@
+#!/bin/sh
+#
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =*= License: GPL-2 =*=
+
+
+# Receive a data stream describing a sparse file, and reproduce it,
+# either to a named file or stdout.
+#
+# The data stream is simple: it's a sequence of DATA or HOLE records:
+#
+# DATA
+# 123
+# <123 bytes of binary data, NOT including newline at the end>
+#
+# HOLE
+# 123
+#
+# This shell script can be executed over ssh (given to ssh as an argument,
+# with suitable escaping) on a different computer. This allows a large
+# sparse file (e.g., disk image) to be transferred quickly.
+#
+# This script should be called in one of the following ways:
+#
+# recv-hole file FILENAME
+# recv-hole vbox FILENAME DISKSIZE
+#
+# In both cases, FILENAME is the pathname of the disk image on the
+# receiving end. DISKSIZE is the size of the disk image in bytes. The
+# first form is used when transferring a disk image to become an
+# identical file on the receiving end.
+#
+# The second form is used when the disk image should be converted for
+# use by VirtualBox. In this case, we want to avoid writing a
+# temporary file on disk, and then calling the VirtualBox VBoxManage
+# tool to do the conversion, since that would involve large amounts of
+# unnecessary I/O and disk usage. Instead we pipe the file directly to
+# VBoxManage, avoiding those issues. The piping is done here in this
+# script, instead of in the caller, to make it easier to run things
+# over ssh.
+#
+# However, since it's not possible to seek in a Unix pipe, we have to
+# explicitly write the zeroes into the pipe. This is not
+# super-efficient, but the way to avoid that would be to avoid sending
+# a sparse file, and do the conversion to a VDI on the sending end.
+# That is out of scope for xfer-hole and recv-hole.
+
+
+set -eu
+
+
+# Print the arguments as an error message on stderr and terminate the
+# script with exit status 1.
+die()
+{
+    echo "$@" >&2
+    exit 1
+}
+
+
+# Handle a HOLE record when writing to a file: read the hole length
+# from stdin and grow the file named by $1 by that many bytes, without
+# writing any data.
+recv_hole_to_file()
+{
+    local hole_len
+    read hole_len
+
+    truncate --size="+$hole_len" "$1"
+}
+
+
+# Handle a DATA record when writing to a file: read the payload length
+# from stdin, then append that many payload bytes from stdin to the
+# file named by $1.
+recv_data_to_file()
+{
+    local n
+    read n
+
+    local blocksize=1048576
+    local blocks=$(($n / $blocksize))
+    local extra=$(($n % $blocksize))
+
+    xfer_data_to_stdout "$blocksize" "$blocks" >> "$1"
+
+    # Copy the tail as one block of $extra bytes; copying it with a
+    # block size of 1 costs a read and a write call per byte. dd does
+    # not accept a zero block size, so skip the call when there is no
+    # tail.
+    if [ "$extra" -gt 0 ]
+    then
+        xfer_data_to_stdout "$extra" 1 >> "$1"
+    fi
+}
+
+
+# Handle a HOLE record when writing to stdout: read the hole length
+# from stdin and emit that many zero bytes. This is done by feeding
+# recv_data_to_stdout a length line followed by an endless supply of
+# zeroes, of which it consumes exactly the requested amount.
+recv_hole_to_stdout()
+{
+    local hole_len
+    read hole_len
+    {
+        echo "$hole_len"
+        cat /dev/zero
+    } | recv_data_to_stdout
+}
+
+
+# Handle a DATA record when writing to stdout: read the payload length
+# from stdin, then copy that many payload bytes from stdin to stdout.
+recv_data_to_stdout()
+{
+    local n
+    read n
+
+    local blocksize=1048576
+    local blocks=$(($n / $blocksize))
+    local extra=$(($n % $blocksize))
+
+    xfer_data_to_stdout "$blocksize" "$blocks"
+
+    # Copy the tail as one block of $extra bytes; copying it with a
+    # block size of 1 costs a read and a write call per byte. dd does
+    # not accept a zero block size, so skip the call when there is no
+    # tail.
+    if [ "$extra" -gt 0 ]
+    then
+        xfer_data_to_stdout "$extra" 1
+    fi
+}
+
+
+xfer_data_to_stdout()
+{
+ local log="$(mktemp)"
+ if ! dd "bs=$1" count="$2" iflag=fullblock status=noxfer 2> "$log"
+ then
+ cat "$log" 1>&2
+ rm -f "$log"
+ exit 1
+ else
+ rm -f "$log"
+ fi
+}
+
+
+# Main program: the first argument selects the output mode, and the
+# record stream is read from stdin until end of input.
+type="$1"
+case "$type" in
+    file)
+        # Start from an empty file; each record then appends to it.
+        output="$2"
+        truncate --size=0 "$output"
+        while read what
+        do
+            case "$what" in
+                DATA) recv_data_to_file "$output" ;;
+                HOLE) recv_hole_to_file "$output" ;;
+                *) die "Unknown instruction: $what" ;;
+            esac
+        done
+        ;;
+    vbox)
+        # Expand the records into a plain byte stream (holes become
+        # zeroes) and pipe it into VBoxManage for conversion.
+        output="$2"
+        disk_size="$3"
+        while read what
+        do
+            case "$what" in
+                DATA) recv_data_to_stdout ;;
+                HOLE) recv_hole_to_stdout ;;
+                *) die "Unknown instruction: $what" ;;
+            esac
+        done |
+        VBoxManage convertfromraw stdin "$output" "$disk_size"
+        ;;
+    *)
+        # Without this branch an unrecognised mode would consume no
+        # input and exit successfully, hiding the mistake.
+        die "Unknown output type: $type"
+        ;;
+esac
diff --git a/morphlib/util.py b/morphlib/util.py
index 0c551296..36ab4e21 100644
--- a/morphlib/util.py
+++ b/morphlib/util.py
@@ -463,3 +463,27 @@ def iter_trickle(iterable, limit):
if len(buf) == 0:
break
yield buf
+
+
+def get_data_path(relative_path): # pragma: no cover
+ '''Return path to a data file in the morphlib Python package.
+
+ ``relative_path`` is the name of the data file, relative to the
+ location in morphlib where the data files are.
+
+ '''
+
+ morphlib_dir = os.path.dirname(morphlib.__file__)
+ return os.path.join(morphlib_dir, relative_path)
+
+
+def get_data(relative_path): # pragma: no cover
+ '''Return contents of a data file from the morphlib Python package.
+
+ ``relative_path`` is the name of the data file, relative to the
+ location in morphlib where the data files are.
+
+ '''
+
+ with open(get_data_path(relative_path)) as f:
+ return f.read()
diff --git a/morphlib/xfer-hole b/morphlib/xfer-hole
new file mode 100755
index 00000000..0d4cee7a
--- /dev/null
+++ b/morphlib/xfer-hole
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+#
+# Send a sparse file more space-efficiently.
+# See recv-hole for a description of the protocol.
+#
+# Note that xfer-hole requires a version of Linux with support for
+# SEEK_DATA and SEEK_HOLE.
+#
+#
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =*= License: GPL-2 =*=
+
+
+
+import errno
+import os
+import sys
+
+
+# "whence" values passed to lseek for locating data and holes. They are
+# spelled out as numbers here rather than taken from the os module
+# (presumably the Linux values -- confirm against <unistd.h>).
+SEEK_DATA = 3
+SEEK_HOLE = 4
+
+
+# The file to send is named by the first command line argument and is
+# opened read-only as a raw file descriptor.
+filename = sys.argv[1]
+fd = os.open(filename, os.O_RDONLY)
+pos = 0
+
+
+# Tags describing what is found at a file position: the start of a
+# data region, the start of a hole, or the end of the file.
+DATA = 'data'
+HOLE = 'hole'
+EOF = 'eof'
+
+
+def safe_lseek(fd, pos, whence):
+ try:
+ return os.lseek(fd, pos, whence)
+ except OSError as e:
+ if e.errno == errno.ENXIO:
+ return -1
+ raise
+
+
+def current_data_or_pos(fd, pos):
+ length = safe_lseek(fd, 0, os.SEEK_END)
+ next_data = safe_lseek(fd, pos, SEEK_DATA)
+ next_hole = safe_lseek(fd, pos, SEEK_HOLE)
+
+ if pos == length:
+ return EOF, pos
+ elif pos == next_data:
+ return DATA, pos
+ elif pos == next_hole:
+ return HOLE, pos
+ else:
+ assert False, \
+ ("Do not understand: pos=%d next_data=%d next_hole=%d" %
+ (pos, next_data, next_hole))
+
+
+def next_data_or_hole(fd, pos):
+ length = safe_lseek(fd, 0, os.SEEK_END)
+ next_data = safe_lseek(fd, pos, SEEK_DATA)
+ next_hole = safe_lseek(fd, pos, SEEK_HOLE)
+
+ if pos == length:
+ return EOF, pos
+ elif pos == next_data:
+ # We are at data.
+ if next_hole == -1 or next_hole == length:
+ return EOF, length
+ else:
+ return HOLE, next_hole
+ elif pos == next_hole:
+ # We are at a hole.
+ if next_data == -1 or next_data == length:
+ return EOF, length
+ else:
+ return DATA, next_data
+ else:
+ assert False, \
+ ("Do not understand: pos=%d next_data=%d next_hole=%d" %
+ (pos, next_data, next_hole))
+
+
+def find_data_and_holes(fd):
+ pos = safe_lseek(fd, 0, os.SEEK_CUR)
+
+ kind, pos = current_data_or_pos(fd, pos)
+ while kind != EOF:
+ yield kind, pos
+ kind, pos = next_data_or_hole(fd, pos)
+ yield kind, pos
+
+
+def make_xfer_instructions(fd):
+ prev_kind = None
+ prev_pos = None
+ for kind, pos in find_data_and_holes(fd):
+ if prev_kind == DATA:
+ yield (DATA, prev_pos, pos)
+ elif prev_kind == HOLE:
+ yield (HOLE, prev_pos, pos)
+ prev_kind = kind
+ prev_pos = pos
+
+
+def copy_slice_from_file(to, fd, start, end):
+ safe_lseek(fd, start, os.SEEK_SET)
+ data = os.read(fd, end - start)
+ to.write(data)
+
+
+for kind, start, end in make_xfer_instructions(fd):
+ if kind == HOLE:
+ sys.stdout.write('HOLE\n%d\n' % (end - start))
+ elif kind == DATA:
+ sys.stdout.write('DATA\n%d\n' % (end - start))
+ copy_slice_from_file(sys.stdout, fd, start, end)