summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <lars.wirzenius@codethink.co.uk>2014-08-08 13:17:01 +0000
committerLars Wirzenius <lars.wirzenius@codethink.co.uk>2014-09-05 15:38:59 +0000
commit65ed235de623fd152dd2967b9ff2e1f60626c658 (patch)
tree48dca56f6860cd2b50b58d6ae3b81ad82edee0a5
parent8c4f5dd9adce099693c53d14c1a549d5b4fa88d1 (diff)
downloaddefinitions-65ed235de623fd152dd2967b9ff2e1f60626c658.tar.gz
Transfer sparse files faster for kvm, vbox deployment
The KVM and VirtualBox deployments use sparse files for raw disk images. This means they can store a large disk (say, tens or hundreds of gigabytes) without using more disk space than is required for the actual content (e.g., a gigabyte or so for the files in the root filesystem). The kernel and filesystem make the unwritten parts of the disk image look as if they are filled with zero bytes. This is good. However, during deployment those sparse files get transferred as if there really are a lot of zeroes. Those zeroes take a lot of time to transfer. rsync, for example, does not handle large holes efficiently. This change introduces a couple of helper tools (morphlib/xfer-hole and morphlib/recv-hole), which transfer the holes more efficiently. The xfer-hole program reads a file and outputs records like these: "DATA 123", then binary data (exactly 123 bytes and no newline at the end), then "HOLE 3245". xfer-hole can do this efficiently, without having to read through all the zeroes in the holes, using the SEEK_DATA and SEEK_HOLE arguments to lseek. Using this, the holes take only a few bytes each, making it possible to transfer a disk image faster. In my benchmarks, transferring a 100G byte disk image took about 100 seconds for KVM, and 220 seconds for VirtualBox (which needs to do more work at the receiver to convert the raw disk to a VDI). Both benchmarks were from a VM on my laptop to the laptop itself. The interesting bit here is that the receiver (recv-hole) is simple enough that it can be implemented in a bit of shell script, and the text of the shell script can be run on the remote end by giving it to ssh as a command line argument. This means there is no need to install any special tools on the receiver, which makes using this improvement much simpler.
-rwxr-xr-xkvm.write13
-rwxr-xr-xvirtualbox-ssh.write16
2 files changed, 21 insertions, 8 deletions
diff --git a/kvm.write b/kvm.write
index 94a55daa..3e3b3eb1 100755
--- a/kvm.write
+++ b/kvm.write
@@ -90,9 +90,16 @@ class KvmPlusSshWriteExtension(morphlib.writeexts.WriteExtension):
'''Transfer raw disk image to libvirt host.'''
self.status(msg='Transferring disk image')
- target = '%s:%s' % (ssh_host, vm_path)
- with open(raw_disk, 'rb') as f:
- cliapp.runcmd(['rsync', '-szS', raw_disk, target])
+
+ xfer_hole_path = morphlib.util.get_data_path('xfer-hole')
+ recv_hole = morphlib.util.get_data('recv-hole')
+
+ cliapp.runcmd(
+ ['python', xfer_hole_path, raw_disk],
+ ['ssh', ssh_host,
+ 'sh', '-c', cliapp.shell_quote(recv_hole),
+ 'dummy-argv0', 'file', vm_path],
+ stdout=None, stderr=None)
def create_libvirt_guest(self, ssh_host, vm_name, vm_path, autostart):
'''Create the libvirt virtual machine.'''
diff --git a/virtualbox-ssh.write b/virtualbox-ssh.write
index 42585f5e..1aebe490 100755
--- a/virtualbox-ssh.write
+++ b/virtualbox-ssh.write
@@ -102,11 +102,17 @@ class VirtualBoxPlusSshWriteExtension(morphlib.writeexts.WriteExtension):
'''Transfer raw disk image to VirtualBox host, and convert to VDI.'''
self.status(msg='Transfer disk and convert to VDI')
- with open(raw_disk, 'rb') as f:
- cliapp.ssh_runcmd(ssh_host,
- ['VBoxManage', 'convertfromraw', 'stdin', vdi_path,
- str(os.path.getsize(raw_disk))],
- stdin=f)
+
+ st = os.lstat(raw_disk)
+ xfer_hole_path = morphlib.util.get_data_path('xfer-hole')
+ recv_hole = morphlib.util.get_data('recv-hole')
+
+ cliapp.runcmd(
+ ['python', xfer_hole_path, raw_disk],
+ ['ssh', ssh_host,
+ 'sh', '-c', cliapp.shell_quote(recv_hole),
+ 'dummy-argv0', 'vbox', vdi_path, str(st.st_size)],
+ stdout=None, stderr=None)
def virtualbox_version(self, ssh_host):
'Get the version number of the VirtualBox running on the remote host.'