From 840292841f4495a79a037f81a26d6b3f51e7cb8c Mon Sep 17 00:00:00 2001 From: Adam Coldrick Date: Thu, 4 Jun 2015 15:25:33 +0000 Subject: Move xfer-hole and recv-hole into extensions/ Change-Id: I46ed5f3ec85f9662bebac592eff7a6eb6d628f28 --- extensions/recv-hole | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++ extensions/xfer-hole | 137 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 295 insertions(+) create mode 100755 extensions/recv-hole create mode 100755 extensions/xfer-hole (limited to 'extensions') diff --git a/extensions/recv-hole b/extensions/recv-hole new file mode 100755 index 00000000..fe69f304 --- /dev/null +++ b/extensions/recv-hole @@ -0,0 +1,158 @@ +#!/bin/sh +# +# Copyright (C) 2014-2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# =*= License: GPL-2 =*= + + +# Receive a data stream describing a sparse file, and reproduce it, +# either to a named file or stdout. +# +# The data stream is simple: it's a sequence of DATA or HOLE records: +# +# DATA +# 123 +# <123 bytes of binary data, NOT including newline at the end> +# +# HOLE +# 123 +# +# This shell script can be executed over ssh (given to ssh as an arguemnt, +# with suitable escaping) on a different computer. This allows a large +# sparse file (e.g., disk image) be transferred quickly. +# +# This script should be called in one of the following ways: +# +# recv-hole file FILENAME +# recv-hole vbox FILENAME DISKSIZE +# +# In both cases, FILENAME is the pathname of the disk image on the +# receiving end. DISKSIZE is the size of the disk image in bytes. The +# first form is used when transferring a disk image to become an +# identical file on the receiving end. +# +# The second form is used when the disk image should be converted for +# use by VirtualBox. In this case, we want to avoid writing a +# temporary file on disk, and then calling the VirtualBox VBoxManage +# tool to do the conversion, since that would involve large amounts of +# unnecessary I/O and disk usage. Instead we pipe the file directly to +# VBoxManage, avoiding those issues. The piping is done here in this +# script, instead of in the caller, to make it easier to run things +# over ssh. +# +# However, since it's not possible seek in a Unix pipe, we have to +# explicitly write the zeroes into the pipe. This is not +# super-efficient, but the way to avoid that would be to avoid sending +# a sparse file, and do the conversion to a VDI on the sending end. +# That is out of scope for xfer-hole and recv-hole. + + +set -eu + + +die() +{ + echo "$@" 1>&2 + exit 1 +} + + +recv_hole_to_file() +{ + local n + + read n + truncate --size "+$n" "$1" +} + + +recv_data_to_file() +{ + local n + read n + + local blocksize=1048576 + local blocks=$(($n / $blocksize)) + local extra=$(($n % $blocksize)) + + xfer_data_to_stdout "$blocksize" "$blocks" >> "$1" + xfer_data_to_stdout 1 "$extra" >> "$1" +} + + +recv_hole_to_stdout() +{ + local n + read n + (echo "$n"; cat /dev/zero) | recv_data_to_stdout +} + + +recv_data_to_stdout() +{ + local n + read n + + local blocksize=1048576 + local blocks=$(($n / $blocksize)) + local extra=$(($n % $blocksize)) + + xfer_data_to_stdout "$blocksize" "$blocks" + xfer_data_to_stdout 1 "$extra" +} + + +xfer_data_to_stdout() +{ + local log="$(mktemp)" + if ! dd "bs=$1" count="$2" iflag=fullblock status=noxfer 2> "$log" + then + cat "$log" 1>&2 + rm -f "$log" + exit 1 + else + rm -f "$log" + fi +} + + +type="$1" +case "$type" in + file) + output="$2" + truncate --size=0 "$output" + while read what + do + case "$what" in + DATA) recv_data_to_file "$output" ;; + HOLE) recv_hole_to_file "$output" ;; + *) die "Unknown instruction: $what" ;; + esac + done + ;; + vbox) + output="$2" + disk_size="$3" + while read what + do + case "$what" in + DATA) recv_data_to_stdout ;; + HOLE) recv_hole_to_stdout ;; + *) die "Unknown instruction: $what" ;; + esac + done | + VBoxManage convertfromraw stdin "$output" "$disk_size" + ;; +esac diff --git a/extensions/xfer-hole b/extensions/xfer-hole new file mode 100755 index 00000000..91f1be01 --- /dev/null +++ b/extensions/xfer-hole @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# +# Send a sparse file more space-efficiently. +# See recv-hole for a description of the protocol. +# +# Note that xfer-hole requires a version of Linux with support for +# SEEK_DATA and SEEK_HOLE. +# +# +# Copyright (C) 2014-2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# =*= License: GPL-2 =*= + + + +import errno +import os +import sys + + +SEEK_DATA = 3 +SEEK_HOLE = 4 + + +filename = sys.argv[1] +fd = os.open(filename, os.O_RDONLY) +pos = 0 + + +DATA = 'data' +HOLE = 'hole' +EOF = 'eof' + + +def safe_lseek(fd, pos, whence): + try: + return os.lseek(fd, pos, whence) + except OSError as e: + if e.errno == errno.ENXIO: + return -1 + raise + + +def current_data_or_pos(fd, pos): + length = safe_lseek(fd, 0, os.SEEK_END) + next_data = safe_lseek(fd, pos, SEEK_DATA) + next_hole = safe_lseek(fd, pos, SEEK_HOLE) + + if pos == length: + return EOF, pos + elif pos == next_data: + return DATA, pos + elif pos == next_hole: + return HOLE, pos + else: + assert False, \ + ("Do not understand: pos=%d next_data=%d next_hole=%d" % + (pos, next_data, next_hole)) + + +def next_data_or_hole(fd, pos): + length = safe_lseek(fd, 0, os.SEEK_END) + next_data = safe_lseek(fd, pos, SEEK_DATA) + next_hole = safe_lseek(fd, pos, SEEK_HOLE) + + if pos == length: + return EOF, pos + elif pos == next_data: + # We are at data. + if next_hole == -1 or next_hole == length: + return EOF, length + else: + return HOLE, next_hole + elif pos == next_hole: + # We are at a hole. + if next_data == -1 or next_data == length: + return EOF, length + else: + return DATA, next_data + else: + assert False, \ + ("Do not understand: pos=%d next_data=%d next_hole=%d" % + (pos, next_data, next_hole)) + + +def find_data_and_holes(fd): + pos = safe_lseek(fd, 0, os.SEEK_CUR) + + kind, pos = current_data_or_pos(fd, pos) + while kind != EOF: + yield kind, pos + kind, pos = next_data_or_hole(fd, pos) + yield kind, pos + + +def make_xfer_instructions(fd): + prev_kind = None + prev_pos = None + for kind, pos in find_data_and_holes(fd): + if prev_kind == DATA: + yield (DATA, prev_pos, pos) + elif prev_kind == HOLE: + yield (HOLE, prev_pos, pos) + prev_kind = kind + prev_pos = pos + + +def copy_slice_from_file(to, fd, start, end): + safe_lseek(fd, start, os.SEEK_SET) + nbytes = end - start + max_at_a_time = 1024**2 + while nbytes > 0: + data = os.read(fd, min(nbytes, max_at_a_time)) + if not data: + break + to.write(data) + nbytes -= len(data) + + +for kind, start, end in make_xfer_instructions(fd): + if kind == HOLE: + sys.stdout.write('HOLE\n%d\n' % (end - start)) + elif kind == DATA: + sys.stdout.write('DATA\n%d\n' % (end - start)) + copy_slice_from_file(sys.stdout, fd, start, end) -- cgit v1.2.1